Language improvements, replace whatlanguage with CLD (#2753)
* add failing en specs * add cld2 gem * Replace WhatLanguage with CLD
This commit is contained in:
		
							parent
							
								
									53384b0ffe
								
							
						
					
					
						commit
						8c5ad23b24
					
				
							
								
								
									
										2
									
								
								Gemfile
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								Gemfile
									
									
									
									
									
								
							@ -20,6 +20,7 @@ gem 'paperclip', '~> 5.1'
 | 
				
			|||||||
gem 'paperclip-av-transcoder'
 | 
					gem 'paperclip-av-transcoder'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
gem 'addressable'
 | 
					gem 'addressable'
 | 
				
			||||||
 | 
					gem 'cld2', require: 'cld'
 | 
				
			||||||
gem 'devise'
 | 
					gem 'devise'
 | 
				
			||||||
gem 'devise-two-factor'
 | 
					gem 'devise-two-factor'
 | 
				
			||||||
gem 'doorkeeper'
 | 
					gem 'doorkeeper'
 | 
				
			||||||
@ -56,7 +57,6 @@ gem 'statsd-instrument'
 | 
				
			|||||||
gem 'twitter-text'
 | 
					gem 'twitter-text'
 | 
				
			||||||
gem 'tzinfo-data'
 | 
					gem 'tzinfo-data'
 | 
				
			||||||
gem 'webpacker', '~>1.2'
 | 
					gem 'webpacker', '~>1.2'
 | 
				
			||||||
gem 'whatlanguage'
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
# For some reason the view specs start failing without this
 | 
					# For some reason the view specs start failing without this
 | 
				
			||||||
gem 'react-rails'
 | 
					gem 'react-rails'
 | 
				
			||||||
 | 
				
			|||||||
@ -102,6 +102,8 @@ GEM
 | 
				
			|||||||
      rack-test (>= 0.5.4)
 | 
					      rack-test (>= 0.5.4)
 | 
				
			||||||
      xpath (~> 2.0)
 | 
					      xpath (~> 2.0)
 | 
				
			||||||
    chunky_png (1.3.8)
 | 
					    chunky_png (1.3.8)
 | 
				
			||||||
 | 
					    cld2 (1.0.3)
 | 
				
			||||||
 | 
					      ffi (~> 1.9.3)
 | 
				
			||||||
    climate_control (0.1.0)
 | 
					    climate_control (0.1.0)
 | 
				
			||||||
    cocaine (0.5.8)
 | 
					    cocaine (0.5.8)
 | 
				
			||||||
      climate_control (>= 0.0.3, < 1.0)
 | 
					      climate_control (>= 0.0.3, < 1.0)
 | 
				
			||||||
@ -153,6 +155,7 @@ GEM
 | 
				
			|||||||
    faker (1.7.3)
 | 
					    faker (1.7.3)
 | 
				
			||||||
      i18n (~> 0.5)
 | 
					      i18n (~> 0.5)
 | 
				
			||||||
    fast_blank (1.0.0)
 | 
					    fast_blank (1.0.0)
 | 
				
			||||||
 | 
					    ffi (1.9.18)
 | 
				
			||||||
    fuubar (2.2.0)
 | 
					    fuubar (2.2.0)
 | 
				
			||||||
      rspec-core (~> 3.0)
 | 
					      rspec-core (~> 3.0)
 | 
				
			||||||
      ruby-progressbar (~> 1.4)
 | 
					      ruby-progressbar (~> 1.4)
 | 
				
			||||||
@ -463,7 +466,6 @@ GEM
 | 
				
			|||||||
    websocket-driver (0.6.5)
 | 
					    websocket-driver (0.6.5)
 | 
				
			||||||
      websocket-extensions (>= 0.1.0)
 | 
					      websocket-extensions (>= 0.1.0)
 | 
				
			||||||
    websocket-extensions (0.1.2)
 | 
					    websocket-extensions (0.1.2)
 | 
				
			||||||
    whatlanguage (1.0.6)
 | 
					 | 
				
			||||||
    xpath (2.0.0)
 | 
					    xpath (2.0.0)
 | 
				
			||||||
      nokogiri (~> 1.3)
 | 
					      nokogiri (~> 1.3)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -484,6 +486,7 @@ DEPENDENCIES
 | 
				
			|||||||
  capistrano-rbenv
 | 
					  capistrano-rbenv
 | 
				
			||||||
  capistrano-yarn
 | 
					  capistrano-yarn
 | 
				
			||||||
  capybara
 | 
					  capybara
 | 
				
			||||||
 | 
					  cld2
 | 
				
			||||||
  devise
 | 
					  devise
 | 
				
			||||||
  devise-two-factor
 | 
					  devise-two-factor
 | 
				
			||||||
  doorkeeper
 | 
					  doorkeeper
 | 
				
			||||||
@ -549,7 +552,6 @@ DEPENDENCIES
 | 
				
			|||||||
  uglifier (>= 1.3.0)
 | 
					  uglifier (>= 1.3.0)
 | 
				
			||||||
  webmock
 | 
					  webmock
 | 
				
			||||||
  webpacker (~> 1.2)
 | 
					  webpacker (~> 1.2)
 | 
				
			||||||
  whatlanguage
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
RUBY VERSION
 | 
					RUBY VERSION
 | 
				
			||||||
   ruby 2.4.1p111
 | 
					   ruby 2.4.1p111
 | 
				
			||||||
 | 
				
			|||||||
@ -9,11 +9,23 @@ class LanguageDetector
 | 
				
			|||||||
  end
 | 
					  end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def to_iso_s
 | 
					  def to_iso_s
 | 
				
			||||||
    WhatLanguage.new(:all).language_iso(text_without_urls) || default_locale.to_sym
 | 
					    detected_language_code || default_locale.to_sym
 | 
				
			||||||
  end
 | 
					  end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  private
 | 
					  private
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  def detected_language_code
 | 
				
			||||||
 | 
					    detected_language[:code].to_sym if detected_language_reliable?
 | 
				
			||||||
 | 
					  end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  def detected_language
 | 
				
			||||||
 | 
					    @_detected_language ||= CLD.detect_language(text_without_urls)
 | 
				
			||||||
 | 
					  end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  def detected_language_reliable?
 | 
				
			||||||
 | 
					    detected_language[:reliable]
 | 
				
			||||||
 | 
					  end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def text_without_urls
 | 
					  def text_without_urls
 | 
				
			||||||
    text.dup.tap do |new_text|
 | 
					    text.dup.tap do |new_text|
 | 
				
			||||||
      URI.extract(new_text).each do |url|
 | 
					      URI.extract(new_text).each do |url|
 | 
				
			||||||
 | 
				
			|||||||
@ -3,11 +3,17 @@ require 'rails_helper'
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
describe LanguageDetector do
 | 
					describe LanguageDetector do
 | 
				
			||||||
  describe 'to_iso_s' do
 | 
					  describe 'to_iso_s' do
 | 
				
			||||||
    it 'detects english language' do
 | 
					    it 'detects english language for basic strings' do
 | 
				
			||||||
      string = 'Hello and welcome to mastodon'
 | 
					      strings = [
 | 
				
			||||||
 | 
					        "Hello and welcome to mastodon",
 | 
				
			||||||
 | 
					        "I'd rather not!",
 | 
				
			||||||
 | 
					        "a lot of people just want to feel righteous all the time and that's all that matters",
 | 
				
			||||||
 | 
					      ]
 | 
				
			||||||
 | 
					      strings.each do |string|
 | 
				
			||||||
        result = described_class.new(string).to_iso_s
 | 
					        result = described_class.new(string).to_iso_s
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      expect(result).to eq :en
 | 
					        expect(result).to eq(:en), string
 | 
				
			||||||
 | 
					      end
 | 
				
			||||||
    end
 | 
					    end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    it 'detects spanish language' do
 | 
					    it 'detects spanish language' do
 | 
				
			||||||
@ -19,15 +25,15 @@ describe LanguageDetector do
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    describe 'when language can\'t be detected' do
 | 
					    describe 'when language can\'t be detected' do
 | 
				
			||||||
      it 'confirm language engine cant detect' do
 | 
					      it 'confirm language engine cant detect' do
 | 
				
			||||||
        result = WhatLanguage.new(:all).language_iso('')
 | 
					        result = CLD.detect_language('')
 | 
				
			||||||
        expect(result).to be_nil
 | 
					        expect(result[:reliable]).to be false
 | 
				
			||||||
      end
 | 
					      end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      describe 'because of a URL' do
 | 
					      describe 'because of a URL' do
 | 
				
			||||||
        it 'uses default locale when sent just a URL' do
 | 
					        it 'uses default locale when sent just a URL' do
 | 
				
			||||||
          string = 'http://example.com/media/2kFTgOJLXhQf0g2nKB4'
 | 
					          string = 'http://example.com/media/2kFTgOJLXhQf0g2nKB4'
 | 
				
			||||||
          wl_result = WhatLanguage.new(:all).language_iso(string)
 | 
					          cld_result = CLD.detect_language(string)[:code]
 | 
				
			||||||
          expect(wl_result).not_to eq :en
 | 
					          expect(cld_result).not_to eq :en
 | 
				
			||||||
 | 
					
 | 
				
			||||||
          result = described_class.new(string).to_iso_s
 | 
					          result = described_class.new(string).to_iso_s
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user