Language improvements, replace whatlanguage with CLD (#2753)
* add failing en specs
* add cld2 gem
* Replace WhatLanguage with CLD
This commit is contained in:
		
							parent
							
								
									53384b0ffe
								
							
						
					
					
						commit
						8c5ad23b24
					
				
							
								
								
									
										2
									
								
								Gemfile
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								Gemfile
									
									
									
									
									
								
							| @ -20,6 +20,7 @@ gem 'paperclip', '~> 5.1' | ||||
| gem 'paperclip-av-transcoder' | ||||
| 
 | ||||
| gem 'addressable' | ||||
| gem 'cld2', require: 'cld' | ||||
| gem 'devise' | ||||
| gem 'devise-two-factor' | ||||
| gem 'doorkeeper' | ||||
| @ -56,7 +57,6 @@ gem 'statsd-instrument' | ||||
| gem 'twitter-text' | ||||
| gem 'tzinfo-data' | ||||
| gem 'webpacker', '~>1.2' | ||||
| gem 'whatlanguage' | ||||
| 
 | ||||
| # For some reason the view specs start failing without this | ||||
| gem 'react-rails' | ||||
|  | ||||
| @ -102,6 +102,8 @@ GEM | ||||
|       rack-test (>= 0.5.4) | ||||
|       xpath (~> 2.0) | ||||
|     chunky_png (1.3.8) | ||||
|     cld2 (1.0.3) | ||||
|       ffi (~> 1.9.3) | ||||
|     climate_control (0.1.0) | ||||
|     cocaine (0.5.8) | ||||
|       climate_control (>= 0.0.3, < 1.0) | ||||
| @ -153,6 +155,7 @@ GEM | ||||
|     faker (1.7.3) | ||||
|       i18n (~> 0.5) | ||||
|     fast_blank (1.0.0) | ||||
|     ffi (1.9.18) | ||||
|     fuubar (2.2.0) | ||||
|       rspec-core (~> 3.0) | ||||
|       ruby-progressbar (~> 1.4) | ||||
| @ -463,7 +466,6 @@ GEM | ||||
|     websocket-driver (0.6.5) | ||||
|       websocket-extensions (>= 0.1.0) | ||||
|     websocket-extensions (0.1.2) | ||||
|     whatlanguage (1.0.6) | ||||
|     xpath (2.0.0) | ||||
|       nokogiri (~> 1.3) | ||||
| 
 | ||||
| @ -484,6 +486,7 @@ DEPENDENCIES | ||||
|   capistrano-rbenv | ||||
|   capistrano-yarn | ||||
|   capybara | ||||
|   cld2 | ||||
|   devise | ||||
|   devise-two-factor | ||||
|   doorkeeper | ||||
| @ -549,7 +552,6 @@ DEPENDENCIES | ||||
|   uglifier (>= 1.3.0) | ||||
|   webmock | ||||
|   webpacker (~> 1.2) | ||||
|   whatlanguage | ||||
| 
 | ||||
| RUBY VERSION | ||||
|    ruby 2.4.1p111 | ||||
|  | ||||
| @ -9,11 +9,23 @@ class LanguageDetector | ||||
|   end | ||||
| 
 | ||||
|   def to_iso_s | ||||
|     WhatLanguage.new(:all).language_iso(text_without_urls) || default_locale.to_sym | ||||
|     detected_language_code || default_locale.to_sym | ||||
|   end | ||||
| 
 | ||||
|   private | ||||
| 
 | ||||
|   def detected_language_code | ||||
|     detected_language[:code].to_sym if detected_language_reliable? | ||||
|   end | ||||
| 
 | ||||
|   def detected_language | ||||
|     @_detected_language ||= CLD.detect_language(text_without_urls) | ||||
|   end | ||||
| 
 | ||||
|   def detected_language_reliable? | ||||
|     detected_language[:reliable] | ||||
|   end | ||||
| 
 | ||||
|   def text_without_urls | ||||
|     text.dup.tap do |new_text| | ||||
|       URI.extract(new_text).each do |url| | ||||
|  | ||||
| @ -3,11 +3,17 @@ require 'rails_helper' | ||||
| 
 | ||||
| describe LanguageDetector do | ||||
|   describe 'to_iso_s' do | ||||
|     it 'detects english language' do | ||||
|       string = 'Hello and welcome to mastodon' | ||||
|       result = described_class.new(string).to_iso_s | ||||
|     it 'detects english language for basic strings' do | ||||
|       strings = [ | ||||
|         "Hello and welcome to mastodon", | ||||
|         "I'd rather not!", | ||||
|         "a lot of people just want to feel righteous all the time and that's all that matters", | ||||
|       ] | ||||
|       strings.each do |string| | ||||
|         result = described_class.new(string).to_iso_s | ||||
| 
 | ||||
|       expect(result).to eq :en | ||||
|         expect(result).to eq(:en), string | ||||
|       end | ||||
|     end | ||||
| 
 | ||||
|     it 'detects spanish language' do | ||||
| @ -19,15 +25,15 @@ describe LanguageDetector do | ||||
| 
 | ||||
|     describe 'when language can\'t be detected' do | ||||
|       it 'confirm language engine cant detect' do | ||||
|         result = WhatLanguage.new(:all).language_iso('') | ||||
|         expect(result).to be_nil | ||||
|         result = CLD.detect_language('') | ||||
|         expect(result[:reliable]).to be false | ||||
|       end | ||||
| 
 | ||||
|       describe 'because of a URL' do | ||||
|         it 'uses default locale when sent just a URL' do | ||||
|           string = 'http://example.com/media/2kFTgOJLXhQf0g2nKB4' | ||||
|           wl_result = WhatLanguage.new(:all).language_iso(string) | ||||
|           expect(wl_result).not_to eq :en | ||||
|           cld_result = CLD.detect_language(string)[:code] | ||||
|           expect(cld_result).not_to eq :en | ||||
| 
 | ||||
|           result = described_class.new(string).to_iso_s | ||||
| 
 | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user