| OLD | NEW |
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/translate/core/language_detection/language_detection_util.h" | 5 #include "components/translate/core/language_detection/language_detection_util.h" |
| 6 | 6 |
| 7 #include "base/logging.h" | 7 #include "base/logging.h" |
| 8 #include "base/metrics/field_trial.h" | 8 #include "base/metrics/field_trial.h" |
| 9 #include "base/strings/string_split.h" | 9 #include "base/strings/string_split.h" |
| 10 #include "base/strings/string_util.h" | 10 #include "base/strings/string_util.h" |
| (...skipping 294 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 305 *code = base::StringToLowerASCII(code->substr(0, dash_index)) + | 305 *code = base::StringToLowerASCII(code->substr(0, dash_index)) + |
| 306 base::StringToUpperASCII(code->substr(dash_index)); | 306 base::StringToUpperASCII(code->substr(dash_index)); |
| 307 } else { | 307 } else { |
| 308 *code = base::StringToLowerASCII(*code); | 308 *code = base::StringToLowerASCII(*code); |
| 309 } | 309 } |
| 310 } | 310 } |
| 311 | 311 |
| 312 bool IsValidLanguageCode(const std::string& code) { | 312 bool IsValidLanguageCode(const std::string& code) { |
| 313 // Roughly check if the language code follows /[a-zA-Z]{2,3}(-[a-zA-Z]{2})?/. | 313 // Roughly check if the language code follows /[a-zA-Z]{2,3}(-[a-zA-Z]{2})?/. |
| 314 // TODO(hajimehoshi): How about es-419, which is used as an Accept language? | 314 // TODO(hajimehoshi): How about es-419, which is used as an Accept language? |
| 315 std::vector<std::string> chunks; | 315 std::vector<base::StringPiece> chunks = base::SplitStringPiece( |
| 316 base::SplitString(code, '-', &chunks); | 316 code, "-", base::KEEP_WHITESPACE, base::SPLIT_WANT_ALL); |
| 317 | 317 |
| 318 if (chunks.size() < 1 || 2 < chunks.size()) | 318 if (chunks.size() < 1 || 2 < chunks.size()) |
| 319 return false; | 319 return false; |
| 320 | 320 |
| 321 const std::string& main_code = chunks[0]; | 321 const base::StringPiece& main_code = chunks[0]; |
| 322 | 322 |
| 323 if (main_code.size() < 1 || 3 < main_code.size()) | 323 if (main_code.size() < 1 || 3 < main_code.size()) |
| 324 return false; | 324 return false; |
| 325 | 325 |
| 326 for (std::string::const_iterator it = main_code.begin(); | 326 for (char c : main_code) { |
| 327 it != main_code.end(); ++it) { | 327 if (!base::IsAsciiAlpha(c)) |
| 328 if (!base::IsAsciiAlpha(*it)) | |
| 329 return false; | 328 return false; |
| 330 } | 329 } |
| 331 | 330 |
| 332 if (chunks.size() == 1) | 331 if (chunks.size() == 1) |
| 333 return true; | 332 return true; |
| 334 | 333 |
| 335 const std::string& sub_code = chunks[1]; | 334 const base::StringPiece& sub_code = chunks[1]; |
| 336 | 335 |
| 337 if (sub_code.size() != 2) | 336 if (sub_code.size() != 2) |
| 338 return false; | 337 return false; |
| 339 | 338 |
| 340 for (std::string::const_iterator it = sub_code.begin(); | 339 for (char c : sub_code) { |
| 341 it != sub_code.end(); ++it) { | 340 if (!base::IsAsciiAlpha(c)) |
| 342 if (!base::IsAsciiAlpha(*it)) | |
| 343 return false; | 341 return false; |
| 344 } | 342 } |
| 345 | 343 |
| 346 return true; | 344 return true; |
| 347 } | 345 } |
| 348 | 346 |
| 349 bool IsSameOrSimilarLanguages(const std::string& page_language, | 347 bool IsSameOrSimilarLanguages(const std::string& page_language, |
| 350 const std::string& cld_language) { | 348 const std::string& cld_language) { |
| 351 std::vector<std::string> chunks; | 349 std::vector<std::string> chunks = base::SplitString( |
| 352 | 350 page_language, "-", base::KEEP_WHITESPACE, base::SPLIT_WANT_ALL); |
| 353 base::SplitString(page_language, '-', &chunks); | |
| 354 if (chunks.size() == 0) | 351 if (chunks.size() == 0) |
| 355 return false; | 352 return false; |
| 356 std::string page_language_main_part = chunks[0]; | 353 std::string page_language_main_part = chunks[0]; // Need copy. |
| 357 | 354 |
| 358 base::SplitString(cld_language, '-', &chunks); | 355 chunks = base::SplitString( |
| 356 cld_language, "-", base::KEEP_WHITESPACE, base::SPLIT_WANT_ALL); |
| 359 if (chunks.size() == 0) | 357 if (chunks.size() == 0) |
| 360 return false; | 358 return false; |
| 361 std::string cld_language_main_part = chunks[0]; | 359 const std::string& cld_language_main_part = chunks[0]; |
| 362 | 360 |
| 363 // Language code part of |page_language| is matched to one of |cld_language|. | 361 // Language code part of |page_language| is matched to one of |cld_language|. |
| 364 // Country code is ignored here. | 362 // Country code is ignored here. |
| 365 if (page_language_main_part == cld_language_main_part) { | 363 if (page_language_main_part == cld_language_main_part) { |
| 366 // Languages are matched strictly. Reports false to metrics, but returns | 364 // Languages are matched strictly. Reports false to metrics, but returns |
| 367 // true. | 365 // true. |
| 368 translate::ReportSimilarLanguageMatch(false); | 366 translate::ReportSimilarLanguageMatch(false); |
| 369 return true; | 367 return true; |
| 370 } | 368 } |
| 371 | 369 |
| (...skipping 20 matching lines...) Expand all Loading... |
| 392 // distinguish from English, and the language is one of well-known languages | 390 // distinguish from English, and the language is one of well-known languages |
| 393 // which often provide "en-*" meta information mistakenly. | 391 // which often provide "en-*" meta information mistakenly. |
| 394 for (size_t i = 0; i < arraysize(kWellKnownCodesOnWrongConfiguration); ++i) { | 392 for (size_t i = 0; i < arraysize(kWellKnownCodesOnWrongConfiguration); ++i) { |
| 395 if (cld_language == kWellKnownCodesOnWrongConfiguration[i]) | 393 if (cld_language == kWellKnownCodesOnWrongConfiguration[i]) |
| 396 return true; | 394 return true; |
| 397 } | 395 } |
| 398 return false; | 396 return false; |
| 399 } | 397 } |
| 400 | 398 |
| 401 } // namespace translate | 399 } // namespace translate |
| OLD | NEW |