OLD | NEW |
1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/translate/core/language_detection/language_detection_util.h" | 5 #include "components/translate/core/language_detection/language_detection_util.h" |
6 | 6 |
7 #include "base/logging.h" | 7 #include "base/logging.h" |
8 #include "base/metrics/field_trial.h" | 8 #include "base/metrics/field_trial.h" |
9 #include "base/strings/string_split.h" | 9 #include "base/strings/string_split.h" |
10 #include "base/strings/string_util.h" | 10 #include "base/strings/string_util.h" |
(...skipping 294 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
305 *code = base::StringToLowerASCII(code->substr(0, dash_index)) + | 305 *code = base::StringToLowerASCII(code->substr(0, dash_index)) + |
306 base::StringToUpperASCII(code->substr(dash_index)); | 306 base::StringToUpperASCII(code->substr(dash_index)); |
307 } else { | 307 } else { |
308 *code = base::StringToLowerASCII(*code); | 308 *code = base::StringToLowerASCII(*code); |
309 } | 309 } |
310 } | 310 } |
311 | 311 |
312 bool IsValidLanguageCode(const std::string& code) { | 312 bool IsValidLanguageCode(const std::string& code) { |
313 // Roughly check if the language code follows /[a-zA-Z]{2,3}(-[a-zA-Z]{2})?/. | 313 // Roughly check if the language code follows /[a-zA-Z]{2,3}(-[a-zA-Z]{2})?/. |
314 // TODO(hajimehoshi): How about es-419, which is used as an Accept language? | 314 // TODO(hajimehoshi): How about es-419, which is used as an Accept language? |
315 std::vector<std::string> chunks; | 315 std::vector<base::StringPiece> chunks = base::SplitStringPiece( |
316 base::SplitString(code, '-', &chunks); | 316 code, "-", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL); |
317 | 317 |
318 if (chunks.size() < 1 || 2 < chunks.size()) | 318 if (chunks.size() < 1 || 2 < chunks.size()) |
319 return false; | 319 return false; |
320 | 320 |
321 const std::string& main_code = chunks[0]; | 321 const base::StringPiece& main_code = chunks[0]; |
322 | 322 |
323 if (main_code.size() < 1 || 3 < main_code.size()) | 323 if (main_code.size() < 1 || 3 < main_code.size()) |
324 return false; | 324 return false; |
325 | 325 |
326 for (std::string::const_iterator it = main_code.begin(); | 326 for (char c : main_code) { |
327 it != main_code.end(); ++it) { | 327 if (!base::IsAsciiAlpha(c)) |
328 if (!base::IsAsciiAlpha(*it)) | |
329 return false; | 328 return false; |
330 } | 329 } |
331 | 330 |
332 if (chunks.size() == 1) | 331 if (chunks.size() == 1) |
333 return true; | 332 return true; |
334 | 333 |
335 const std::string& sub_code = chunks[1]; | 334 const base::StringPiece& sub_code = chunks[1]; |
336 | 335 |
337 if (sub_code.size() != 2) | 336 if (sub_code.size() != 2) |
338 return false; | 337 return false; |
339 | 338 |
340 for (std::string::const_iterator it = sub_code.begin(); | 339 for (char c : sub_code) { |
341 it != sub_code.end(); ++it) { | 340 if (!base::IsAsciiAlpha(c)) |
342 if (!base::IsAsciiAlpha(*it)) | |
343 return false; | 341 return false; |
344 } | 342 } |
345 | 343 |
346 return true; | 344 return true; |
347 } | 345 } |
348 | 346 |
349 bool IsSameOrSimilarLanguages(const std::string& page_language, | 347 bool IsSameOrSimilarLanguages(const std::string& page_language, |
350 const std::string& cld_language) { | 348 const std::string& cld_language) { |
351 std::vector<std::string> chunks; | 349 std::vector<std::string> chunks = base::SplitString( |
352 | 350 page_language, "-", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL); |
353 base::SplitString(page_language, '-', &chunks); | |
354 if (chunks.size() == 0) | 351 if (chunks.size() == 0) |
355 return false; | 352 return false; |
356 std::string page_language_main_part = chunks[0]; | 353 std::string page_language_main_part = chunks[0]; // Need copy. |
357 | 354 |
358 base::SplitString(cld_language, '-', &chunks); | 355 chunks = base::SplitString( |
| 356 cld_language, "-", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL); |
359 if (chunks.size() == 0) | 357 if (chunks.size() == 0) |
360 return false; | 358 return false; |
361 std::string cld_language_main_part = chunks[0]; | 359 const std::string& cld_language_main_part = chunks[0]; |
362 | 360 |
363 // Language code part of |page_language| is matched to one of |cld_language|. | 361 // Language code part of |page_language| is matched to one of |cld_language|. |
364 // Country code is ignored here. | 362 // Country code is ignored here. |
365 if (page_language_main_part == cld_language_main_part) { | 363 if (page_language_main_part == cld_language_main_part) { |
366 // Languages are matched strictly. Reports false to metrics, but returns | 364 // Languages are matched strictly. Reports false to metrics, but returns |
367 // true. | 365 // true. |
368 translate::ReportSimilarLanguageMatch(false); | 366 translate::ReportSimilarLanguageMatch(false); |
369 return true; | 367 return true; |
370 } | 368 } |
371 | 369 |
(...skipping 20 matching lines...) Expand all Loading... |
392 // distinguish from English, and the language is one of well-known languages | 390 // distinguish from English, and the language is one of well-known languages |
393 // which often provide "en-*" meta information mistakenly. | 391 // which often provide "en-*" meta information mistakenly. |
394 for (size_t i = 0; i < arraysize(kWellKnownCodesOnWrongConfiguration); ++i) { | 392 for (size_t i = 0; i < arraysize(kWellKnownCodesOnWrongConfiguration); ++i) { |
395 if (cld_language == kWellKnownCodesOnWrongConfiguration[i]) | 393 if (cld_language == kWellKnownCodesOnWrongConfiguration[i]) |
396 return true; | 394 return true; |
397 } | 395 } |
398 return false; | 396 return false; |
399 } | 397 } |
400 | 398 |
401 } // namespace translate | 399 } // namespace translate |
OLD | NEW |