Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. | |
|
droger
2016/10/05 13:11:32
2016
jkrcal
2016/10/05 15:14:46
Done.
| |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "components/translate/core/browser/language_model.h" | |
| 6 | |
| 7 #include <algorithm> | |
| 8 #include <map> | |
| 9 #include <set> | |
| 10 | |
| 11 #include "components/prefs/pref_registry_simple.h" | |
| 12 #include "components/prefs/pref_service.h" | |
| 13 #include "components/prefs/scoped_user_pref_update.h" | |
| 14 #include "components/translate/core/language_detection/language_detection_util.h" | |
| 15 | |
| 16 namespace translate { | |
| 17 | |
| 18 namespace { | |
| 19 | |
// Name of the dictionary pref that stores one integer counter per language
// code.
constexpr char kLanguageModelCounters[] = "language_model_counters";

// Once the sum of all counters exceeds this value, OnPageVisited() discounts
// and cleans the counters.
constexpr int kMaxCountersSum = 1000;
// During cleaning, counters smaller than kCutoffRatio * kMaxCountersSum are
// removed.
constexpr float kCutoffRatio = 0.005;
// During cleaning, every counter that is kept is multiplied by this factor.
constexpr float kDiscountFactor = 0.75;
| 25 | |
| 26 // Get the sum of the counter for all languages in the model. | |
| 27 int GetCountersSum(const base::DictionaryValue* dict) { | |
| 28 int sum = 0; | |
| 29 int counter_value = 0; | |
| 30 for (base::DictionaryValue::Iterator itr(*dict); !itr.IsAtEnd(); | |
| 31 itr.Advance()) { | |
| 32 if (itr.value().GetAsInteger(&counter_value)) | |
| 33 sum += counter_value; | |
| 34 } | |
| 35 return sum; | |
| 36 } | |
| 37 | |
| 38 // Remove languages with small counter values and discount remaining counters. | |
| 39 void DiscountAndCleanCounters(base::DictionaryValue* dict) { | |
| 40 std::map<std::string, int> new_values; | |
| 41 std::set<std::string> remove_keys; | |
| 42 | |
| 43 int counter_value = 0; | |
| 44 for (base::DictionaryValue::Iterator itr(*dict); !itr.IsAtEnd(); | |
| 45 itr.Advance()) { | |
| 46 // Remove languages with invalid or small values. | |
| 47 if (!itr.value().GetAsInteger(&counter_value) || | |
| 48 counter_value < (kCutoffRatio * kMaxCountersSum)) { | |
| 49 remove_keys.insert(itr.key()); | |
| 50 continue; | |
| 51 } | |
| 52 | |
| 53 // Discount the value. | |
| 54 dict->SetInteger(itr.key(), counter_value * kDiscountFactor); | |
| 55 } | |
| 56 | |
| 57 for (const std::string lang_to_remove : remove_keys) | |
|
Bernhard Bauer
2016/10/05 14:48:16
const ref
jkrcal
2016/10/05 15:14:46
Done.
| |
| 58 dict->Remove(lang_to_remove, nullptr); | |
| 59 } | |
| 60 | |
| 61 // Transform the counters from prefs into a list of LanguageInfo structs. | |
| 62 std::vector<LanguageModel::LanguageInfo> GetAllLanguages( | |
| 63 const base::DictionaryValue* dict) { | |
| 64 | |
| 65 std::vector<LanguageModel::LanguageInfo> top_languages; | |
| 66 int counters_sum = GetCountersSum(dict); | |
| 67 | |
| 68 int counter_value = 0; | |
| 69 for (base::DictionaryValue::Iterator itr(*dict); !itr.IsAtEnd(); | |
| 70 itr.Advance()) { | |
| 71 if (!itr.value().GetAsInteger(&counter_value)) | |
| 72 continue; | |
| 73 top_languages.push_back( | |
| 74 {itr.key(), static_cast<float>(counter_value) / counters_sum}); | |
| 75 } | |
| 76 return top_languages; | |
| 77 } | |
| 78 | |
| 79 } // namespace | |
| 80 | |
// Keeps |pref_service| in |pref_service_|; the other methods use it to read
// and update the language counters pref.
LanguageModel::LanguageModel(PrefService* pref_service)
    : pref_service_(pref_service) {}
| 83 | |
// Defaulted destructor, defined out of line.
LanguageModel::~LanguageModel() = default;
| 85 | |
// static
// Registers the dictionary pref (kLanguageModelCounters) that holds the
// per-language counters with |registry|.
void LanguageModel::RegisterProfilePrefs(PrefRegistrySimple* registry) {
  registry->RegisterDictionaryPref(kLanguageModelCounters);
}
| 90 | |
| 91 std::vector<LanguageModel::LanguageInfo> LanguageModel::GetTopLanguages() | |
| 92 const { | |
| 93 std::vector<LanguageModel::LanguageInfo> top_languages = | |
| 94 GetAllLanguages(pref_service_->GetDictionary(kLanguageModelCounters)); | |
| 95 | |
| 96 std::sort(top_languages.begin(), top_languages.end(), | |
| 97 [](LanguageModel::LanguageInfo a, LanguageModel::LanguageInfo b) { | |
| 98 return a.frequency > b.frequency; | |
| 99 }); | |
| 100 | |
| 101 return top_languages; | |
| 102 } | |
| 103 | |
| 104 float LanguageModel::GetLanguageFrequency( | |
| 105 const std::string& language_code) const { | |
| 106 const base::DictionaryValue* dict = | |
| 107 pref_service_->GetDictionary(kLanguageModelCounters); | |
| 108 int counter_value = 0; | |
| 109 // If the key |language_code| does not exist, |counter_value| stays 0. | |
| 110 dict->GetInteger(language_code, &counter_value); | |
| 111 | |
| 112 int counters_sum = GetCountersSum(dict); | |
| 113 | |
| 114 return static_cast<float>(counter_value) / counters_sum; | |
| 115 } | |
| 116 | |
| 117 void LanguageModel::OnPageVisited(const std::string& language_code) { | |
| 118 DictionaryPrefUpdate update(pref_service_, kLanguageModelCounters); | |
| 119 base::DictionaryValue* dict = update.Get(); | |
| 120 int counter_value = 0; | |
| 121 // If the key |language_code| does not exist, |counter_value| stays 0. | |
| 122 dict->GetInteger(language_code, &counter_value); | |
| 123 dict->SetInteger(language_code, counter_value + 1); | |
| 124 | |
| 125 if (GetCountersSum(dict) > kMaxCountersSum) | |
| 126 DiscountAndCleanCounters(dict); | |
| 127 } | |
| 128 | |
| 129 } // namespace translate | |
| OLD | NEW |