OLD | NEW |
(Empty) | |
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include "components/translate/core/browser/language_model.h" |
| 6 |
| 7 #include <algorithm> |
| 8 #include <map> |
| 9 #include <set> |
| 10 |
| 11 #include "components/prefs/pref_registry_simple.h" |
| 12 #include "components/prefs/pref_service.h" |
| 13 #include "components/prefs/scoped_user_pref_update.h" |
| 14 |
| 15 namespace translate { |
| 16 |
| 17 namespace { |
| 18 |
// Name of the dictionary pref that stores one integer counter per language
// code.
constexpr char kLanguageModelCounters[] = "language_model_counters";

// Once the sum of all counters exceeds this value, the counters are
// discounted and cleaned up (see OnPageVisited()).
constexpr int kMaxCountersSum = 1000;
// During discounting, counters smaller than kCutoffRatio * kMaxCountersSum
// are removed from the model.
constexpr float kCutoffRatio = 0.005f;
// Factor every surviving counter is multiplied by during discounting.
constexpr float kDiscountFactor = 0.75f;
| 24 |
| 25 // Gets the sum of the counter for all languages in the model. |
| 26 int GetCountersSum(const base::DictionaryValue& dict) { |
| 27 int sum = 0; |
| 28 int counter_value = 0; |
| 29 for (base::DictionaryValue::Iterator itr(dict); !itr.IsAtEnd(); |
| 30 itr.Advance()) { |
| 31 if (itr.value().GetAsInteger(&counter_value)) |
| 32 sum += counter_value; |
| 33 } |
| 34 return sum; |
| 35 } |
| 36 |
| 37 // Removes languages with small counter values and discount remaining counters. |
| 38 void DiscountAndCleanCounters(base::DictionaryValue* dict) { |
| 39 std::set<std::string> remove_keys; |
| 40 |
| 41 int counter_value = 0; |
| 42 for (base::DictionaryValue::Iterator itr(*dict); !itr.IsAtEnd(); |
| 43 itr.Advance()) { |
| 44 // Remove languages with invalid or small values. |
| 45 if (!itr.value().GetAsInteger(&counter_value) || |
| 46 counter_value < (kCutoffRatio * kMaxCountersSum)) { |
| 47 remove_keys.insert(itr.key()); |
| 48 continue; |
| 49 } |
| 50 |
| 51 // Discount the value. |
| 52 dict->SetInteger(itr.key(), counter_value * kDiscountFactor); |
| 53 } |
| 54 |
| 55 for (const std::string& lang_to_remove : remove_keys) |
| 56 dict->Remove(lang_to_remove, nullptr); |
| 57 } |
| 58 |
| 59 // Transforms the counters from prefs into a list of LanguageInfo structs. |
| 60 std::vector<LanguageModel::LanguageInfo> GetAllLanguages( |
| 61 const base::DictionaryValue& dict) { |
| 62 |
| 63 std::vector<LanguageModel::LanguageInfo> top_languages; |
| 64 int counters_sum = GetCountersSum(dict); |
| 65 |
| 66 int counter_value = 0; |
| 67 for (base::DictionaryValue::Iterator itr(dict); !itr.IsAtEnd(); |
| 68 itr.Advance()) { |
| 69 if (!itr.value().GetAsInteger(&counter_value)) |
| 70 continue; |
| 71 top_languages.push_back( |
| 72 {itr.key(), static_cast<float>(counter_value) / counters_sum}); |
| 73 } |
| 74 return top_languages; |
| 75 } |
| 76 |
| 77 } // namespace |
| 78 |
| 79 LanguageModel::LanguageModel(PrefService* pref_service) |
| 80 : pref_service_(pref_service) {} |
| 81 |
| 82 LanguageModel::~LanguageModel() = default; |
| 83 |
| 84 // static |
| 85 void LanguageModel::RegisterProfilePrefs(PrefRegistrySimple* registry) { |
| 86 registry->RegisterDictionaryPref(kLanguageModelCounters); |
| 87 } |
| 88 |
| 89 std::vector<LanguageModel::LanguageInfo> LanguageModel::GetTopLanguages() |
| 90 const { |
| 91 std::vector<LanguageModel::LanguageInfo> top_languages = |
| 92 GetAllLanguages(*pref_service_->GetDictionary(kLanguageModelCounters)); |
| 93 |
| 94 std::sort(top_languages.begin(), top_languages.end(), |
| 95 [](LanguageModel::LanguageInfo a, LanguageModel::LanguageInfo b) { |
| 96 return a.frequency > b.frequency; |
| 97 }); |
| 98 |
| 99 return top_languages; |
| 100 } |
| 101 |
| 102 float LanguageModel::GetLanguageFrequency( |
| 103 const std::string& language_code) const { |
| 104 const base::DictionaryValue* dict = |
| 105 pref_service_->GetDictionary(kLanguageModelCounters); |
| 106 int counter_value = 0; |
| 107 // If the key |language_code| does not exist, |counter_value| stays 0. |
| 108 dict->GetInteger(language_code, &counter_value); |
| 109 |
| 110 int counters_sum = GetCountersSum(*dict); |
| 111 |
| 112 return static_cast<float>(counter_value) / counters_sum; |
| 113 } |
| 114 |
| 115 void LanguageModel::OnPageVisited(const std::string& language_code) { |
| 116 DictionaryPrefUpdate update(pref_service_, kLanguageModelCounters); |
| 117 base::DictionaryValue* dict = update.Get(); |
| 118 int counter_value = 0; |
| 119 // If the key |language_code| does not exist, |counter_value| stays 0. |
| 120 dict->GetInteger(language_code, &counter_value); |
| 121 dict->SetInteger(language_code, counter_value + 1); |
| 122 |
| 123 if (GetCountersSum(*dict) > kMaxCountersSum) |
| 124 DiscountAndCleanCounters(dict); |
| 125 } |
| 126 |
| 127 } // namespace translate |
OLD | NEW |