OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2016 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "components/translate/core/browser/language_model.h" | |
6 | |
7 #include <algorithm> | |
8 #include <map> | |
9 #include <set> | |
10 | |
11 #include "components/prefs/pref_registry_simple.h" | |
12 #include "components/prefs/pref_service.h" | |
13 #include "components/prefs/scoped_user_pref_update.h" | |
14 #include "components/translate/core/language_detection/language_detection_util.h " | |
droger
2016/10/05 15:39:59
Could we remove this dep?
jkrcal
2016/10/06 08:50:58
True, I can!
| |
15 | |
16 namespace translate { | |
17 | |
18 namespace { | |
19 | |
// Name of the dictionary pref that holds one integer counter per language.
constexpr char kLanguageModelCounters[] = "language_model_counters";

// Once the sum of all counters exceeds this value, the counters are discounted.
constexpr int kMaxCountersSum = 1000;
// Counters smaller than |kCutoffRatio| * |kMaxCountersSum| are removed when
// discounting.
constexpr float kCutoffRatio = 0.005;
// Factor every remaining counter is multiplied by when discounting.
constexpr float kDiscountFactor = 0.75;
25 | |
26 // Gets the sum of the counters for all languages in the model. | |
droger
2016/10/05 15:39:59
Gets
jkrcal
2016/10/06 08:50:58
I meant it as an imperative. You are probably right.
| |
27 int GetCountersSum(const base::DictionaryValue* dict) { | |
28 int sum = 0; | |
29 int counter_value = 0; | |
30 for (base::DictionaryValue::Iterator itr(*dict); !itr.IsAtEnd(); | |
31 itr.Advance()) { | |
32 if (itr.value().GetAsInteger(&counter_value)) | |
33 sum += counter_value; | |
34 } | |
35 return sum; | |
36 } | |
37 | |
38 // Removes languages with small counter values and discounts the remaining counters. | |
droger
2016/10/05 15:40:00
Removes
jkrcal
2016/10/06 08:50:58
Done.
| |
39 void DiscountAndCleanCounters(base::DictionaryValue* dict) { | |
40 std::map<std::string, int> new_values; | |
droger
2016/10/05 15:39:59
unused variable
jkrcal
2016/10/06 08:50:58
Right, thanks! I am surprised the compiler does no
| |
41 std::set<std::string> remove_keys; | |
42 | |
43 int counter_value = 0; | |
44 for (base::DictionaryValue::Iterator itr(*dict); !itr.IsAtEnd(); | |
45 itr.Advance()) { | |
46 // Remove languages with invalid or small values. | |
47 if (!itr.value().GetAsInteger(&counter_value) || | |
48 counter_value < (kCutoffRatio * kMaxCountersSum)) { | |
49 remove_keys.insert(itr.key()); | |
50 continue; | |
51 } | |
52 | |
53 // Discount the value. | |
54 dict->SetInteger(itr.key(), counter_value * kDiscountFactor); | |
55 } | |
56 | |
57 for (const std::string& lang_to_remove : remove_keys) | |
58 dict->Remove(lang_to_remove, nullptr); | |
59 } | |
60 | |
61 // Transforms the counters from prefs into a list of LanguageInfo structs. | |
droger
2016/10/05 15:39:59
Transforms
jkrcal
2016/10/06 08:50:58
Done.
| |
62 std::vector<LanguageModel::LanguageInfo> GetAllLanguages( | |
63 const base::DictionaryValue* dict) { | |
droger
2016/10/05 15:40:00
nit: not 100% sure, but I think the recommended st
jkrcal
2016/10/06 08:50:58
Thanks, you are right!
| |
64 | |
65 std::vector<LanguageModel::LanguageInfo> top_languages; | |
66 int counters_sum = GetCountersSum(dict); | |
67 | |
68 int counter_value = 0; | |
69 for (base::DictionaryValue::Iterator itr(*dict); !itr.IsAtEnd(); | |
70 itr.Advance()) { | |
71 if (!itr.value().GetAsInteger(&counter_value)) | |
72 continue; | |
73 top_languages.push_back( | |
74 {itr.key(), static_cast<float>(counter_value) / counters_sum}); | |
75 } | |
76 return top_languages; | |
77 } | |
78 | |
79 } // namespace | |
80 | |
81 LanguageModel::LanguageModel(PrefService* pref_service) | |
82 : pref_service_(pref_service) {} | |
83 | |
84 LanguageModel::~LanguageModel() = default; | |
85 | |
86 // static | |
87 void LanguageModel::RegisterProfilePrefs(PrefRegistrySimple* registry) { | |
88 registry->RegisterDictionaryPref(kLanguageModelCounters); | |
89 } | |
90 | |
91 std::vector<LanguageModel::LanguageInfo> LanguageModel::GetTopLanguages() | |
92 const { | |
93 std::vector<LanguageModel::LanguageInfo> top_languages = | |
94 GetAllLanguages(pref_service_->GetDictionary(kLanguageModelCounters)); | |
95 | |
96 std::sort(top_languages.begin(), top_languages.end(), | |
97 [](LanguageModel::LanguageInfo a, LanguageModel::LanguageInfo b) { | |
98 return a.frequency > b.frequency; | |
99 }); | |
100 | |
101 return top_languages; | |
102 } | |
103 | |
104 float LanguageModel::GetLanguageFrequency( | |
105 const std::string& language_code) const { | |
106 const base::DictionaryValue* dict = | |
107 pref_service_->GetDictionary(kLanguageModelCounters); | |
108 int counter_value = 0; | |
109 // If the key |language_code| does not exist, |counter_value| stays 0. | |
110 dict->GetInteger(language_code, &counter_value); | |
111 | |
112 int counters_sum = GetCountersSum(dict); | |
113 | |
114 return static_cast<float>(counter_value) / counters_sum; | |
115 } | |
116 | |
117 void LanguageModel::OnPageVisited(const std::string& language_code) { | |
118 DictionaryPrefUpdate update(pref_service_, kLanguageModelCounters); | |
119 base::DictionaryValue* dict = update.Get(); | |
120 int counter_value = 0; | |
121 // If the key |language_code| does not exist, |counter_value| stays 0. | |
122 dict->GetInteger(language_code, &counter_value); | |
123 dict->SetInteger(language_code, counter_value + 1); | |
124 | |
125 if (GetCountersSum(dict) > kMaxCountersSum) | |
126 DiscountAndCleanCounters(dict); | |
127 } | |
128 | |
129 } // namespace translate | |
OLD | NEW |