OLD | NEW |
1 // Copyright 2016 The Chromium Authors. All rights reserved. | 1 // Copyright 2016 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/translate/core/browser/language_model.h" | 5 #include "components/translate/core/browser/language_model.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <map> | 8 #include <map> |
9 #include <set> | 9 #include <set> |
10 | 10 |
11 #include "components/prefs/pref_registry_simple.h" | 11 #include "components/prefs/pref_registry_simple.h" |
12 #include "components/prefs/pref_service.h" | 12 #include "components/prefs/pref_service.h" |
13 #include "components/prefs/scoped_user_pref_update.h" | 13 #include "components/prefs/scoped_user_pref_update.h" |
14 | 14 |
15 namespace translate { | 15 namespace translate { |
16 | 16 |
17 namespace { | 17 namespace { |
18 | 18 |
// Name of the dictionary pref that holds one integer counter per language.
constexpr char kLanguageModelCounters[] = "language_model_counters";

// Upper bound for the sum of all counters; once a page visit pushes the sum
// above this value, the counters are discounted and cleaned.
constexpr int kMaxCountersSum = 1000;
// Lower bound for the sum of all counters; while the sum is below this value
// the sample is considered too small and no language data is reported.
constexpr int kMinCountersSum = 100;
// NOTE(review): the uses of the two constants below are not visible in this
// view; presumably they parameterize DiscountAndCleanCounters() -- confirm.
constexpr float kCutoffRatio = 0.005f;
constexpr float kDiscountFactor = 0.75f;
24 | 25 |
25 // Gets the sum of the counter for all languages in the model. | 26 // Gets the sum of the counter for all languages in the model. |
26 int GetCountersSum(const base::DictionaryValue& dict) { | 27 int GetCountersSum(const base::DictionaryValue& dict) { |
27 int sum = 0; | 28 int sum = 0; |
28 int counter_value = 0; | 29 int counter_value = 0; |
29 for (base::DictionaryValue::Iterator itr(dict); !itr.IsAtEnd(); | 30 for (base::DictionaryValue::Iterator itr(dict); !itr.IsAtEnd(); |
30 itr.Advance()) { | 31 itr.Advance()) { |
31 if (itr.value().GetAsInteger(&counter_value)) | 32 if (itr.value().GetAsInteger(&counter_value)) |
(...skipping 21 matching lines...) Expand all Loading... |
53 } | 54 } |
54 | 55 |
55 for (const std::string& lang_to_remove : remove_keys) | 56 for (const std::string& lang_to_remove : remove_keys) |
56 dict->Remove(lang_to_remove, nullptr); | 57 dict->Remove(lang_to_remove, nullptr); |
57 } | 58 } |
58 | 59 |
59 // Transforms the counters from prefs into a list of LanguageInfo structs. | 60 // Transforms the counters from prefs into a list of LanguageInfo structs. |
60 std::vector<LanguageModel::LanguageInfo> GetAllLanguages( | 61 std::vector<LanguageModel::LanguageInfo> GetAllLanguages( |
61 const base::DictionaryValue& dict) { | 62 const base::DictionaryValue& dict) { |
62 | 63 |
63 std::vector<LanguageModel::LanguageInfo> top_languages; | |
64 int counters_sum = GetCountersSum(dict); | 64 int counters_sum = GetCountersSum(dict); |
65 | 65 |
| 66 // If the sample is not large enough yet, pretend there are no top languages. |
| 67 if (counters_sum < kMinCountersSum) |
| 68 return std::vector<LanguageModel::LanguageInfo>(); |
| 69 |
| 70 std::vector<LanguageModel::LanguageInfo> top_languages; |
66 int counter_value = 0; | 71 int counter_value = 0; |
67 for (base::DictionaryValue::Iterator itr(dict); !itr.IsAtEnd(); | 72 for (base::DictionaryValue::Iterator itr(dict); !itr.IsAtEnd(); |
68 itr.Advance()) { | 73 itr.Advance()) { |
69 if (!itr.value().GetAsInteger(&counter_value)) | 74 if (!itr.value().GetAsInteger(&counter_value)) |
70 continue; | 75 continue; |
71 top_languages.push_back( | 76 top_languages.push_back( |
72 {itr.key(), static_cast<float>(counter_value) / counters_sum}); | 77 {itr.key(), static_cast<float>(counter_value) / counters_sum}); |
73 } | 78 } |
74 return top_languages; | 79 return top_languages; |
75 } | 80 } |
(...skipping 20 matching lines...) Expand all Loading... |
96 return a.frequency > b.frequency; | 101 return a.frequency > b.frequency; |
97 }); | 102 }); |
98 | 103 |
99 return top_languages; | 104 return top_languages; |
100 } | 105 } |
101 | 106 |
102 float LanguageModel::GetLanguageFrequency( | 107 float LanguageModel::GetLanguageFrequency( |
103 const std::string& language_code) const { | 108 const std::string& language_code) const { |
104 const base::DictionaryValue* dict = | 109 const base::DictionaryValue* dict = |
105 pref_service_->GetDictionary(kLanguageModelCounters); | 110 pref_service_->GetDictionary(kLanguageModelCounters); |
| 111 int counters_sum = GetCountersSum(*dict); |
| 112 // If the sample is not large enough yet, pretend there are no top languages. |
| 113 if (counters_sum < kMinCountersSum) |
| 114 return 0; |
| 115 |
106 int counter_value = 0; | 116 int counter_value = 0; |
107 // If the key |language_code| does not exist, |counter_value| stays 0. | 117 // If the key |language_code| does not exist, |counter_value| stays 0. |
108 dict->GetInteger(language_code, &counter_value); | 118 dict->GetInteger(language_code, &counter_value); |
109 | 119 |
110 int counters_sum = GetCountersSum(*dict); | |
111 | |
112 return static_cast<float>(counter_value) / counters_sum; | 120 return static_cast<float>(counter_value) / counters_sum; |
113 } | 121 } |
114 | 122 |
115 void LanguageModel::OnPageVisited(const std::string& language_code) { | 123 void LanguageModel::OnPageVisited(const std::string& language_code) { |
116 DictionaryPrefUpdate update(pref_service_, kLanguageModelCounters); | 124 DictionaryPrefUpdate update(pref_service_, kLanguageModelCounters); |
117 base::DictionaryValue* dict = update.Get(); | 125 base::DictionaryValue* dict = update.Get(); |
118 int counter_value = 0; | 126 int counter_value = 0; |
119 // If the key |language_code| does not exist, |counter_value| stays 0. | 127 // If the key |language_code| does not exist, |counter_value| stays 0. |
120 dict->GetInteger(language_code, &counter_value); | 128 dict->GetInteger(language_code, &counter_value); |
121 dict->SetInteger(language_code, counter_value + 1); | 129 dict->SetInteger(language_code, counter_value + 1); |
122 | 130 |
123 if (GetCountersSum(*dict) > kMaxCountersSum) | 131 if (GetCountersSum(*dict) > kMaxCountersSum) |
124 DiscountAndCleanCounters(dict); | 132 DiscountAndCleanCounters(dict); |
125 } | 133 } |
126 | 134 |
127 } // namespace translate | 135 } // namespace translate |
OLD | NEW |