Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(168)

Side by Side Diff: components/translate/core/browser/language_model.cc

Issue 2396783002: Add LanguageModel, a keyed service that collects language info from CLD. (Closed)
Patch Set: Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2016 The Chromium Authors. All rights reserved.
droger 2016/10/05 13:11:32 2016
jkrcal 2016/10/05 15:14:46 Done.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/translate/core/browser/language_model.h"
6
7 #include <algorithm>
8 #include <map>
9 #include <set>
10
11 #include "components/prefs/pref_registry_simple.h"
12 #include "components/prefs/pref_service.h"
13 #include "components/prefs/scoped_user_pref_update.h"
14 #include "components/translate/core/language_detection/language_detection_util.h "
15
16 namespace translate {
17
18 namespace {
19
20 const char kLanguageModelCounters[] = "language_model_counters";
21
22 const int kMaxCountersSum = 1000;
23 const float kCutoffRatio = 0.005;
24 const float kDiscountFactor = 0.75;
25
26 // Get the sum of the counter for all languages in the model.
27 int GetCountersSum(const base::DictionaryValue* dict) {
28 int sum = 0;
29 int counter_value = 0;
30 for (base::DictionaryValue::Iterator itr(*dict); !itr.IsAtEnd();
31 itr.Advance()) {
32 if (itr.value().GetAsInteger(&counter_value))
33 sum += counter_value;
34 }
35 return sum;
36 }
37
38 // Remove languages with small counter values and discount remaining counters.
39 void DiscountAndCleanCounters(base::DictionaryValue* dict) {
40 std::map<std::string, int> new_values;
41 std::set<std::string> remove_keys;
42
43 int counter_value = 0;
44 for (base::DictionaryValue::Iterator itr(*dict); !itr.IsAtEnd();
45 itr.Advance()) {
46 // Remove languages with invalid or small values.
47 if (!itr.value().GetAsInteger(&counter_value) ||
48 counter_value < (kCutoffRatio * kMaxCountersSum)) {
49 remove_keys.insert(itr.key());
50 continue;
51 }
52
53 // Discount the value.
54 dict->SetInteger(itr.key(), counter_value * kDiscountFactor);
55 }
56
57 for (const std::string lang_to_remove : remove_keys)
Bernhard Bauer 2016/10/05 14:48:16 const ref
jkrcal 2016/10/05 15:14:46 Done.
58 dict->Remove(lang_to_remove, nullptr);
59 }
60
61 // Transform the counters from prefs into a list of LanguageInfo structs.
62 std::vector<LanguageModel::LanguageInfo> GetAllLanguages(
63 const base::DictionaryValue* dict) {
64
65 std::vector<LanguageModel::LanguageInfo> top_languages;
66 int counters_sum = GetCountersSum(dict);
67
68 int counter_value = 0;
69 for (base::DictionaryValue::Iterator itr(*dict); !itr.IsAtEnd();
70 itr.Advance()) {
71 if (!itr.value().GetAsInteger(&counter_value))
72 continue;
73 top_languages.push_back(
74 {itr.key(), static_cast<float>(counter_value) / counters_sum});
75 }
76 return top_languages;
77 }
78
79 } // namespace
80
81 LanguageModel::LanguageModel(PrefService* pref_service)
82 : pref_service_(pref_service) {}
83
84 LanguageModel::~LanguageModel() = default;
85
86 // static
87 void LanguageModel::RegisterProfilePrefs(PrefRegistrySimple* registry) {
88 registry->RegisterDictionaryPref(kLanguageModelCounters);
89 }
90
91 std::vector<LanguageModel::LanguageInfo> LanguageModel::GetTopLanguages()
92 const {
93 std::vector<LanguageModel::LanguageInfo> top_languages =
94 GetAllLanguages(pref_service_->GetDictionary(kLanguageModelCounters));
95
96 std::sort(top_languages.begin(), top_languages.end(),
97 [](LanguageModel::LanguageInfo a, LanguageModel::LanguageInfo b) {
98 return a.frequency > b.frequency;
99 });
100
101 return top_languages;
102 }
103
104 float LanguageModel::GetLanguageFrequency(
105 const std::string& language_code) const {
106 const base::DictionaryValue* dict =
107 pref_service_->GetDictionary(kLanguageModelCounters);
108 int counter_value = 0;
109 // If the key |language_code| does not exist, |counter_value| stays 0.
110 dict->GetInteger(language_code, &counter_value);
111
112 int counters_sum = GetCountersSum(dict);
113
114 return static_cast<float>(counter_value) / counters_sum;
115 }
116
117 void LanguageModel::OnPageVisited(const std::string& language_code) {
118 DictionaryPrefUpdate update(pref_service_, kLanguageModelCounters);
119 base::DictionaryValue* dict = update.Get();
120 int counter_value = 0;
121 // If the key |language_code| does not exist, |counter_value| stays 0.
122 dict->GetInteger(language_code, &counter_value);
123 dict->SetInteger(language_code, counter_value + 1);
124
125 if (GetCountersSum(dict) > kMaxCountersSum)
126 DiscountAndCleanCounters(dict);
127 }
128
129 } // namespace translate
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698