Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(93)

Side by Side Diff: components/translate/core/browser/language_model.cc

Issue 2391383005: [LanguageModel] Return top languages only with a reasonable sample set (Closed)
Patch Set: Fix the constructor Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2016 The Chromium Authors. All rights reserved. 1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/translate/core/browser/language_model.h" 5 #include "components/translate/core/browser/language_model.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <map> 8 #include <map>
9 #include <set> 9 #include <set>
10 10
11 #include "components/prefs/pref_registry_simple.h" 11 #include "components/prefs/pref_registry_simple.h"
12 #include "components/prefs/pref_service.h" 12 #include "components/prefs/pref_service.h"
13 #include "components/prefs/scoped_user_pref_update.h" 13 #include "components/prefs/scoped_user_pref_update.h"
14 14
15 namespace translate { 15 namespace translate {
16 16
17 namespace { 17 namespace {
18 18
19 const char kLanguageModelCounters[] = "language_model_counters"; 19 const char kLanguageModelCounters[] = "language_model_counters";
20 20
21 const int kMaxCountersSum = 1000; 21 const int kMaxCountersSum = 1000;
22 const int kMinCountersSum = 100;
22 const float kCutoffRatio = 0.005f; 23 const float kCutoffRatio = 0.005f;
23 const float kDiscountFactor = 0.75f; 24 const float kDiscountFactor = 0.75f;
24 25
25 // Gets the sum of the counters for all languages in the model. 26 // Gets the sum of the counters for all languages in the model.
26 int GetCountersSum(const base::DictionaryValue& dict) { 27 int GetCountersSum(const base::DictionaryValue& dict) {
27 int sum = 0; 28 int sum = 0;
28 int counter_value = 0; 29 int counter_value = 0;
29 for (base::DictionaryValue::Iterator itr(dict); !itr.IsAtEnd(); 30 for (base::DictionaryValue::Iterator itr(dict); !itr.IsAtEnd();
30 itr.Advance()) { 31 itr.Advance()) {
31 if (itr.value().GetAsInteger(&counter_value)) 32 if (itr.value().GetAsInteger(&counter_value))
(...skipping 21 matching lines...) Expand all
53 } 54 }
54 55
55 for (const std::string& lang_to_remove : remove_keys) 56 for (const std::string& lang_to_remove : remove_keys)
56 dict->Remove(lang_to_remove, nullptr); 57 dict->Remove(lang_to_remove, nullptr);
57 } 58 }
58 59
59 // Transforms the counters from prefs into a list of LanguageInfo structs. 60 // Transforms the counters from prefs into a list of LanguageInfo structs.
60 std::vector<LanguageModel::LanguageInfo> GetAllLanguages( 61 std::vector<LanguageModel::LanguageInfo> GetAllLanguages(
61 const base::DictionaryValue& dict) { 62 const base::DictionaryValue& dict) {
62 63
63 std::vector<LanguageModel::LanguageInfo> top_languages;
64 int counters_sum = GetCountersSum(dict); 64 int counters_sum = GetCountersSum(dict);
65 65
66 // If the sample is not large enough yet, pretend there are no top languages.
67 if (counters_sum < kMinCountersSum)
68 return std::vector<LanguageModel::LanguageInfo>();
69
70 std::vector<LanguageModel::LanguageInfo> top_languages;
66 int counter_value = 0; 71 int counter_value = 0;
67 for (base::DictionaryValue::Iterator itr(dict); !itr.IsAtEnd(); 72 for (base::DictionaryValue::Iterator itr(dict); !itr.IsAtEnd();
68 itr.Advance()) { 73 itr.Advance()) {
69 if (!itr.value().GetAsInteger(&counter_value)) 74 if (!itr.value().GetAsInteger(&counter_value))
70 continue; 75 continue;
71 top_languages.push_back( 76 top_languages.push_back(
72 {itr.key(), static_cast<float>(counter_value) / counters_sum}); 77 {itr.key(), static_cast<float>(counter_value) / counters_sum});
73 } 78 }
74 return top_languages; 79 return top_languages;
75 } 80 }
(...skipping 20 matching lines...) Expand all
96 return a.frequency > b.frequency; 101 return a.frequency > b.frequency;
97 }); 102 });
98 103
99 return top_languages; 104 return top_languages;
100 } 105 }
101 106
102 float LanguageModel::GetLanguageFrequency( 107 float LanguageModel::GetLanguageFrequency(
103 const std::string& language_code) const { 108 const std::string& language_code) const {
104 const base::DictionaryValue* dict = 109 const base::DictionaryValue* dict =
105 pref_service_->GetDictionary(kLanguageModelCounters); 110 pref_service_->GetDictionary(kLanguageModelCounters);
111 int counters_sum = GetCountersSum(*dict);
112 // If the sample is not large enough yet, report a frequency of 0 for every language.
113 if (counters_sum < kMinCountersSum)
114 return 0;
115
106 int counter_value = 0; 116 int counter_value = 0;
107 // If the key |language_code| does not exist, |counter_value| stays 0. 117 // If the key |language_code| does not exist, |counter_value| stays 0.
108 dict->GetInteger(language_code, &counter_value); 118 dict->GetInteger(language_code, &counter_value);
109 119
110 int counters_sum = GetCountersSum(*dict);
111
112 return static_cast<float>(counter_value) / counters_sum; 120 return static_cast<float>(counter_value) / counters_sum;
113 } 121 }
114 122
115 void LanguageModel::OnPageVisited(const std::string& language_code) { 123 void LanguageModel::OnPageVisited(const std::string& language_code) {
116 DictionaryPrefUpdate update(pref_service_, kLanguageModelCounters); 124 DictionaryPrefUpdate update(pref_service_, kLanguageModelCounters);
117 base::DictionaryValue* dict = update.Get(); 125 base::DictionaryValue* dict = update.Get();
118 int counter_value = 0; 126 int counter_value = 0;
119 // If the key |language_code| does not exist, |counter_value| stays 0. 127 // If the key |language_code| does not exist, |counter_value| stays 0.
120 dict->GetInteger(language_code, &counter_value); 128 dict->GetInteger(language_code, &counter_value);
121 dict->SetInteger(language_code, counter_value + 1); 129 dict->SetInteger(language_code, counter_value + 1);
122 130
123 if (GetCountersSum(*dict) > kMaxCountersSum) 131 if (GetCountersSum(*dict) > kMaxCountersSum)
124 DiscountAndCleanCounters(dict); 132 DiscountAndCleanCounters(dict);
125 } 133 }
126 134
127 } // namespace translate 135 } // namespace translate
OLDNEW
« no previous file with comments | « components/translate/core/browser/language_model.h ('k') | components/translate/core/browser/language_model_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698