Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(39)

Side by Side Diff: components/ntp_snippets/ntp_snippet.cc

Issue 1987333003: [NTP Snippets] Persist snippets in a LevelDB instead of prefs (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: fix test memleaks Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « components/ntp_snippets/ntp_snippet.h ('k') | components/ntp_snippets/ntp_snippets_constants.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2015 The Chromium Authors. All rights reserved. 1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/ntp_snippets/ntp_snippet.h" 5 #include "components/ntp_snippets/ntp_snippet.h"
6 6
7 #include "base/strings/string_number_conversions.h" 7 #include "base/strings/string_number_conversions.h"
8 #include "base/strings/stringprintf.h" 8 #include "base/strings/stringprintf.h"
9 #include "base/values.h" 9 #include "base/values.h"
10 #include "components/ntp_snippets/proto/ntp_snippets.pb.h"
10 11
11 namespace { 12 namespace {
12 13
13 const char kScore[] = "score"; 14 const char kScore[] = "score";
14 const char kContentInfo[] = "contentInfo"; 15 const char kContentInfo[] = "contentInfo";
15 16
16 const char kId[] = "url"; 17 const char kId[] = "url";
17 const char kTitle[] = "title"; 18 const char kTitle[] = "title";
18 const char kSalientImageUrl[] = "thumbnailUrl"; 19 const char kSalientImageUrl[] = "thumbnailUrl";
19 const char kSnippet[] = "snippet"; 20 const char kSnippet[] = "snippet";
20 const char kPublishDate[] = "creationTimestampSec"; 21 const char kPublishDate[] = "creationTimestampSec";
21 const char kExpiryDate[] = "expiryTimestampSec"; 22 const char kExpiryDate[] = "expiryTimestampSec";
22 const char kSiteTitle[] = "sourceName"; 23 const char kSiteTitle[] = "sourceName";
23 const char kPublisherData[] = "publisherData"; 24 const char kPublisherData[] = "publisherData";
24 const char kCorpusId[] = "corpusId"; 25 const char kCorpusId[] = "corpusId";
25 const char kSourceCorpusInfo[] = "sourceCorpusInfo"; 26 const char kSourceCorpusInfo[] = "sourceCorpusInfo";
26 const char kAmpUrl[] = "ampUrl"; 27 const char kAmpUrl[] = "ampUrl";
27 28
28 } // namespace 29 } // namespace
29 30
30 namespace ntp_snippets { 31 namespace ntp_snippets {
31 32
32 NTPSnippet::NTPSnippet(const std::string& id) 33 NTPSnippet::NTPSnippet(const std::string& id)
33 : id_(id), score_(0), best_source_index_(0) {} 34 : id_(id), score_(0), is_discarded_(false), best_source_index_(0) {}
34 35
35 NTPSnippet::~NTPSnippet() {} 36 NTPSnippet::~NTPSnippet() {}
36 37
37 // static 38 // static
38 std::unique_ptr<NTPSnippet> NTPSnippet::CreateFromDictionary( 39 std::unique_ptr<NTPSnippet> NTPSnippet::CreateFromDictionary(
39 const base::DictionaryValue& dict) { 40 const base::DictionaryValue& dict) {
40 const base::DictionaryValue* content = nullptr; 41 const base::DictionaryValue* content = nullptr;
41 if (!dict.GetDictionary(kContentInfo, &content)) 42 if (!dict.GetDictionary(kContentInfo, &content))
42 return nullptr; 43 return nullptr;
43 44
44 // Need at least the url. 45 // Need at least the id.
45 std::string id; 46 std::string id;
46 if (!content->GetString(kId, &id)) 47 if (!content->GetString(kId, &id) || id.empty())
47 return nullptr; 48 return nullptr;
48 49
49 std::unique_ptr<NTPSnippet> snippet(new NTPSnippet(id)); 50 std::unique_ptr<NTPSnippet> snippet(new NTPSnippet(id));
50 51
51 std::string title; 52 std::string title;
52 if (content->GetString(kTitle, &title)) 53 if (content->GetString(kTitle, &title))
53 snippet->set_title(title); 54 snippet->set_title(title);
54 std::string salient_image_url; 55 std::string salient_image_url;
55 if (content->GetString(kSalientImageUrl, &salient_image_url)) 56 if (content->GetString(kSalientImageUrl, &salient_image_url))
56 snippet->set_salient_image_url(GURL(salient_image_url)); 57 snippet->set_salient_image_url(GURL(salient_image_url));
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
105 // Expected to not have AMP url sometimes. 106 // Expected to not have AMP url sometimes.
106 if (dict_value->GetString(kAmpUrl, &amp_url_str)) { 107 if (dict_value->GetString(kAmpUrl, &amp_url_str)) {
107 amp_url = GURL(amp_url_str); 108 amp_url = GURL(amp_url_str);
108 DLOG_IF(WARNING, !amp_url.is_valid()) << "Invalid AMP url " 109 DLOG_IF(WARNING, !amp_url.is_valid()) << "Invalid AMP url "
109 << amp_url_str; 110 << amp_url_str;
110 } 111 }
111 SnippetSource source(corpus_id, site_title, 112 SnippetSource source(corpus_id, site_title,
112 amp_url.is_valid() ? amp_url : GURL()); 113 amp_url.is_valid() ? amp_url : GURL());
113 snippet->add_source(source); 114 snippet->add_source(source);
114 } 115 }
115 // The previous url we have saved can be one of several sources for the
116 // article. For example, the same article can be hosted by nytimes.com,
117 // cnn.com, etc. We need to parse the list of sources for this article and
118 // find the best match. In order of preference:
119 // 1) A source that has url, publisher name, AMP url
120 // 2) A source that has url, publisher name
121 // 3) A source that has url and AMP url, or url only (since we won't show
122 // the snippet to users if the article does not have a publisher name, it
123 // doesn't matter whether the snippet has the AMP url or not)
124 size_t best_source_index = 0;
125 for (size_t i = 0; i < snippet->sources_.size(); ++i) {
126 const SnippetSource& source = snippet->sources_[i];
127 if (!source.publisher_name.empty()) {
128 best_source_index = i;
129 if (!source.amp_url.is_empty()) {
130 // This is the best possible source, stop looking.
131 break;
132 }
133 }
134 }
135 snippet->set_source_index(best_source_index);
136 116
137 if (snippet->sources_.empty()) { 117 if (snippet->sources_.empty()) {
138 DLOG(WARNING) << "No sources found for article " << id; 118 DLOG(WARNING) << "No sources found for article " << id;
139 return nullptr; 119 return nullptr;
140 } 120 }
141 121
122 snippet->FindBestSource();
123
142 double score; 124 double score;
143 if (dict.GetDouble(kScore, &score)) 125 if (dict.GetDouble(kScore, &score))
144 snippet->set_score(score); 126 snippet->set_score(score);
145 127
146 return snippet; 128 return snippet;
147 } 129 }
148 130
149 // static 131 // static
132 std::unique_ptr<NTPSnippet> NTPSnippet::CreateFromProto(
133 const SnippetProto& proto) {
134 // Need at least the id.
135 if (!proto.has_id() || proto.id().empty())
136 return nullptr;
137
138 std::unique_ptr<NTPSnippet> snippet(new NTPSnippet(proto.id()));
139
140 snippet->set_title(proto.title());
141 snippet->set_snippet(proto.snippet());
142 snippet->set_salient_image_url(GURL(proto.salient_image_url()));
143 snippet->set_publish_date(
144 base::Time::FromInternalValue(proto.publish_date()));
145 snippet->set_expiry_date(base::Time::FromInternalValue(proto.expiry_date()));
146 snippet->set_score(proto.score());
147 snippet->set_discarded(proto.discarded());
148
149 for (int i = 0; i < proto.sources_size(); ++i) {
150 const SnippetSourceProto& source_proto = proto.sources(i);
151 GURL url(source_proto.url());
152 if (!url.is_valid()) {
153 // We must at least have a valid source URL.
154 DLOG(WARNING) << "Invalid article url " << source_proto.url();
155 continue;
156 }
157 std::string publisher_name = source_proto.publisher_name();
158 GURL amp_url;
159 if (source_proto.has_amp_url()) {
160 amp_url = GURL(source_proto.amp_url());
161 DLOG_IF(WARNING, !amp_url.is_valid()) << "Invalid AMP URL "
162 << source_proto.amp_url();
163 }
164
165 snippet->add_source(SnippetSource(url, publisher_name, amp_url));
166 }
167
168 if (snippet->sources_.empty()) {
169 DLOG(WARNING) << "No sources found for article " << snippet->id();
170 return nullptr;
171 }
172
173 snippet->FindBestSource();
174
175 return snippet;
176 }
177
178 // static
150 bool NTPSnippet::AddFromListValue(const base::ListValue& list, 179 bool NTPSnippet::AddFromListValue(const base::ListValue& list,
151 PtrVector* snippets) { 180 PtrVector* snippets) {
152 for (const auto& value : list) { 181 for (const auto& value : list) {
153 const base::DictionaryValue* dict = nullptr; 182 const base::DictionaryValue* dict = nullptr;
154 if (!value->GetAsDictionary(&dict)) 183 if (!value->GetAsDictionary(&dict))
155 return false; 184 return false;
156 185
157 std::unique_ptr<NTPSnippet> snippet = CreateFromDictionary(*dict); 186 std::unique_ptr<NTPSnippet> snippet = CreateFromDictionary(*dict);
158 if (!snippet) 187 if (!snippet)
159 return false; 188 return false;
160 189
161 snippets->push_back(std::move(snippet)); 190 snippets->push_back(std::move(snippet));
162 } 191 }
163 return true; 192 return true;
164 } 193 }
165 194
166 std::unique_ptr<base::DictionaryValue> NTPSnippet::ToDictionary() const { 195 SnippetProto NTPSnippet::ToProto() const {
167 std::unique_ptr<base::DictionaryValue> dict(new base::DictionaryValue); 196 SnippetProto result;
168 197
169 dict->SetString(kId, id_); 198 result.set_id(id_);
170 if (!title_.empty()) 199 if (!title_.empty())
171 dict->SetString(kTitle, title_); 200 result.set_title(title_);
201 if (!snippet_.empty())
202 result.set_snippet(snippet_);
172 if (salient_image_url_.is_valid()) 203 if (salient_image_url_.is_valid())
173 dict->SetString(kSalientImageUrl, salient_image_url_.spec()); 204 result.set_salient_image_url(salient_image_url_.spec());
174 if (!snippet_.empty())
175 dict->SetString(kSnippet, snippet_);
176 if (!publish_date_.is_null()) 205 if (!publish_date_.is_null())
177 dict->SetString(kPublishDate, TimeToJsonString(publish_date_)); 206 result.set_publish_date(publish_date_.ToInternalValue());
178 if (!expiry_date_.is_null()) 207 if (!expiry_date_.is_null())
179 dict->SetString(kExpiryDate, TimeToJsonString(expiry_date_)); 208 result.set_expiry_date(expiry_date_.ToInternalValue());
209 result.set_score(score_);
210 result.set_discarded(is_discarded_);
180 211
181 std::unique_ptr<base::ListValue> corpus_infos_list(new base::ListValue);
182 for (const SnippetSource& source : sources_) { 212 for (const SnippetSource& source : sources_) {
183 std::unique_ptr<base::DictionaryValue> corpus_info_dict( 213 SnippetSourceProto* source_proto = result.add_sources();
184 new base::DictionaryValue); 214 source_proto->set_url(source.url.spec());
185
186 corpus_info_dict->SetString(kCorpusId, source.url.spec());
187 if (!source.amp_url.is_empty())
188 corpus_info_dict->SetString(kAmpUrl, source.amp_url.spec());
189 if (!source.publisher_name.empty()) 215 if (!source.publisher_name.empty())
190 corpus_info_dict->SetString( 216 source_proto->set_publisher_name(source.publisher_name);
191 base::StringPrintf("%s.%s", kPublisherData, kSiteTitle), 217 if (source.amp_url.is_valid())
192 source.publisher_name); 218 source_proto->set_amp_url(source.amp_url.spec());
193
194 corpus_infos_list->Append(std::move(corpus_info_dict));
195 } 219 }
196 220
197 dict->Set(kSourceCorpusInfo, std::move(corpus_infos_list));
198
199 std::unique_ptr<base::DictionaryValue> result(new base::DictionaryValue);
200 result->Set(kContentInfo, std::move(dict));
201
202 result->SetDouble(kScore, score_);
203
204 return result; 221 return result;
205 } 222 }
206 223
207 // static 224 // static
208 base::Time NTPSnippet::TimeFromJsonString(const std::string& timestamp_str) { 225 base::Time NTPSnippet::TimeFromJsonString(const std::string& timestamp_str) {
209 int64_t timestamp; 226 int64_t timestamp;
210 if (!base::StringToInt64(timestamp_str, &timestamp)) { 227 if (!base::StringToInt64(timestamp_str, &timestamp)) {
211 // Even if there's an error in the conversion, some garbage data may still 228 // Even if there's an error in the conversion, some garbage data may still
212 // be written to the output var, so reset it. 229 // be written to the output var, so reset it.
213 timestamp = 0; 230 timestamp = 0;
214 } 231 }
215 return base::Time::UnixEpoch() + base::TimeDelta::FromSeconds(timestamp); 232 return base::Time::UnixEpoch() + base::TimeDelta::FromSeconds(timestamp);
216 } 233 }
217 234
218 // static 235 // static
219 std::string NTPSnippet::TimeToJsonString(const base::Time& time) { 236 std::string NTPSnippet::TimeToJsonString(const base::Time& time) {
220 return base::Int64ToString((time - base::Time::UnixEpoch()).InSeconds()); 237 return base::Int64ToString((time - base::Time::UnixEpoch()).InSeconds());
221 } 238 }
222 239
240 void NTPSnippet::FindBestSource() {
241 // The same article can be hosted by multiple sources, e.g. nytimes.com,
242 // cnn.com, etc. We need to parse the list of sources for this article and
243 // find the best match. In order of preference:
244 // 1) A source that has URL, publisher name, AMP URL
245 // 2) A source that has URL, publisher name
246 // 3) A source that has URL and AMP URL, or URL only (since we won't show
247 // the snippet to users if the article does not have a publisher name, it
248 // doesn't matter whether the snippet has the AMP URL or not)
249 best_source_index_ = 0;
250 for (size_t i = 0; i < sources_.size(); ++i) {
251 const SnippetSource& source = sources_[i];
252 if (!source.publisher_name.empty()) {
253 best_source_index_ = i;
254 if (!source.amp_url.is_empty()) {
255 // This is the best possible source, stop looking.
256 break;
257 }
258 }
259 }
260 }
261
223 } // namespace ntp_snippets 262 } // namespace ntp_snippets
OLD | NEW
« no previous file with comments | « components/ntp_snippets/ntp_snippet.h ('k') | components/ntp_snippets/ntp_snippets_constants.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698