| OLD | NEW |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/ntp_snippets/ntp_snippet.h" | 5 #include "components/ntp_snippets/ntp_snippet.h" |
| 6 | 6 |
| 7 #include "base/strings/string_number_conversions.h" | 7 #include "base/strings/string_number_conversions.h" |
| 8 #include "base/strings/stringprintf.h" | 8 #include "base/strings/stringprintf.h" |
| 9 #include "base/values.h" | 9 #include "base/values.h" |
| | 10 #include "components/ntp_snippets/proto/ntp_snippets.pb.h" |
| 10 | 11 |
| 11 namespace { | 12 namespace { |
| 12 | 13 |
| 13 const char kScore[] = "score"; | 14 const char kScore[] = "score"; |
| 14 const char kContentInfo[] = "contentInfo"; | 15 const char kContentInfo[] = "contentInfo"; |
| 15 | 16 |
| 16 const char kId[] = "url"; | 17 const char kId[] = "url"; |
| 17 const char kTitle[] = "title"; | 18 const char kTitle[] = "title"; |
| 18 const char kSalientImageUrl[] = "thumbnailUrl"; | 19 const char kSalientImageUrl[] = "thumbnailUrl"; |
| 19 const char kSnippet[] = "snippet"; | 20 const char kSnippet[] = "snippet"; |
| 20 const char kPublishDate[] = "creationTimestampSec"; | 21 const char kPublishDate[] = "creationTimestampSec"; |
| 21 const char kExpiryDate[] = "expiryTimestampSec"; | 22 const char kExpiryDate[] = "expiryTimestampSec"; |
| 22 const char kSiteTitle[] = "sourceName"; | 23 const char kSiteTitle[] = "sourceName"; |
| 23 const char kPublisherData[] = "publisherData"; | 24 const char kPublisherData[] = "publisherData"; |
| 24 const char kCorpusId[] = "corpusId"; | 25 const char kCorpusId[] = "corpusId"; |
| 25 const char kSourceCorpusInfo[] = "sourceCorpusInfo"; | 26 const char kSourceCorpusInfo[] = "sourceCorpusInfo"; |
| 26 const char kAmpUrl[] = "ampUrl"; | 27 const char kAmpUrl[] = "ampUrl"; |
| 27 | 28 |
| 28 } // namespace | 29 } // namespace |
| 29 | 30 |
| 30 namespace ntp_snippets { | 31 namespace ntp_snippets { |
| 31 | 32 |
| 32 NTPSnippet::NTPSnippet(const std::string& id) | 33 NTPSnippet::NTPSnippet(const std::string& id) |
| 33 : id_(id), score_(0), best_source_index_(0) {} | 34 : id_(id), score_(0), is_discarded_(false), best_source_index_(0) {} |
| 34 | 35 |
| 35 NTPSnippet::~NTPSnippet() {} | 36 NTPSnippet::~NTPSnippet() {} |
| 36 | 37 |
| 37 // static | 38 // static |
| 38 std::unique_ptr<NTPSnippet> NTPSnippet::CreateFromDictionary( | 39 std::unique_ptr<NTPSnippet> NTPSnippet::CreateFromDictionary( |
| 39 const base::DictionaryValue& dict) { | 40 const base::DictionaryValue& dict) { |
| 40 const base::DictionaryValue* content = nullptr; | 41 const base::DictionaryValue* content = nullptr; |
| 41 if (!dict.GetDictionary(kContentInfo, &content)) | 42 if (!dict.GetDictionary(kContentInfo, &content)) |
| 42 return nullptr; | 43 return nullptr; |
| 43 | 44 |
| 44 // Need at least the url. | 45 // Need at least the id. |
| 45 std::string id; | 46 std::string id; |
| 46 if (!content->GetString(kId, &id)) | 47 if (!content->GetString(kId, &id) || id.empty()) |
| 47 return nullptr; | 48 return nullptr; |
| 48 | 49 |
| 49 std::unique_ptr<NTPSnippet> snippet(new NTPSnippet(id)); | 50 std::unique_ptr<NTPSnippet> snippet(new NTPSnippet(id)); |
| 50 | 51 |
| 51 std::string title; | 52 std::string title; |
| 52 if (content->GetString(kTitle, &title)) | 53 if (content->GetString(kTitle, &title)) |
| 53 snippet->set_title(title); | 54 snippet->set_title(title); |
| 54 std::string salient_image_url; | 55 std::string salient_image_url; |
| 55 if (content->GetString(kSalientImageUrl, &salient_image_url)) | 56 if (content->GetString(kSalientImageUrl, &salient_image_url)) |
| 56 snippet->set_salient_image_url(GURL(salient_image_url)); | 57 snippet->set_salient_image_url(GURL(salient_image_url)); |
| (...skipping 48 matching lines...) | (...skipping 48 matching lines...) |
| 105 // Expected to not have AMP url sometimes. | 106 // Expected to not have AMP url sometimes. |
| 106 if (dict_value->GetString(kAmpUrl, &amp_url_str)) { | 107 if (dict_value->GetString(kAmpUrl, &amp_url_str)) { |
| 107 amp_url = GURL(amp_url_str); | 108 amp_url = GURL(amp_url_str); |
| 108 DLOG_IF(WARNING, !amp_url.is_valid()) << "Invalid AMP url " | 109 DLOG_IF(WARNING, !amp_url.is_valid()) << "Invalid AMP url " |
| 109 << amp_url_str; | 110 << amp_url_str; |
| 110 } | 111 } |
| 111 SnippetSource source(corpus_id, site_title, | 112 SnippetSource source(corpus_id, site_title, |
| 112 amp_url.is_valid() ? amp_url : GURL()); | 113 amp_url.is_valid() ? amp_url : GURL()); |
| 113 snippet->add_source(source); | 114 snippet->add_source(source); |
| 114 } | 115 } |
| 115 // The previous url we have saved can be one of several sources for the | |
| 116 // article. For example, the same article can be hosted by nytimes.com, | |
| 117 // cnn.com, etc. We need to parse the list of sources for this article and | |
| 118 // find the best match. In order of preference: | |
| 119 // 1) A source that has url, publisher name, AMP url | |
| 120 // 2) A source that has url, publisher name | |
| 121 // 3) A source that has url and AMP url, or url only (since we won't show | |
| 122 // the snippet to users if the article does not have a publisher name, it | |
| 123 // doesn't matter whether the snippet has the AMP url or not) | |
| 124 size_t best_source_index = 0; | |
| 125 for (size_t i = 0; i < snippet->sources_.size(); ++i) { | |
| 126 const SnippetSource& source = snippet->sources_[i]; | |
| 127 if (!source.publisher_name.empty()) { | |
| 128 best_source_index = i; | |
| 129 if (!source.amp_url.is_empty()) { | |
| 130 // This is the best possible source, stop looking. | |
| 131 break; | |
| 132 } | |
| 133 } | |
| 134 } | |
| 135 snippet->set_source_index(best_source_index); | |
| 136 | 116 |
| 137 if (snippet->sources_.empty()) { | 117 if (snippet->sources_.empty()) { |
| 138 DLOG(WARNING) << "No sources found for article " << id; | 118 DLOG(WARNING) << "No sources found for article " << id; |
| 139 return nullptr; | 119 return nullptr; |
| 140 } | 120 } |
| 141 | 121 |
| | 122 snippet->FindBestSource(); |
| | 123 |
| 142 double score; | 124 double score; |
| 143 if (dict.GetDouble(kScore, &score)) | 125 if (dict.GetDouble(kScore, &score)) |
| 144 snippet->set_score(score); | 126 snippet->set_score(score); |
| 145 | 127 |
| 146 return snippet; | 128 return snippet; |
| 147 } | 129 } |
| 148 | 130 |
| 149 // static | 131 // static |
| | 132 std::unique_ptr<NTPSnippet> NTPSnippet::CreateFromProto( |
| | 133 const SnippetProto& proto) { |
| | 134 // Need at least the id. |
| | 135 if (!proto.has_id() || proto.id().empty()) |
| | 136 return nullptr; |
| | 137 |
| | 138 std::unique_ptr<NTPSnippet> snippet(new NTPSnippet(proto.id())); |
| | 139 |
| | 140 snippet->set_title(proto.title()); |
| | 141 snippet->set_snippet(proto.snippet()); |
| | 142 snippet->set_salient_image_url(GURL(proto.salient_image_url())); |
| | 143 snippet->set_publish_date( |
| | 144 base::Time::FromInternalValue(proto.publish_date())); |
| | 145 snippet->set_expiry_date(base::Time::FromInternalValue(proto.expiry_date())); |
| | 146 snippet->set_score(proto.score()); |
| | 147 snippet->set_discarded(proto.discarded()); |
| | 148 |
| | 149 for (int i = 0; i < proto.sources_size(); ++i) { |
| | 150 const SnippetSourceProto& source_proto = proto.sources(i); |
| | 151 GURL url(source_proto.url()); |
| | 152 if (!url.is_valid()) { |
| | 153 // We must at least have a valid source URL. |
| | 154 DLOG(WARNING) << "Invalid article url " << source_proto.url(); |
| | 155 continue; |
| | 156 } |
| | 157 std::string publisher_name = source_proto.publisher_name(); |
| | 158 GURL amp_url; |
| | 159 if (source_proto.has_amp_url()) { |
| | 160 amp_url = GURL(source_proto.amp_url()); |
| | 161 DLOG_IF(WARNING, !amp_url.is_valid()) << "Invalid AMP URL " |
| | 162 << source_proto.amp_url(); |
| | 163 } |
| | 164 |
| | 165 snippet->add_source(SnippetSource(url, publisher_name, amp_url)); |
| | 166 } |
| | 167 |
| | 168 if (snippet->sources_.empty()) { |
| | 169 DLOG(WARNING) << "No sources found for article " << snippet->id(); |
| | 170 return nullptr; |
| | 171 } |
| | 172 |
| | 173 snippet->FindBestSource(); |
| | 174 |
| | 175 return snippet; |
| | 176 } |
| | 177 |
| | 178 // static |
| 150 bool NTPSnippet::AddFromListValue(const base::ListValue& list, | 179 bool NTPSnippet::AddFromListValue(const base::ListValue& list, |
| 151 PtrVector* snippets) { | 180 PtrVector* snippets) { |
| 152 for (const auto& value : list) { | 181 for (const auto& value : list) { |
| 153 const base::DictionaryValue* dict = nullptr; | 182 const base::DictionaryValue* dict = nullptr; |
| 154 if (!value->GetAsDictionary(&dict)) | 183 if (!value->GetAsDictionary(&dict)) |
| 155 return false; | 184 return false; |
| 156 | 185 |
| 157 std::unique_ptr<NTPSnippet> snippet = CreateFromDictionary(*dict); | 186 std::unique_ptr<NTPSnippet> snippet = CreateFromDictionary(*dict); |
| 158 if (!snippet) | 187 if (!snippet) |
| 159 return false; | 188 return false; |
| 160 | 189 |
| 161 snippets->push_back(std::move(snippet)); | 190 snippets->push_back(std::move(snippet)); |
| 162 } | 191 } |
| 163 return true; | 192 return true; |
| 164 } | 193 } |
| 165 | 194 |
| 166 std::unique_ptr<base::DictionaryValue> NTPSnippet::ToDictionary() const { | 195 SnippetProto NTPSnippet::ToProto() const { |
| 167 std::unique_ptr<base::DictionaryValue> dict(new base::DictionaryValue); | 196 SnippetProto result; |
| 168 | 197 |
| 169 dict->SetString(kId, id_); | 198 result.set_id(id_); |
| 170 if (!title_.empty()) | 199 if (!title_.empty()) |
| 171 dict->SetString(kTitle, title_); | 200 result.set_title(title_); |
| | 201 if (!snippet_.empty()) |
| | 202 result.set_snippet(snippet_); |
| 172 if (salient_image_url_.is_valid()) | 203 if (salient_image_url_.is_valid()) |
| 173 dict->SetString(kSalientImageUrl, salient_image_url_.spec()); | 204 result.set_salient_image_url(salient_image_url_.spec()); |
| 174 if (!snippet_.empty()) | |
| 175 dict->SetString(kSnippet, snippet_); | |
| 176 if (!publish_date_.is_null()) | 205 if (!publish_date_.is_null()) |
| 177 dict->SetString(kPublishDate, TimeToJsonString(publish_date_)); | 206 result.set_publish_date(publish_date_.ToInternalValue()); |
| 178 if (!expiry_date_.is_null()) | 207 if (!expiry_date_.is_null()) |
| 179 dict->SetString(kExpiryDate, TimeToJsonString(expiry_date_)); | 208 result.set_expiry_date(expiry_date_.ToInternalValue()); |
| | 209 result.set_score(score_); |
| | 210 result.set_discarded(is_discarded_); |
| 180 | 211 |
| 181 std::unique_ptr<base::ListValue> corpus_infos_list(new base::ListValue); | |
| 182 for (const SnippetSource& source : sources_) { | 212 for (const SnippetSource& source : sources_) { |
| 183 std::unique_ptr<base::DictionaryValue> corpus_info_dict( | 213 SnippetSourceProto* source_proto = result.add_sources(); |
| 184 new base::DictionaryValue); | 214 source_proto->set_url(source.url.spec()); |
| 185 | |
| 186 corpus_info_dict->SetString(kCorpusId, source.url.spec()); | |
| 187 if (!source.amp_url.is_empty()) | |
| 188 corpus_info_dict->SetString(kAmpUrl, source.amp_url.spec()); | |
| 189 if (!source.publisher_name.empty()) | 215 if (!source.publisher_name.empty()) |
| 190 corpus_info_dict->SetString( | 216 source_proto->set_publisher_name(source.publisher_name); |
| 191 base::StringPrintf("%s.%s", kPublisherData, kSiteTitle), | 217 if (source.amp_url.is_valid()) |
| 192 source.publisher_name); | 218 source_proto->set_amp_url(source.amp_url.spec()); |
| 193 | |
| 194 corpus_infos_list->Append(std::move(corpus_info_dict)); | |
| 195 } | 219 } |
| 196 | 220 |
| 197 dict->Set(kSourceCorpusInfo, std::move(corpus_infos_list)); | |
| 198 | |
| 199 std::unique_ptr<base::DictionaryValue> result(new base::DictionaryValue); | |
| 200 result->Set(kContentInfo, std::move(dict)); | |
| 201 | |
| 202 result->SetDouble(kScore, score_); | |
| 203 | |
| 204 return result; | 221 return result; |
| 205 } | 222 } |
| 206 | 223 |
| 207 // static | 224 // static |
| 208 base::Time NTPSnippet::TimeFromJsonString(const std::string& timestamp_str) { | 225 base::Time NTPSnippet::TimeFromJsonString(const std::string& timestamp_str) { |
| 209 int64_t timestamp; | 226 int64_t timestamp; |
| 210 if (!base::StringToInt64(timestamp_str, &timestamp)) { | 227 if (!base::StringToInt64(timestamp_str, &timestamp)) { |
| 211 // Even if there's an error in the conversion, some garbage data may still | 228 // Even if there's an error in the conversion, some garbage data may still |
| 212 // be written to the output var, so reset it. | 229 // be written to the output var, so reset it. |
| 213 timestamp = 0; | 230 timestamp = 0; |
| 214 } | 231 } |
| 215 return base::Time::UnixEpoch() + base::TimeDelta::FromSeconds(timestamp); | 232 return base::Time::UnixEpoch() + base::TimeDelta::FromSeconds(timestamp); |
| 216 } | 233 } |
| 217 | 234 |
| 218 // static | 235 // static |
| 219 std::string NTPSnippet::TimeToJsonString(const base::Time& time) { | 236 std::string NTPSnippet::TimeToJsonString(const base::Time& time) { |
| 220 return base::Int64ToString((time - base::Time::UnixEpoch()).InSeconds()); | 237 return base::Int64ToString((time - base::Time::UnixEpoch()).InSeconds()); |
| 221 } | 238 } |
| 222 | 239 |
| | 240 void NTPSnippet::FindBestSource() { |
| | 241 // The same article can be hosted by multiple sources, e.g. nytimes.com, |
| | 242 // cnn.com, etc. We need to parse the list of sources for this article and |
| | 243 // find the best match. In order of preference: |
| | 244 // 1) A source that has URL, publisher name, AMP URL |
| | 245 // 2) A source that has URL, publisher name |
| | 246 // 3) A source that has URL and AMP URL, or URL only (since we won't show |
| | 247 // the snippet to users if the article does not have a publisher name, it |
| | 248 // doesn't matter whether the snippet has the AMP URL or not) |
| | 249 best_source_index_ = 0; |
| | 250 for (size_t i = 0; i < sources_.size(); ++i) { |
| | 251 const SnippetSource& source = sources_[i]; |
| | 252 if (!source.publisher_name.empty()) { |
| | 253 best_source_index_ = i; |
| | 254 if (!source.amp_url.is_empty()) { |
| | 255 // This is the best possible source, stop looking. |
| | 256 break; |
| | 257 } |
| | 258 } |
| | 259 } |
| | 260 } |
| | 261 |
| 223 } // namespace ntp_snippets | 262 } // namespace ntp_snippets |
| OLD | NEW |