Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/ntp_snippets/ntp_snippet.h" | 5 #include "components/ntp_snippets/ntp_snippet.h" |
| 6 | 6 |
| 7 #include "base/memory/scoped_ptr.h" | |
|
Marc Treib
2016/04/27 07:03:09
Not needed anymore (bad merge?)
May
2016/04/27 16:45:08
Done.
| |
| 7 #include "base/strings/string_number_conversions.h" | 8 #include "base/strings/string_number_conversions.h" |
| 9 #include "base/strings/stringprintf.h" | |
| 8 #include "base/values.h" | 10 #include "base/values.h" |
| 9 | 11 |
| 10 namespace { | 12 namespace { |
| 11 | 13 |
| 12 const char kUrl[] = "url"; | 14 const char kUrl[] = "url"; |
| 13 const char kSiteTitle[] = "site_title"; | 15 const char kSiteTitle[] = "sourceName"; |
|
Marc Treib
2016/04/27 07:03:09
nit: I'd move these two down to the other sourceco
May
2016/04/27 16:45:08
Done. Removed sourceLogoUrl since we don't use it.
| |
| 14 const char kTitle[] = "title"; | 16 const char kTitle[] = "title"; |
| 15 const char kFaviconUrl[] = "favicon_url"; | 17 const char kFaviconUrl[] = "sourceLogoUrl"; |
| 16 const char kSalientImageUrl[] = "thumbnailUrl"; | 18 const char kSalientImageUrl[] = "thumbnailUrl"; |
| 17 const char kSnippet[] = "snippet"; | 19 const char kSnippet[] = "snippet"; |
| 18 const char kPublishDate[] = "creationTimestampSec"; | 20 const char kPublishDate[] = "creationTimestampSec"; |
| 19 const char kExpiryDate[] = "expiryTimestampSec"; | 21 const char kExpiryDate[] = "expiryTimestampSec"; |
| 22 const char kPublisherData[] = "publisherData"; | |
| 23 const char kCorpusId[] = "corpusId"; | |
| 20 const char kSourceCorpusInfo[] = "sourceCorpusInfo"; | 24 const char kSourceCorpusInfo[] = "sourceCorpusInfo"; |
| 21 const char kAmpUrl[] = "ampUrl"; | 25 const char kAmpUrl[] = "ampUrl"; |
| 22 | 26 |
| 23 } // namespace | 27 } // namespace |
| 24 | 28 |
| 25 namespace ntp_snippets { | 29 namespace ntp_snippets { |
| 26 | 30 |
| 27 NTPSnippet::NTPSnippet(const GURL& url) : url_(url) { | 31 NTPSnippet::NTPSnippet(const GURL& url) : url_(url) { |
| 28 DCHECK(url_.is_valid()); | 32 DCHECK(url_.is_valid()); |
| 29 } | 33 } |
| 30 | 34 |
| 31 NTPSnippet::~NTPSnippet() {} | 35 NTPSnippet::~NTPSnippet() {} |
| 32 | 36 |
| 33 // static | 37 // static |
| 34 std::unique_ptr<NTPSnippet> NTPSnippet::CreateFromDictionary( | 38 std::unique_ptr<NTPSnippet> NTPSnippet::CreateFromDictionary( |
| 35 const base::DictionaryValue& dict) { | 39 const base::DictionaryValue& dict) { |
| 36 // Need at least the url. | 40 // Need at least the url. |
| 37 std::string url_str; | 41 std::string url_str; |
| 38 if (!dict.GetString("url", &url_str)) | 42 if (!dict.GetString("url", &url_str)) |
| 39 return nullptr; | 43 return nullptr; |
| 40 GURL url(url_str); | 44 GURL url(url_str); |
| 41 if (!url.is_valid()) | 45 if (!url.is_valid()) |
| 42 return nullptr; | 46 return nullptr; |
| 43 | 47 |
| 44 std::unique_ptr<NTPSnippet> snippet(new NTPSnippet(url)); | 48 std::unique_ptr<NTPSnippet> snippet(new NTPSnippet(url)); |
| 45 | 49 |
| 46 std::string site_title; | |
| 47 if (dict.GetString(kSiteTitle, &site_title)) | |
| 48 snippet->set_site_title(site_title); | |
| 49 std::string title; | 50 std::string title; |
| 50 if (dict.GetString(kTitle, &title)) | 51 if (dict.GetString(kTitle, &title)) |
| 51 snippet->set_title(title); | 52 snippet->set_title(title); |
| 52 std::string favicon_url; | |
| 53 if (dict.GetString(kFaviconUrl, &favicon_url)) | |
| 54 snippet->set_favicon_url(GURL(favicon_url)); | |
| 55 std::string salient_image_url; | 53 std::string salient_image_url; |
| 56 if (dict.GetString(kSalientImageUrl, &salient_image_url)) | 54 if (dict.GetString(kSalientImageUrl, &salient_image_url)) |
| 57 snippet->set_salient_image_url(GURL(salient_image_url)); | 55 snippet->set_salient_image_url(GURL(salient_image_url)); |
| 58 std::string snippet_str; | 56 std::string snippet_str; |
| 59 if (dict.GetString(kSnippet, &snippet_str)) | 57 if (dict.GetString(kSnippet, &snippet_str)) |
| 60 snippet->set_snippet(snippet_str); | 58 snippet->set_snippet(snippet_str); |
| 61 // The creation and expiry timestamps are uint64s which are stored as strings. | 59 // The creation and expiry timestamps are uint64s which are stored as strings. |
| 62 std::string creation_timestamp_str; | 60 std::string creation_timestamp_str; |
| 63 if (dict.GetString(kPublishDate, &creation_timestamp_str)) | 61 if (dict.GetString(kPublishDate, &creation_timestamp_str)) |
| 64 snippet->set_publish_date(TimeFromJsonString(creation_timestamp_str)); | 62 snippet->set_publish_date(TimeFromJsonString(creation_timestamp_str)); |
| 65 std::string expiry_timestamp_str; | 63 std::string expiry_timestamp_str; |
| 66 if (dict.GetString(kExpiryDate, &expiry_timestamp_str)) | 64 if (dict.GetString(kExpiryDate, &expiry_timestamp_str)) |
| 67 snippet->set_expiry_date(TimeFromJsonString(expiry_timestamp_str)); | 65 snippet->set_expiry_date(TimeFromJsonString(expiry_timestamp_str)); |
| 68 | 66 |
| 69 const base::ListValue* corpus_infos_list = nullptr; | 67 const base::ListValue* corpus_infos_list = nullptr; |
| 70 if (dict.GetList(kSourceCorpusInfo, &corpus_infos_list)) { | 68 if (!dict.GetList(kSourceCorpusInfo, &corpus_infos_list)) { |
| 69 DLOG(WARNING) << "No sources found for article " << url_str; | |
|
Bernhard Bauer
2016/04/27 13:22:05
You could `return snippet` here to avoid having to
May
2016/04/27 16:45:08
Done.
| |
| 70 } else { | |
| 71 for (base::Value* value : *corpus_infos_list) { | 71 for (base::Value* value : *corpus_infos_list) { |
| 72 const base::DictionaryValue* dict_value = nullptr; | 72 const base::DictionaryValue* dict_value = nullptr; |
| 73 if (value->GetAsDictionary(&dict_value)) { | 73 if (value->GetAsDictionary(&dict_value)) { |
|
Marc Treib
2016/04/27 07:03:09
nit: "if (!Get...) continue;", to remove one level
May
2016/04/27 16:45:08
Done.
| |
| 74 std::string amp_url; | 74 std::string corpus_id_str; |
| 75 if (dict_value->GetString(kAmpUrl, &amp_url)) { | 75 GURL corpus_id; |
| 76 snippet->set_amp_url(GURL(amp_url)); | 76 if (dict_value->GetString(kCorpusId, &corpus_id_str)) { |
|
Marc Treib
2016/04/27 07:03:09
You probably also want to handle the "else" here,
May
2016/04/27 16:45:08
Done.
| |
| 77 break; | 77 corpus_id = GURL(corpus_id_str); |
| 78 if (!corpus_id.is_valid()) { | |
| 79 // We must at least have a valid source URL | |
| 80 DLOG(WARNING) << "Invalid article url " << corpus_id.spec(); | |
|
Marc Treib
2016/04/27 07:03:09
I think if a GURL is invalid, then its spec will
May
2016/04/27 16:45:08
Eep, good catch. Done.
| |
| 81 continue; | |
| 82 } | |
| 83 } | |
| 84 | |
| 85 const base::DictionaryValue* publisher_data = nullptr; | |
| 86 std::string site_title; | |
| 87 if (dict_value->GetDictionary(kPublisherData, &publisher_data)) { | |
|
Marc Treib
2016/04/27 07:03:09
Also here: Do we want to warn also if the publishe
May
2016/04/27 16:45:08
Done.
| |
| 88 if (!publisher_data->GetString(kSiteTitle, &site_title)) { | |
| 89 // It's possible but not desirable to have no publisher data | |
| 90 DLOG(WARNING) << "No publisher data for article " | |
| 91 << corpus_id.spec(); | |
|
Marc Treib
2016/04/27 07:03:09
nit: misaligned
May
2016/04/27 16:45:08
Done.
| |
| 92 } | |
| 93 } | |
| 94 | |
| 95 std::string amp_url_str; | |
| 96 GURL amp_url; | |
| 97 // Expected to not have AMP url sometimes | |
| 98 if (dict_value->GetString(kAmpUrl, &amp_url_str)) { | |
| 99 amp_url = GURL(amp_url_str); | |
| 100 if (!amp_url.is_valid()) | |
| 101 DLOG(WARNING) << "Invalid AMP url " << amp_url.spec(); | |
|
Marc Treib
2016/04/27 07:03:09
nit: DLOG_IF
May
2016/04/27 16:45:08
Done.
| |
| 102 } | |
| 103 snippet->sources_.push_back(SnippetSource( | |
| 104 corpus_id, site_title, amp_url.is_valid() ? amp_url : GURL())); | |
| 105 } | |
| 106 } | |
| 107 // The previous url we have saved can be one of several sources for the | |
| 108 // article. For example, the same article can be hosted by nytimes.com, | |
| 109 // cnn.com, etc. We need to parse the list of sources for this article and | |
| 110 // find the best match. In order of preference: | |
| 111 // 1) A source that has url, publisher name, AMP url | |
| 112 // 2) A source that has url, publisher name | |
| 113 // 3) A source that has url and AMP url, or url only | |
|
Marc Treib
2016/04/27 07:03:09
3) ...so, just URL (which is required anyway), and
May
2016/04/27 16:45:08
yeah, clarified a bit in the comments.
| |
| 114 if (snippet->sources_.size() > 0) { | |
|
Marc Treib
2016/04/27 07:03:09
nit: !.empty()
May
2016/04/27 16:45:08
Done.
| |
| 115 SnippetSource best_source_found = snippet->sources_[0]; | |
| 116 for (size_t i = 1; i < snippet->sources_.size(); ++i) { | |
| 117 SnippetSource current_source = snippet->sources_[i]; | |
|
Marc Treib
2016/04/27 07:03:09
const SnippetSource& ?
May
2016/04/27 16:45:08
Done.
| |
| 118 if (!best_source_found.publisher_name.empty()) { | |
|
Bernhard Bauer
2016/04/27 13:22:05
I feel like this would be best done with a compari
May
2016/04/27 16:45:08
Pshaw, you kids with your fancy al-gow-rhythms. Gi
| |
| 119 if (!best_source_found.amp_url.is_empty()) { | |
| 120 // We already have the best source | |
| 121 break; | |
| 122 } | |
| 123 if (!current_source.publisher_name.empty() && | |
| 124 !current_source.amp_url.is_empty()) { | |
| 125 best_source_found = current_source; | |
|
Marc Treib
2016/04/27 07:03:09
Here we can break directly, no?
May
2016/04/27 16:45:08
Done.
| |
| 126 } | |
| 127 } else { | |
| 128 if (!current_source.publisher_name.empty()) | |
| 129 best_source_found = current_source; | |
| 78 } | 130 } |
| 79 } | 131 } |
| 132 | |
| 133 // The url from source_info is a url for a site that is one of the | |
| 134 // HOST_RESTRICT parameters, so if we have this, we need to replace the | |
| 135 // previously saved url with this one as this is hosted on a site that the | |
| 136 // user actually visits. | |
| 137 snippet->set_url(best_source_found.url); | |
| 138 snippet->set_site_title(best_source_found.publisher_name); | |
| 139 snippet->set_amp_url(best_source_found.amp_url); | |
| 80 } | 140 } |
| 81 } | 141 } |
| 82 | 142 |
| 83 return snippet; | 143 return snippet; |
| 84 } | 144 } |
| 85 | 145 |
| 86 std::unique_ptr<base::DictionaryValue> NTPSnippet::ToDictionary() const { | 146 std::unique_ptr<base::DictionaryValue> NTPSnippet::ToDictionary() const { |
| 87 std::unique_ptr<base::DictionaryValue> dict(new base::DictionaryValue); | 147 std::unique_ptr<base::DictionaryValue> dict(new base::DictionaryValue); |
| 88 | 148 |
| 89 dict->SetString(kUrl, url_.spec()); | 149 dict->SetString(kUrl, url_.spec()); |
|
Marc Treib
2016/04/27 07:03:09
Hm. So the URL is now effectively stored twice, bu
May
2016/04/27 16:45:08
yeah, mainly because the original url that we get
| |
| 90 if (!site_title_.empty()) | 150 if (!site_title_.empty()) |
| 91 dict->SetString(kSiteTitle, site_title_); | 151 dict->SetString(kSiteTitle, site_title_); |
| 92 if (!title_.empty()) | 152 if (!title_.empty()) |
| 93 dict->SetString(kTitle, title_); | 153 dict->SetString(kTitle, title_); |
| 94 if (favicon_url_.is_valid()) | 154 if (favicon_url_.is_valid()) |
| 95 dict->SetString(kFaviconUrl, favicon_url_.spec()); | 155 dict->SetString(kFaviconUrl, favicon_url_.spec()); |
| 96 if (salient_image_url_.is_valid()) | 156 if (salient_image_url_.is_valid()) |
| 97 dict->SetString(kSalientImageUrl, salient_image_url_.spec()); | 157 dict->SetString(kSalientImageUrl, salient_image_url_.spec()); |
| 98 if (!snippet_.empty()) | 158 if (!snippet_.empty()) |
| 99 dict->SetString(kSnippet, snippet_); | 159 dict->SetString(kSnippet, snippet_); |
| 100 if (!publish_date_.is_null()) | 160 if (!publish_date_.is_null()) |
| 101 dict->SetString(kPublishDate, TimeToJsonString(publish_date_)); | 161 dict->SetString(kPublishDate, TimeToJsonString(publish_date_)); |
| 102 if (!expiry_date_.is_null()) | 162 if (!expiry_date_.is_null()) |
| 103 dict->SetString(kExpiryDate, TimeToJsonString(expiry_date_)); | 163 dict->SetString(kExpiryDate, TimeToJsonString(expiry_date_)); |
| 104 if (amp_url_.is_valid()) { | 164 |
| 105 std::unique_ptr<base::ListValue> corpus_infos_list(new base::ListValue); | 165 std::unique_ptr<base::ListValue> corpus_infos_list(new base::ListValue); |
| 166 for (const SnippetSource source : sources_) { | |
| 106 std::unique_ptr<base::DictionaryValue> corpus_info_dict( | 167 std::unique_ptr<base::DictionaryValue> corpus_info_dict( |
| 107 new base::DictionaryValue); | 168 new base::DictionaryValue); |
| 108 corpus_info_dict->SetString(kAmpUrl, amp_url_.spec()); | 169 |
| 109 corpus_infos_list->Set(0, std::move(corpus_info_dict)); | 170 corpus_info_dict->SetString(kCorpusId, source.url.spec()); |
| 110 dict->Set(kSourceCorpusInfo, std::move(corpus_infos_list)); | 171 if (!source.amp_url.is_empty()) |
| 172 corpus_info_dict->SetString(kAmpUrl, source.amp_url.spec()); | |
| 173 if (!source.publisher_name.empty()) | |
| 174 corpus_info_dict->SetString( | |
| 175 base::StringPrintf("%s.%s", kPublisherData, kSiteTitle), | |
| 176 source.publisher_name); | |
| 177 | |
| 178 corpus_infos_list->Append(std::move(corpus_info_dict)); | |
| 111 } | 179 } |
| 180 | |
| 181 dict->Set(kSourceCorpusInfo, std::move(corpus_infos_list)); | |
| 182 | |
| 112 return dict; | 183 return dict; |
| 113 } | 184 } |
| 114 | 185 |
| 115 // static | 186 // static |
| 116 base::Time NTPSnippet::TimeFromJsonString(const std::string& timestamp_str) { | 187 base::Time NTPSnippet::TimeFromJsonString(const std::string& timestamp_str) { |
| 117 int64_t timestamp; | 188 int64_t timestamp; |
| 118 if (!base::StringToInt64(timestamp_str, &timestamp)) { | 189 if (!base::StringToInt64(timestamp_str, &timestamp)) { |
| 119 // Even if there's an error in the conversion, some garbage data may still | 190 // Even if there's an error in the conversion, some garbage data may still |
| 120 // be written to the output var, so reset it. | 191 // be written to the output var, so reset it. |
| 121 timestamp = 0; | 192 timestamp = 0; |
| 122 } | 193 } |
| 123 return base::Time::UnixEpoch() + base::TimeDelta::FromSeconds(timestamp); | 194 return base::Time::UnixEpoch() + base::TimeDelta::FromSeconds(timestamp); |
| 124 } | 195 } |
| 125 | 196 |
| 126 // static | 197 // static |
| 127 std::string NTPSnippet::TimeToJsonString(const base::Time& time) { | 198 std::string NTPSnippet::TimeToJsonString(const base::Time& time) { |
| 128 return base::Int64ToString((time - base::Time::UnixEpoch()).InSeconds()); | 199 return base::Int64ToString((time - base::Time::UnixEpoch()).InSeconds()); |
| 129 } | 200 } |
| 130 | 201 |
| 131 } // namespace ntp_snippets | 202 } // namespace ntp_snippets |
| OLD | NEW |