OLD | NEW |
1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/ntp_snippets/ntp_snippet.h" | 5 #include "components/ntp_snippets/ntp_snippet.h" |
6 | 6 |
7 #include "base/strings/string_number_conversions.h" | 7 #include "base/strings/string_number_conversions.h" |
| 8 #include "base/strings/stringprintf.h" |
8 #include "base/values.h" | 9 #include "base/values.h" |
9 | 10 |
namespace {

// Keys of the top-level snippet dictionary.
const char kUrl[] = "url";
const char kTitle[] = "title";
const char kSalientImageUrl[] = "thumbnailUrl";
const char kSnippet[] = "snippet";
const char kPublishDate[] = "creationTimestampSec";
const char kExpiryDate[] = "expiryTimestampSec";

// Keys used inside each entry of the |kSourceCorpusInfo| list. |kSiteTitle|
// is looked up inside the nested |kPublisherData| dictionary.
const char kSiteTitle[] = "sourceName";
const char kPublisherData[] = "publisherData";
const char kCorpusId[] = "corpusId";
const char kSourceCorpusInfo[] = "sourceCorpusInfo";
const char kAmpUrl[] = "ampUrl";

}  // namespace
24 | 26 |
25 namespace ntp_snippets { | 27 namespace ntp_snippets { |
26 | 28 |
27 NTPSnippet::NTPSnippet(const GURL& url) : url_(url) { | 29 NTPSnippet::NTPSnippet(const GURL& url) : url_(url), best_source_index_(0) { |
28 DCHECK(url_.is_valid()); | 30 DCHECK(url_.is_valid()); |
29 } | 31 } |
30 | 32 |
31 NTPSnippet::~NTPSnippet() {} | 33 NTPSnippet::~NTPSnippet() {} |
32 | 34 |
33 // static | 35 // static |
34 std::unique_ptr<NTPSnippet> NTPSnippet::CreateFromDictionary( | 36 std::unique_ptr<NTPSnippet> NTPSnippet::CreateFromDictionary( |
35 const base::DictionaryValue& dict) { | 37 const base::DictionaryValue& dict) { |
36 // Need at least the url. | 38 // Need at least the url. |
37 std::string url_str; | 39 std::string url_str; |
38 if (!dict.GetString("url", &url_str)) | 40 if (!dict.GetString("url", &url_str)) |
39 return nullptr; | 41 return nullptr; |
40 GURL url(url_str); | 42 GURL url(url_str); |
41 if (!url.is_valid()) | 43 if (!url.is_valid()) |
42 return nullptr; | 44 return nullptr; |
43 | 45 |
44 std::unique_ptr<NTPSnippet> snippet(new NTPSnippet(url)); | 46 std::unique_ptr<NTPSnippet> snippet(new NTPSnippet(url)); |
45 | 47 |
46 std::string site_title; | |
47 if (dict.GetString(kSiteTitle, &site_title)) | |
48 snippet->set_site_title(site_title); | |
49 std::string title; | 48 std::string title; |
50 if (dict.GetString(kTitle, &title)) | 49 if (dict.GetString(kTitle, &title)) |
51 snippet->set_title(title); | 50 snippet->set_title(title); |
52 std::string favicon_url; | |
53 if (dict.GetString(kFaviconUrl, &favicon_url)) | |
54 snippet->set_favicon_url(GURL(favicon_url)); | |
55 std::string salient_image_url; | 51 std::string salient_image_url; |
56 if (dict.GetString(kSalientImageUrl, &salient_image_url)) | 52 if (dict.GetString(kSalientImageUrl, &salient_image_url)) |
57 snippet->set_salient_image_url(GURL(salient_image_url)); | 53 snippet->set_salient_image_url(GURL(salient_image_url)); |
58 std::string snippet_str; | 54 std::string snippet_str; |
59 if (dict.GetString(kSnippet, &snippet_str)) | 55 if (dict.GetString(kSnippet, &snippet_str)) |
60 snippet->set_snippet(snippet_str); | 56 snippet->set_snippet(snippet_str); |
61 // The creation and expiry timestamps are uint64s which are stored as strings. | 57 // The creation and expiry timestamps are uint64s which are stored as strings. |
62 std::string creation_timestamp_str; | 58 std::string creation_timestamp_str; |
63 if (dict.GetString(kPublishDate, &creation_timestamp_str)) | 59 if (dict.GetString(kPublishDate, &creation_timestamp_str)) |
64 snippet->set_publish_date(TimeFromJsonString(creation_timestamp_str)); | 60 snippet->set_publish_date(TimeFromJsonString(creation_timestamp_str)); |
65 std::string expiry_timestamp_str; | 61 std::string expiry_timestamp_str; |
66 if (dict.GetString(kExpiryDate, &expiry_timestamp_str)) | 62 if (dict.GetString(kExpiryDate, &expiry_timestamp_str)) |
67 snippet->set_expiry_date(TimeFromJsonString(expiry_timestamp_str)); | 63 snippet->set_expiry_date(TimeFromJsonString(expiry_timestamp_str)); |
68 | 64 |
69 const base::ListValue* corpus_infos_list = nullptr; | 65 const base::ListValue* corpus_infos_list = nullptr; |
70 if (dict.GetList(kSourceCorpusInfo, &corpus_infos_list)) { | 66 if (!dict.GetList(kSourceCorpusInfo, &corpus_infos_list)) { |
71 for (base::Value* value : *corpus_infos_list) { | 67 DLOG(WARNING) << "No sources found for article " << title; |
72 const base::DictionaryValue* dict_value = nullptr; | 68 return nullptr; |
73 if (value->GetAsDictionary(&dict_value)) { | 69 } |
74 std::string amp_url; | 70 |
75 if (dict_value->GetString(kAmpUrl, &_url)) { | 71 for (base::Value* value : *corpus_infos_list) { |
76 snippet->set_amp_url(GURL(amp_url)); | 72 const base::DictionaryValue* dict_value = nullptr; |
77 break; | 73 if (!value->GetAsDictionary(&dict_value)) { |
78 } | 74 DLOG(WARNING) << "Invalid source info for article " << url_str; |
| 75 continue; |
| 76 } |
| 77 |
| 78 std::string corpus_id_str; |
| 79 GURL corpus_id; |
| 80 if (dict_value->GetString(kCorpusId, &corpus_id_str)) { |
| 81 corpus_id = GURL(corpus_id_str); |
| 82 } |
| 83 |
| 84 if (!corpus_id.is_valid()) { |
| 85 // We must at least have a valid source URL |
| 86 DLOG(WARNING) << "Invalid article url " << corpus_id_str; |
| 87 continue; |
| 88 } |
| 89 |
| 90 const base::DictionaryValue* publisher_data = nullptr; |
| 91 std::string site_title; |
| 92 if (dict_value->GetDictionary(kPublisherData, &publisher_data)) { |
| 93 if (!publisher_data->GetString(kSiteTitle, &site_title)) { |
| 94 // It's possible but not desirable to have no publisher data |
| 95 DLOG(WARNING) << "No publisher name for article " << corpus_id.spec(); |
| 96 } |
| 97 } else { |
| 98 DLOG(WARNING) << "No publisher data for article " << corpus_id.spec(); |
| 99 } |
| 100 |
| 101 std::string amp_url_str; |
| 102 GURL amp_url; |
| 103 // Expected to not have AMP url sometimes |
| 104 if (dict_value->GetString(kAmpUrl, &_url_str)) { |
| 105 amp_url = GURL(amp_url_str); |
| 106 DLOG_IF(WARNING, !amp_url.is_valid()) << "Invalid AMP url " |
| 107 << amp_url_str; |
| 108 } |
| 109 SnippetSource source(corpus_id, site_title, |
| 110 amp_url.is_valid() ? amp_url : GURL()); |
| 111 snippet->add_source(source); |
| 112 } |
| 113 // The previous url we have saved can be one of several sources for the |
| 114 // article. For example, the same article can be hosted by nytimes.com, |
| 115 // cnn.com, etc. We need to parse the list of sources for this article and |
| 116 // find the best match. In order of preference: |
| 117 // 1) A source that has url, publisher name, AMP url |
| 118 // 2) A source that has url, publisher name |
| 119 // 3) A source that has url and AMP url, or url only (since we won't show |
| 120 // the snippet to users if the article does not have a publisher name, it |
| 121 // doesn't matter whether the snippet has the AMP url or not) |
| 122 size_t best_source_index = 0; |
| 123 for (size_t i = 0; i < snippet->sources_.size(); ++i) { |
| 124 const SnippetSource& best_source = snippet->sources_[i]; |
| 125 if (!best_source.publisher_name.empty()) { |
| 126 best_source_index = i; |
| 127 if (!best_source.amp_url.is_empty()) { |
| 128 // We already have the best possible source |
| 129 break; |
79 } | 130 } |
80 } | 131 } |
81 } | 132 } |
| 133 snippet->set_source_index(best_source_index); |
| 134 |
| 135 if (snippet->sources_.empty()) |
| 136 return nullptr; |
82 | 137 |
83 return snippet; | 138 return snippet; |
84 } | 139 } |
85 | 140 |
86 std::unique_ptr<base::DictionaryValue> NTPSnippet::ToDictionary() const { | 141 std::unique_ptr<base::DictionaryValue> NTPSnippet::ToDictionary() const { |
87 std::unique_ptr<base::DictionaryValue> dict(new base::DictionaryValue); | 142 std::unique_ptr<base::DictionaryValue> dict(new base::DictionaryValue); |
88 | 143 |
89 dict->SetString(kUrl, url_.spec()); | 144 dict->SetString(kUrl, url_.spec()); |
90 if (!site_title_.empty()) | |
91 dict->SetString(kSiteTitle, site_title_); | |
92 if (!title_.empty()) | 145 if (!title_.empty()) |
93 dict->SetString(kTitle, title_); | 146 dict->SetString(kTitle, title_); |
94 if (favicon_url_.is_valid()) | |
95 dict->SetString(kFaviconUrl, favicon_url_.spec()); | |
96 if (salient_image_url_.is_valid()) | 147 if (salient_image_url_.is_valid()) |
97 dict->SetString(kSalientImageUrl, salient_image_url_.spec()); | 148 dict->SetString(kSalientImageUrl, salient_image_url_.spec()); |
98 if (!snippet_.empty()) | 149 if (!snippet_.empty()) |
99 dict->SetString(kSnippet, snippet_); | 150 dict->SetString(kSnippet, snippet_); |
100 if (!publish_date_.is_null()) | 151 if (!publish_date_.is_null()) |
101 dict->SetString(kPublishDate, TimeToJsonString(publish_date_)); | 152 dict->SetString(kPublishDate, TimeToJsonString(publish_date_)); |
102 if (!expiry_date_.is_null()) | 153 if (!expiry_date_.is_null()) |
103 dict->SetString(kExpiryDate, TimeToJsonString(expiry_date_)); | 154 dict->SetString(kExpiryDate, TimeToJsonString(expiry_date_)); |
104 if (amp_url_.is_valid()) { | 155 |
105 std::unique_ptr<base::ListValue> corpus_infos_list(new base::ListValue); | 156 std::unique_ptr<base::ListValue> corpus_infos_list(new base::ListValue); |
| 157 for (const SnippetSource source : sources_) { |
106 std::unique_ptr<base::DictionaryValue> corpus_info_dict( | 158 std::unique_ptr<base::DictionaryValue> corpus_info_dict( |
107 new base::DictionaryValue); | 159 new base::DictionaryValue); |
108 corpus_info_dict->SetString(kAmpUrl, amp_url_.spec()); | 160 |
109 corpus_infos_list->Set(0, std::move(corpus_info_dict)); | 161 corpus_info_dict->SetString(kCorpusId, source.url.spec()); |
110 dict->Set(kSourceCorpusInfo, std::move(corpus_infos_list)); | 162 if (!source.amp_url.is_empty()) |
| 163 corpus_info_dict->SetString(kAmpUrl, source.amp_url.spec()); |
| 164 if (!source.publisher_name.empty()) |
| 165 corpus_info_dict->SetString( |
| 166 base::StringPrintf("%s.%s", kPublisherData, kSiteTitle), |
| 167 source.publisher_name); |
| 168 |
| 169 corpus_infos_list->Append(std::move(corpus_info_dict)); |
111 } | 170 } |
| 171 |
| 172 dict->Set(kSourceCorpusInfo, std::move(corpus_infos_list)); |
| 173 |
112 return dict; | 174 return dict; |
113 } | 175 } |
114 | 176 |
115 // static | 177 // static |
116 base::Time NTPSnippet::TimeFromJsonString(const std::string& timestamp_str) { | 178 base::Time NTPSnippet::TimeFromJsonString(const std::string& timestamp_str) { |
117 int64_t timestamp; | 179 int64_t timestamp; |
118 if (!base::StringToInt64(timestamp_str, ×tamp)) { | 180 if (!base::StringToInt64(timestamp_str, ×tamp)) { |
119 // Even if there's an error in the conversion, some garbage data may still | 181 // Even if there's an error in the conversion, some garbage data may still |
120 // be written to the output var, so reset it. | 182 // be written to the output var, so reset it. |
121 timestamp = 0; | 183 timestamp = 0; |
122 } | 184 } |
123 return base::Time::UnixEpoch() + base::TimeDelta::FromSeconds(timestamp); | 185 return base::Time::UnixEpoch() + base::TimeDelta::FromSeconds(timestamp); |
124 } | 186 } |
125 | 187 |
126 // static | 188 // static |
127 std::string NTPSnippet::TimeToJsonString(const base::Time& time) { | 189 std::string NTPSnippet::TimeToJsonString(const base::Time& time) { |
128 return base::Int64ToString((time - base::Time::UnixEpoch()).InSeconds()); | 190 return base::Int64ToString((time - base::Time::UnixEpoch()).InSeconds()); |
129 } | 191 } |
130 | 192 |
131 } // namespace ntp_snippets | 193 } // namespace ntp_snippets |
OLD | NEW |