Index: components/ntp_snippets/ntp_snippet.cc |
diff --git a/components/ntp_snippets/ntp_snippet.cc b/components/ntp_snippets/ntp_snippet.cc |
index dcac6d27908fa1b2f51124e3502818f6668543ed..f0ec5ddc8fcc9440831aa1add45271e84b23ad31 100644 |
--- a/components/ntp_snippets/ntp_snippet.cc |
+++ b/components/ntp_snippets/ntp_snippet.cc |
@@ -5,18 +5,20 @@ |
#include "components/ntp_snippets/ntp_snippet.h" |
#include "base/strings/string_number_conversions.h" |
+#include "base/strings/stringprintf.h" |
#include "base/values.h" |
namespace { |
const char kUrl[] = "url"; |
-const char kSiteTitle[] = "site_title"; |
const char kTitle[] = "title"; |
-const char kFaviconUrl[] = "favicon_url"; |
const char kSalientImageUrl[] = "thumbnailUrl"; |
const char kSnippet[] = "snippet"; |
const char kPublishDate[] = "creationTimestampSec"; |
const char kExpiryDate[] = "expiryTimestampSec"; |
+const char kSiteTitle[] = "sourceName"; |
+const char kPublisherData[] = "publisherData"; |
+const char kCorpusId[] = "corpusId"; |
const char kSourceCorpusInfo[] = "sourceCorpusInfo"; |
const char kAmpUrl[] = "ampUrl"; |
@@ -24,7 +26,7 @@ const char kAmpUrl[] = "ampUrl"; |
namespace ntp_snippets { |
-NTPSnippet::NTPSnippet(const GURL& url) : url_(url) { |
+NTPSnippet::NTPSnippet(const GURL& url) : url_(url), best_source_index_(0) { |
DCHECK(url_.is_valid()); |
} |
@@ -43,15 +45,9 @@ std::unique_ptr<NTPSnippet> NTPSnippet::CreateFromDictionary( |
std::unique_ptr<NTPSnippet> snippet(new NTPSnippet(url)); |
- std::string site_title; |
- if (dict.GetString(kSiteTitle, &site_title)) |
- snippet->set_site_title(site_title); |
std::string title; |
if (dict.GetString(kTitle, &title)) |
snippet->set_title(title); |
- std::string favicon_url; |
- if (dict.GetString(kFaviconUrl, &favicon_url)) |
- snippet->set_favicon_url(GURL(favicon_url)); |
std::string salient_image_url; |
if (dict.GetString(kSalientImageUrl, &salient_image_url)) |
snippet->set_salient_image_url(GURL(salient_image_url)); |
@@ -67,18 +63,77 @@ std::unique_ptr<NTPSnippet> NTPSnippet::CreateFromDictionary( |
snippet->set_expiry_date(TimeFromJsonString(expiry_timestamp_str)); |
const base::ListValue* corpus_infos_list = nullptr; |
- if (dict.GetList(kSourceCorpusInfo, &corpus_infos_list)) { |
- for (base::Value* value : *corpus_infos_list) { |
- const base::DictionaryValue* dict_value = nullptr; |
- if (value->GetAsDictionary(&dict_value)) { |
- std::string amp_url; |
- if (dict_value->GetString(kAmpUrl, &amp_url)) { |
- snippet->set_amp_url(GURL(amp_url)); |
- break; |
- } |
+ if (!dict.GetList(kSourceCorpusInfo, &corpus_infos_list)) { |
+ DLOG(WARNING) << "No sources found for article " << title; |
+ return nullptr; |
+ } |
+ |
+ for (base::Value* value : *corpus_infos_list) { |
+ const base::DictionaryValue* dict_value = nullptr; |
+ if (!value->GetAsDictionary(&dict_value)) { |
+ DLOG(WARNING) << "Invalid source info for article " << url_str; |
+ continue; |
+ } |
+ |
+ std::string corpus_id_str; |
+ GURL corpus_id; |
+ if (dict_value->GetString(kCorpusId, &corpus_id_str)) { |
Marc Treib
2016/04/29 12:33:32
nit: no braces required
May
2016/04/29 17:00:14
Done.
|
+ corpus_id = GURL(corpus_id_str); |
+ } |
+ |
+ if (!corpus_id.is_valid()) { |
+ // We must at least have a valid source URL |
Marc Treib
2016/04/29 12:33:32
nit: period after comment, also below
May
2016/04/29 17:00:15
Done.
|
+ DLOG(WARNING) << "Invalid article url " << corpus_id_str; |
+ continue; |
+ } |
+ |
+ const base::DictionaryValue* publisher_data = nullptr; |
+ std::string site_title; |
+ if (dict_value->GetDictionary(kPublisherData, &publisher_data)) { |
+ if (!publisher_data->GetString(kSiteTitle, &site_title)) { |
+ // It's possible but not desirable to have no publisher data |
+ DLOG(WARNING) << "No publisher name for article " << corpus_id.spec(); |
} |
+ } else { |
+ DLOG(WARNING) << "No publisher data for article " << corpus_id.spec(); |
+ } |
+ |
+ std::string amp_url_str; |
+ GURL amp_url; |
+ // Expected to not have AMP url sometimes |
+ if (dict_value->GetString(kAmpUrl, &amp_url_str)) { |
+ amp_url = GURL(amp_url_str); |
+ DLOG_IF(WARNING, !amp_url.is_valid()) << "Invalid AMP url " |
+ << amp_url_str; |
} |
+ SnippetSource source(corpus_id, site_title, |
+ amp_url.is_valid() ? amp_url : GURL()); |
+ snippet->add_source(source); |
} |
+ // The previous url we have saved can be one of several sources for the |
+ // article. For example, the same article can be hosted by nytimes.com, |
+ // cnn.com, etc. We need to parse the list of sources for this article and |
+ // find the best match. In order of preference: |
+ // 1) A source that has url, publisher name, AMP url |
+ // 2) A source that has url, publisher name |
+ // 3) A source that has url and AMP url, or url only (since we won't show |
+ // the snippet to users if the article does not have a publisher name, it |
+ // doesn't matter whether the snippet has the AMP url or not) |
+ size_t best_source_index = 0; |
+ for (size_t i = 0; i < snippet->sources_.size(); ++i) { |
+ const SnippetSource& best_source = snippet->sources_[i]; |
Marc Treib
2016/04/29 12:33:32
This should be just "source" now, no? It's not the
May
2016/04/29 17:00:15
Done.
|
+ if (!best_source.publisher_name.empty()) { |
+ best_source_index = i; |
+ if (!best_source.amp_url.is_empty()) { |
+ // We already have the best possible source |
Marc Treib
2016/04/29 12:33:32
"This is the best possible source, stop looking."
May
2016/04/29 17:00:14
Done.
|
+ break; |
+ } |
+ } |
+ } |
+ snippet->set_source_index(best_source_index); |
+ |
+ if (snippet->sources_.empty()) |
+ return nullptr; |
Marc Treib
2016/04/29 12:33:32
log?
May
2016/04/29 17:00:15
Done.
|
return snippet; |
} |
@@ -87,12 +142,8 @@ std::unique_ptr<base::DictionaryValue> NTPSnippet::ToDictionary() const { |
std::unique_ptr<base::DictionaryValue> dict(new base::DictionaryValue); |
dict->SetString(kUrl, url_.spec()); |
- if (!site_title_.empty()) |
- dict->SetString(kSiteTitle, site_title_); |
if (!title_.empty()) |
dict->SetString(kTitle, title_); |
- if (favicon_url_.is_valid()) |
- dict->SetString(kFaviconUrl, favicon_url_.spec()); |
if (salient_image_url_.is_valid()) |
dict->SetString(kSalientImageUrl, salient_image_url_.spec()); |
if (!snippet_.empty()) |
@@ -101,14 +152,25 @@ std::unique_ptr<base::DictionaryValue> NTPSnippet::ToDictionary() const { |
dict->SetString(kPublishDate, TimeToJsonString(publish_date_)); |
if (!expiry_date_.is_null()) |
dict->SetString(kExpiryDate, TimeToJsonString(expiry_date_)); |
- if (amp_url_.is_valid()) { |
- std::unique_ptr<base::ListValue> corpus_infos_list(new base::ListValue); |
+ |
+ std::unique_ptr<base::ListValue> corpus_infos_list(new base::ListValue); |
+ for (const SnippetSource source : sources_) { |
Marc Treib
2016/04/29 12:33:32
const SnippetSource&
Maybe SnippetSource should b
May
2016/04/29 17:00:15
Changed to SnippetSource&.
I can't think of when
Marc Treib
2016/04/30 13:51:19
Nope, it can remain a vector of instances, thanks
|
std::unique_ptr<base::DictionaryValue> corpus_info_dict( |
new base::DictionaryValue); |
- corpus_info_dict->SetString(kAmpUrl, amp_url_.spec()); |
- corpus_infos_list->Set(0, std::move(corpus_info_dict)); |
- dict->Set(kSourceCorpusInfo, std::move(corpus_infos_list)); |
+ |
+ corpus_info_dict->SetString(kCorpusId, source.url.spec()); |
+ if (!source.amp_url.is_empty()) |
+ corpus_info_dict->SetString(kAmpUrl, source.amp_url.spec()); |
+ if (!source.publisher_name.empty()) |
+ corpus_info_dict->SetString( |
+ base::StringPrintf("%s.%s", kPublisherData, kSiteTitle), |
+ source.publisher_name); |
+ |
+ corpus_infos_list->Append(std::move(corpus_info_dict)); |
} |
+ |
+ dict->Set(kSourceCorpusInfo, std::move(corpus_infos_list)); |
+ |
return dict; |
} |