OLD | NEW |
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/dom_distiller/core/distiller.h" | 5 #include "components/dom_distiller/core/distiller.h" |
6 | 6 |
7 #include <map> | 7 #include <map> |
8 | 8 |
9 #include "base/bind.h" | 9 #include "base/bind.h" |
10 #include "base/callback.h" | 10 #include "base/callback.h" |
11 #include "base/strings/stringprintf.h" | 11 #include "base/strings/string_number_conversions.h" |
12 #include "base/strings/utf_string_conversions.h" | 12 #include "base/strings/utf_string_conversions.h" |
13 #include "base/values.h" | 13 #include "base/values.h" |
14 #include "components/dom_distiller/core/distiller_page.h" | 14 #include "components/dom_distiller/core/distiller_page.h" |
15 #include "components/dom_distiller/core/distiller_url_fetcher.h" | 15 #include "components/dom_distiller/core/distiller_url_fetcher.h" |
| 16 #include "components/dom_distiller/core/proto/distilled_article.pb.h" |
16 #include "components/dom_distiller/core/proto/distilled_page.pb.h" | 17 #include "components/dom_distiller/core/proto/distilled_page.pb.h" |
17 #include "grit/dom_distiller_resources.h" | |
18 #include "net/url_request/url_request_context_getter.h" | 18 #include "net/url_request/url_request_context_getter.h" |
19 #include "ui/base/resource/resource_bundle.h" | 19 |
namespace {

// Upper bound on the number of distilled pages a single article may hold.
// DistillerImpl::DistillPage() stops following next-page links once the
// article already contains this many pages.
const int kMaxPagesInArticle = 32;

}  // namespace
21 | 24 |
22 namespace dom_distiller { | 25 namespace dom_distiller { |
23 | 26 |
24 DistillerFactoryImpl::DistillerFactoryImpl( | 27 DistillerFactoryImpl::DistillerFactoryImpl( |
25 scoped_ptr<DistillerPageFactory> distiller_page_factory, | 28 scoped_ptr<DistillerPageFactory> distiller_page_factory, |
26 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory) | 29 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory) |
27 : distiller_page_factory_(distiller_page_factory.Pass()), | 30 : distiller_page_factory_(distiller_page_factory.Pass()), |
28 distiller_url_fetcher_factory_(distiller_url_fetcher_factory.Pass()) {} | 31 distiller_url_fetcher_factory_(distiller_url_fetcher_factory.Pass()) {} |
29 | 32 |
30 DistillerFactoryImpl::~DistillerFactoryImpl() {} | 33 DistillerFactoryImpl::~DistillerFactoryImpl() {} |
31 | 34 |
32 scoped_ptr<Distiller> DistillerFactoryImpl::CreateDistiller() { | 35 scoped_ptr<Distiller> DistillerFactoryImpl::CreateDistiller() { |
33 scoped_ptr<DistillerImpl> distiller(new DistillerImpl( | 36 scoped_ptr<DistillerImpl> distiller(new DistillerImpl( |
34 *distiller_page_factory_, *distiller_url_fetcher_factory_)); | 37 *distiller_page_factory_, *distiller_url_fetcher_factory_)); |
35 distiller->Init(); | 38 distiller->Init(); |
36 return distiller.PassAs<Distiller>(); | 39 return distiller.PassAs<Distiller>(); |
37 } | 40 } |
38 | 41 |
39 DistillerImpl::DistillerImpl( | 42 DistillerImpl::DistillerImpl( |
40 const DistillerPageFactory& distiller_page_factory, | 43 const DistillerPageFactory& distiller_page_factory, |
41 const DistillerURLFetcherFactory& distiller_url_fetcher_factory) | 44 const DistillerURLFetcherFactory& distiller_url_fetcher_factory) |
42 : distiller_page_factory_(distiller_page_factory), | 45 : distiller_url_fetcher_factory_(distiller_url_fetcher_factory), |
43 distiller_url_fetcher_factory_(distiller_url_fetcher_factory) { | 46 distillation_in_progress_(false) { |
44 distiller_page_ = distiller_page_factory_.CreateDistillerPage(this).Pass(); | 47 page_distiller_.reset(new PageDistiller(distiller_page_factory)); |
45 } | 48 } |
46 | 49 |
47 DistillerImpl::~DistillerImpl() { | 50 DistillerImpl::~DistillerImpl() { |
48 } | 51 } |
49 | 52 |
50 void DistillerImpl::Init() { | 53 void DistillerImpl::Init() { |
51 distiller_page_->Init(); | 54 DCHECK(!distillation_in_progress_); |
| 55 page_distiller_->Init(); |
| 56 article_proto_.reset(new DistilledArticleProto()); |
52 } | 57 } |
53 | 58 |
54 void DistillerImpl::DistillPage(const GURL& url, | 59 void DistillerImpl::DistillPage(const GURL& url, |
55 const DistillerCallback& distillation_cb) { | 60 const DistillerCallback& distillation_cb) { |
| 61 DCHECK(!distillation_in_progress_); |
56 distillation_cb_ = distillation_cb; | 62 distillation_cb_ = distillation_cb; |
57 proto_.reset(new DistilledPageProto()); | 63 DistillPage(url); |
58 proto_->set_url(url.spec()); | |
59 LoadURL(url); | |
60 } | 64 } |
61 | 65 |
62 void DistillerImpl::LoadURL(const GURL& url) { | 66 void DistillerImpl::DistillPage(const GURL& url) { |
63 distiller_page_->LoadURL(url); | 67 DCHECK(!distillation_in_progress_); |
| 68 if (url.is_valid() && article_proto_->pages_size() < kMaxPagesInArticle && |
| 69 processed_urls_.find(url.spec()) == processed_urls_.end()) { |
| 70 distillation_in_progress_ = true; |
| 71 // Distill the next page. |
| 72 DCHECK(url.is_valid()); |
| 73 DCHECK_LT(article_proto_->pages_size(), kMaxPagesInArticle); |
| 74 page_distiller_->DistillPage( |
| 75 url, |
| 76 base::Bind(&DistillerImpl::OnPageDistillationFinished, |
| 77 base::Unretained(this), |
| 78 url)); |
| 79 } else { |
| 80 RunDistillerCallbackIfDone(); |
| 81 } |
64 } | 82 } |
65 | 83 |
66 void DistillerImpl::OnLoadURLDone() { | 84 void DistillerImpl::OnPageDistillationFinished( |
67 GetDistilledContent(); | 85 const GURL& page_url, |
| 86 scoped_ptr<DistilledPageInfo> distilled_page, |
| 87 bool distillation_successful) { |
| 88 DCHECK(distillation_in_progress_); |
| 89 DCHECK(distilled_page.get()); |
| 90 if (!distillation_successful) { |
| 91 RunDistillerCallbackIfDone(); |
| 92 } else { |
| 93 DistilledPageProto* current_page = article_proto_->add_pages(); |
| 94 // Set the title of the article as the title of the first page. |
| 95 if (article_proto_->pages_size() == 1) { |
| 96 article_proto_->set_title(distilled_page->title); |
| 97 } |
| 98 |
| 99 current_page->set_url(page_url.spec()); |
| 100 current_page->set_html(distilled_page->html); |
| 101 |
| 102 GURL next_page_url(distilled_page->next_page_url); |
| 103 if (next_page_url.is_valid()) { |
| 104 // The pages should be in same origin. |
| 105 DCHECK_EQ(next_page_url.GetOrigin(), page_url.GetOrigin()); |
| 106 } |
| 107 |
| 108 processed_urls_.insert(page_url.spec()); |
| 109 distillation_in_progress_ = false; |
| 110 int page_number = article_proto_->pages_size(); |
| 111 for (size_t img_num = 0; img_num < distilled_page->image_urls.size(); |
| 112 ++img_num) { |
| 113 std::string image_id = |
| 114 base::IntToString(page_number) + "_" + base::IntToString(img_num); |
| 115 FetchImage(current_page, image_id, distilled_page->image_urls[img_num]); |
| 116 } |
| 117 DistillPage(next_page_url); |
| 118 } |
68 } | 119 } |
69 | 120 |
70 void DistillerImpl::GetDistilledContent() { | 121 void DistillerImpl::FetchImage(DistilledPageProto* distilled_page_proto, |
71 std::string script = | 122 const std::string& image_id, |
72 ResourceBundle::GetSharedInstance().GetRawDataResource( | |
73 IDR_DISTILLER_JS).as_string(); | |
74 distiller_page_->ExecuteJavaScript(script); | |
75 } | |
76 | |
77 void DistillerImpl::OnExecuteJavaScriptDone(const base::Value* value) { | |
78 std::string result; | |
79 bool fetched_image = false; | |
80 const base::ListValue* result_list = NULL; | |
81 if (!value->GetAsList(&result_list)) { | |
82 DCHECK(proto_); | |
83 distillation_cb_.Run(proto_.Pass()); | |
84 return; | |
85 } | |
86 int i = 0; | |
87 for (base::ListValue::const_iterator iter = result_list->begin(); | |
88 iter != result_list->end(); ++iter, ++i) { | |
89 std::string item; | |
90 (*iter)->GetAsString(&item); | |
91 // The JavaScript returns an array where the first element is the title, | |
92 // the second element is the article content HTML, and the remaining | |
93 // elements are image URLs referenced in the HTML. | |
94 switch (i) { | |
95 case 0: | |
96 proto_->set_title(item); | |
97 break; | |
98 case 1: | |
99 proto_->set_html(item); | |
100 break; | |
101 default: | |
102 int image_number = i - 2; | |
103 std::string image_id = base::StringPrintf("%d", image_number); | |
104 FetchImage(image_id, item); | |
105 fetched_image = true; | |
106 } | |
107 } | |
108 if (!fetched_image) | |
109 distillation_cb_.Run(proto_.Pass()); | |
110 } | |
111 | |
112 void DistillerImpl::FetchImage(const std::string& image_id, | |
113 const std::string& item) { | 123 const std::string& item) { |
114 DistillerURLFetcher* fetcher = | 124 DistillerURLFetcher* fetcher = |
115 distiller_url_fetcher_factory_.CreateDistillerURLFetcher(); | 125 distiller_url_fetcher_factory_.CreateDistillerURLFetcher(); |
116 image_fetchers_[image_id] = fetcher; | 126 image_fetchers_[image_id] = fetcher; |
117 fetcher->FetchURL(item, | 127 fetcher->FetchURL(item, |
118 base::Bind(&DistillerImpl::OnFetchImageDone, | 128 base::Bind(&DistillerImpl::OnFetchImageDone, |
119 base::Unretained(this), image_id)); | 129 base::Unretained(this), |
| 130 base::Unretained(distilled_page_proto), |
| 131 image_id)); |
120 } | 132 } |
121 | 133 |
122 void DistillerImpl::OnFetchImageDone(const std::string& id, | 134 void DistillerImpl::OnFetchImageDone(DistilledPageProto* distilled_page_proto, |
| 135 const std::string& id, |
123 const std::string& response) { | 136 const std::string& response) { |
124 DCHECK(proto_); | 137 DCHECK_GT(article_proto_->pages_size(), 0); |
125 DistilledPageProto_Image* image = proto_->add_image(); | 138 DCHECK(distilled_page_proto); |
| 139 DistilledPageProto_Image* image = distilled_page_proto->add_image(); |
126 image->set_name(id); | 140 image->set_name(id); |
127 image->set_data(response); | 141 image->set_data(response); |
128 DCHECK(image_fetchers_.end() != image_fetchers_.find(id)); | 142 DCHECK(image_fetchers_.end() != image_fetchers_.find(id)); |
129 DistillerURLFetcher* fetcher = image_fetchers_[id]; | 143 DistillerURLFetcher* fetcher = image_fetchers_[id]; |
130 int result = image_fetchers_.erase(id); | 144 int result = image_fetchers_.erase(id); |
131 delete fetcher; | 145 delete fetcher; |
132 DCHECK_EQ(1, result); | 146 DCHECK_EQ(1, result); |
133 if (image_fetchers_.empty()) { | 147 RunDistillerCallbackIfDone(); |
134 distillation_cb_.Run(proto_.Pass()); | 148 } |
| 149 |
| 150 void DistillerImpl::RunDistillerCallbackIfDone() { |
| 151 if (image_fetchers_.empty() && !distillation_in_progress_) { |
| 152 distillation_cb_.Run(article_proto_.Pass()); |
135 } | 153 } |
136 } | 154 } |
137 | 155 |
138 } // namespace dom_distiller | 156 } // namespace dom_distiller |
OLD | NEW |