Index: components/dom_distiller/core/distiller.cc |
diff --git a/components/dom_distiller/core/distiller.cc b/components/dom_distiller/core/distiller.cc |
index 84b8a0508db4eb8951c5b7443da8f6aa4cbe3d9b..7b4c92a061a733d08d782a2e9b2429b55fc07325 100644 |
--- a/components/dom_distiller/core/distiller.cc |
+++ b/components/dom_distiller/core/distiller.cc |
@@ -8,16 +8,19 @@ |
#include "base/bind.h" |
#include "base/callback.h" |
-#include "base/strings/stringprintf.h" |
+#include "base/strings/string_number_conversions.h" |
#include "base/strings/utf_string_conversions.h" |
#include "base/values.h" |
#include "components/dom_distiller/core/distiller_page.h" |
#include "components/dom_distiller/core/distiller_url_fetcher.h" |
+#include "components/dom_distiller/core/proto/distilled_article.pb.h" |
#include "components/dom_distiller/core/proto/distilled_page.pb.h" |
-#include "grit/dom_distiller_resources.h" |
#include "net/url_request/url_request_context_getter.h" |
-#include "ui/base/resource/resource_bundle.h" |
-#include "url/gurl.h" |
+ |
+namespace { |
+// Maximum number of distilled pages in an article. DistillerImpl::DistillPage() |
+// stops following next-page URLs once the article holds this many pages. |
+const int kMaxPagesInArticle = 32; |
+}  // namespace |
namespace dom_distiller { |
@@ -39,90 +42,101 @@ scoped_ptr<Distiller> DistillerFactoryImpl::CreateDistiller() { |
+// Keeps a reference to the URL fetcher factory (used later for image fetches) |
+// and builds the PageDistiller from distiller_page_factory. The object starts |
+// with no distillation in progress. |
DistillerImpl::DistillerImpl( |
const DistillerPageFactory& distiller_page_factory, |
const DistillerURLFetcherFactory& distiller_url_fetcher_factory) |
-    : distiller_page_factory_(distiller_page_factory), |
-      distiller_url_fetcher_factory_(distiller_url_fetcher_factory) { |
-  distiller_page_ = distiller_page_factory_.CreateDistillerPage(this).Pass(); |
+    : distiller_url_fetcher_factory_(distiller_url_fetcher_factory), |
+      distillation_in_progress_(false) { |
+  page_distiller_.reset(new PageDistiller(distiller_page_factory)); |
} |
DistillerImpl::~DistillerImpl() { |
} |
+// Initializes the page distiller and replaces article_proto_ with a fresh, |
+// empty DistilledArticleProto. Must not be called while a distillation is in |
+// progress (DCHECKed). |
void DistillerImpl::Init() { |
-  distiller_page_->Init(); |
+  DCHECK(!distillation_in_progress_); |
+  page_distiller_->Init(); |
+  article_proto_.reset(new DistilledArticleProto()); |
} |
+// Public entry point: remembers distillation_cb and starts distilling at url |
+// through the single-argument overload. The stored callback is run by |
+// RunDistillerCallbackIfDone(). |
void DistillerImpl::DistillPage(const GURL& url, |
const DistillerCallback& distillation_cb) { |
+  DCHECK(!distillation_in_progress_); |
distillation_cb_ = distillation_cb; |
-  proto_.reset(new DistilledPageProto()); |
-  proto_->set_url(url.spec()); |
-  LoadURL(url); |
+  DistillPage(url); |
} |
-void DistillerImpl::LoadURL(const GURL& url) { |
- distiller_page_->LoadURL(url); |
+// Distills url as the next page of the article, or finishes the article. |
+// A page is distilled only when url is valid, the article holds fewer than |
+// kMaxPagesInArticle pages, and url has not been processed before (which stops |
+// next-page loops). Otherwise the page chain ends here and the completion |
+// callback is run once nothing else is outstanding. |
+void DistillerImpl::DistillPage(const GURL& url) { |
+  DCHECK(!distillation_in_progress_); |
+  if (url.is_valid() && article_proto_->pages_size() < kMaxPagesInArticle && |
+      processed_urls_.find(url.spec()) == processed_urls_.end()) { |
+    distillation_in_progress_ = true; |
+    // Distill the next page. |
+    // The two DCHECKs below restate the branch condition above. |
+    DCHECK(url.is_valid()); |
+    DCHECK_LT(article_proto_->pages_size(), kMaxPagesInArticle); |
+    // url is bound into the callback so OnPageDistillationFinished() knows |
+    // which page the result belongs to. |
+    page_distiller_->DistillPage( |
+        url, |
+        base::Bind(&DistillerImpl::OnPageDistillationFinished, |
+                   base::Unretained(this), |
+                   url)); |
+  } else { |
+    RunDistillerCallbackIfDone(); |
+  } |
} |
-void DistillerImpl::OnLoadURLDone() { |
- GetDistilledContent(); |
-} |
+// Completion callback for the page distillation started by DistillPage(url). |
+// page_url is the URL that was distilled, distilled_page carries its title, |
+// html, image URLs and next-page URL, and distillation_successful reports |
+// whether the page could be distilled. |
+// On success the page is appended to article_proto_, its images are fetched, |
+// and distillation continues with the next page. On failure the page chain |
+// ends and the article collected so far is delivered once pending image |
+// fetches complete. |
+void DistillerImpl::OnPageDistillationFinished( |
+    const GURL& page_url, |
+    scoped_ptr<DistilledPageInfo> distilled_page, |
+    bool distillation_successful) { |
+  DCHECK(distillation_in_progress_); |
+  DCHECK(distilled_page.get()); |
+  // This page is finished whether or not it succeeded. Clear the flag before |
+  // branching: RunDistillerCallbackIfDone() only fires when it is false, so |
+  // leaving it set on failure would mean the callback is never run. |
+  distillation_in_progress_ = false; |
+  if (!distillation_successful) { |
+    RunDistillerCallbackIfDone(); |
+  } else { |
+    DistilledPageProto* current_page = article_proto_->add_pages(); |
+    // Set the title of the article as the title of the first page. |
+    if (article_proto_->pages_size() == 1) { |
+      article_proto_->set_title(distilled_page->title); |
+    } |
-void DistillerImpl::GetDistilledContent() { |
-  std::string script = |
-      ResourceBundle::GetSharedInstance().GetRawDataResource( |
-          IDR_DISTILLER_JS).as_string(); |
-  distiller_page_->ExecuteJavaScript(script); |
-} |
+    current_page->set_url(page_url.spec()); |
+    current_page->set_html(distilled_page->html); |
-void DistillerImpl::OnExecuteJavaScriptDone(const base::Value* value) { |
-  std::string result; |
-  bool fetched_image = false; |
-  const base::ListValue* result_list = NULL; |
-  if (!value->GetAsList(&result_list)) { |
-    DCHECK(proto_); |
-    distillation_cb_.Run(proto_.Pass()); |
-    return; |
-  } |
-  int i = 0; |
-  for (base::ListValue::const_iterator iter = result_list->begin(); |
-      iter != result_list->end(); ++iter, ++i) { |
-    std::string item; |
-    (*iter)->GetAsString(&item); |
-    // The JavaScript returns an array where the first element is the title, |
-    // the second element is the article content HTML, and the remaining |
-    // elements are image URLs referenced in the HTML. |
-    switch (i) { |
-      case 0: |
-        proto_->set_title(item); |
-        break; |
-      case 1: |
-        proto_->set_html(item); |
-        break; |
-      default: |
-        int image_number = i - 2; |
-        std::string image_id = base::StringPrintf("%d", image_number); |
-        FetchImage(image_id, item); |
-        fetched_image = true; |
+    GURL next_page_url(distilled_page->next_page_url); |
+    if (next_page_url.is_valid()) { |
+      // The pages should be in the same origin. |
+      DCHECK_EQ(next_page_url.GetOrigin(), page_url.GetOrigin()); |
+    } |
+ |
+    // Remember this URL so DistillPage() will not distill it a second time. |
+    processed_urls_.insert(page_url.spec()); |
+    // Image ids are "<page number>_<image index>", with pages counted from 1. |
+    int page_number = article_proto_->pages_size(); |
+    for (size_t img_num = 0; img_num < distilled_page->image_urls.size(); |
+         ++img_num) { |
+      std::string image_id = |
+          base::IntToString(page_number) + "_" + base::IntToString(img_num); |
+      FetchImage(current_page, image_id, distilled_page->image_urls[img_num]); |
} |
+    // Continue with the next page; an invalid or already-seen URL ends the |
+    // chain inside DistillPage(). |
+    DistillPage(next_page_url); |
} |
-  if (!fetched_image) |
-    distillation_cb_.Run(proto_.Pass()); |
} |
-void DistillerImpl::FetchImage(const std::string& image_id, |
+// Starts fetching the image at URL item for the page distilled_page_proto. |
+// The fetcher is stored in image_fetchers_ under image_id, and |
+// OnFetchImageDone() is called with the same page pointer and id when the |
+// fetch completes. |
+void DistillerImpl::FetchImage(DistilledPageProto* distilled_page_proto, |
+                               const std::string& image_id, |
const std::string& item) { |
DistillerURLFetcher* fetcher = |
distiller_url_fetcher_factory_.CreateDistillerURLFetcher(); |
+  // Track the fetcher by id; RunDistillerCallbackIfDone() waits until this map |
+  // is empty. |
image_fetchers_[image_id] = fetcher; |
fetcher->FetchURL(item, |
base::Bind(&DistillerImpl::OnFetchImageDone, |
-                             base::Unretained(this), image_id)); |
+                             base::Unretained(this), |
+                             base::Unretained(distilled_page_proto), |
+                             image_id)); |
} |
-void DistillerImpl::OnFetchImageDone(const std::string& id, |
+void DistillerImpl::OnFetchImageDone(DistilledPageProto* distilled_page_proto, |
+ const std::string& id, |
const std::string& response) { |
- DCHECK(proto_); |
- DistilledPageProto_Image* image = proto_->add_image(); |
+ DCHECK_GT(article_proto_->pages_size(), 0); |
+ DCHECK(distilled_page_proto); |
+ DistilledPageProto_Image* image = distilled_page_proto->add_image(); |
image->set_name(id); |
image->set_data(response); |
DCHECK(image_fetchers_.end() != image_fetchers_.find(id)); |
@@ -130,8 +144,12 @@ void DistillerImpl::OnFetchImageDone(const std::string& id, |
int result = image_fetchers_.erase(id); |
delete fetcher; |
DCHECK_EQ(1, result); |
- if (image_fetchers_.empty()) { |
- distillation_cb_.Run(proto_.Pass()); |
+ RunDistillerCallbackIfDone(); |
+} |
+ |
+// Runs distillation_cb_ with the collected article once the whole article is |
+// finished: no image fetchers are outstanding and no page distillation is in |
+// progress. Does nothing otherwise, so callers invoke it after every event |
+// that could complete the article. |
+void DistillerImpl::RunDistillerCallbackIfDone() { |
+  if (image_fetchers_.empty() && !distillation_in_progress_) { |
+    // Pass() hands ownership of the article proto to the callback. |
+    distillation_cb_.Run(article_proto_.Pass()); |
} |
} |