| Index: components/dom_distiller/core/distiller.cc
|
| diff --git a/components/dom_distiller/core/distiller.cc b/components/dom_distiller/core/distiller.cc
|
| index 8e155ebc2788356c16cdd2fcfde01df6df3a457b..9bd8ba22b0e0c9ffee0bfaa3297a3c7e207730a7 100644
|
| --- a/components/dom_distiller/core/distiller.cc
|
| +++ b/components/dom_distiller/core/distiller.cc
|
| @@ -21,7 +21,7 @@
|
|
|
| namespace {
|
| // Maximum number of distilled pages in an article.
|
| -const int kMaxPagesInArticle = 32;
|
| +const size_t kMaxPagesInArticle = 32;
|
| }
|
|
|
| namespace dom_distiller {
|
| @@ -41,64 +41,105 @@ scoped_ptr<Distiller> DistillerFactoryImpl::CreateDistiller() {
|
| return distiller.PassAs<Distiller>();
|
| }
|
|
|
| +// Out-of-line constructor for the per-page bookkeeping record; its body is
|
| +// empty.
|
| +DistillerImpl::DistilledPageData::DistilledPageData() {
|
| +}
|
| +
|
| +// Matching out-of-line destructor; its body is also empty.
|
| +DistillerImpl::DistilledPageData::~DistilledPageData() {
|
| +}
|
| +
|
| DistillerImpl::DistillerImpl(
|
| const DistillerPageFactory& distiller_page_factory,
|
| const DistillerURLFetcherFactory& distiller_url_fetcher_factory)
|
| : distiller_url_fetcher_factory_(distiller_url_fetcher_factory),
|
| - distillation_in_progress_(false) {
|
| + max_pages_in_article_(kMaxPagesInArticle) {
|
| page_distiller_.reset(new PageDistiller(distiller_page_factory));
|
| }
|
|
|
| -DistillerImpl::~DistillerImpl() {
|
| - DCHECK(image_fetchers_.empty());
|
| - DCHECK(!distillation_in_progress_);
|
| -}
|
| +DistillerImpl::~DistillerImpl() { DCHECK(AreAllPagesFinished()); }
|
|
|
| void DistillerImpl::Init() {
|
| - DCHECK(!distillation_in_progress_);
|
| + DCHECK(AreAllPagesFinished());
|
| page_distiller_->Init();
|
| - article_proto_.reset(new DistilledArticleProto());
|
| +}
|
| +
|
| +// Overrides the page limit that AddToDistillationQueue() compares
|
| +// TotalPageCount() against; the constructor seeds it with kMaxPagesInArticle.
|
| +void DistillerImpl::SetMaxNumPagesInArticle(size_t max_num_pages) {
|
| +  this->max_pages_in_article_ = max_num_pages;
|
| +}
|
| +
|
| +// Returns true when no page is in flight (started) and none is still queued
|
| +// (waiting). Finished pages do not affect the result.
|
| +bool DistillerImpl::AreAllPagesFinished() const {
|
| +  if (!started_pages_index_.empty())
|
| +    return false;
|
| +  return waiting_pages_.empty();
|
| +}
|
| +
|
| +// Returns how many pages are tracked in total: waiting, started and finished.
|
| +size_t DistillerImpl::TotalPageCount() const {
|
| +  size_t tracked_pages = waiting_pages_.size();
|
| +  tracked_pages += started_pages_index_.size();
|
| +  tracked_pages += finished_pages_index_.size();
|
| +  return tracked_pages;
|
| +}
|
| +
|
| +// Queues |url| for distillation as page |page_num|. The request is silently
|
| +// dropped when the page number is already tracked, the URL is invalid, the
|
| +// page limit has been reached, or the URL has already been seen.
|
| +void DistillerImpl::AddToDistillationQueue(int page_num, const GURL& url) {
|
| +  if (IsPageNumberInUse(page_num) || !url.is_valid())
|
| +    return;
|
| +  if (TotalPageCount() >= max_pages_in_article_)
|
| +    return;
|
| +  if (seen_urls_.find(url.spec()) != seen_urls_.end())
|
| +    return;
|
| +
|
| +  waiting_pages_[page_num] = url;
|
| +}
|
| +
|
| +// Returns true if |page_num| is already tracked in any state: waiting,
|
| +// started or finished.
|
| +bool DistillerImpl::IsPageNumberInUse(int page_num) const {
|
| +  if (waiting_pages_.count(page_num) > 0)
|
| +    return true;
|
| +  if (started_pages_index_.count(page_num) > 0)
|
| +    return true;
|
| +  return finished_pages_index_.count(page_num) > 0;
|
| +}
|
| +
|
| +// Returns the entry of |pages_| stored at |index|. DCHECKs that |index| is in
|
| +// range and that the stored entry is non-null.
|
| +DistillerImpl::DistilledPageData* DistillerImpl::GetPageAtIndex(
|
| +    size_t index) const {
|
| +  DCHECK_LT(index, pages_.size());
|
| +  DistilledPageData* const entry = pages_[index];
|
| +  DCHECK(entry);
|
| +  return entry;
|
| }
|
|
|
| +// Starts distilling the article whose first page is |url|. |distillation_cb|
|
| +// is stored and run by RunDistillerCallbackIfDone() once no page is waiting
|
| +// or in flight. Must only be called while no distillation is in progress.
|
| void DistillerImpl::DistillPage(const GURL& url,
|
| const DistillerCallback& distillation_cb) {
|
| - DCHECK(!distillation_in_progress_);
|
| +  DCHECK(AreAllPagesFinished());
|
| distillation_cb_ = distillation_cb;
|
| - DistillPage(url);
|
| +
|
| +  AddToDistillationQueue(0, url);
|
| +  if (waiting_pages_.empty()) {
|
| +    // AddToDistillationQueue() rejected |url| (for example because it is
|
| +    // invalid or was already seen), so no page distillation will ever
|
| +    // complete. Report the (empty) article right away, as the code did
|
| +    // before this change, instead of never running the callback.
|
| +    RunDistillerCallbackIfDone();
|
| +    return;
|
| +  }
|
| +  DistillNextPage();
|
| }
|
|
|
| -void DistillerImpl::DistillPage(const GURL& url) {
|
| - DCHECK(!distillation_in_progress_);
|
| - if (url.is_valid() && article_proto_->pages_size() < kMaxPagesInArticle &&
|
| - processed_urls_.find(url.spec()) == processed_urls_.end()) {
|
| - distillation_in_progress_ = true;
|
| - // Distill the next page.
|
| +void DistillerImpl::DistillNextPage() {
|
| + if (!waiting_pages_.empty()) {
|
| + std::map<int, GURL>::iterator front = waiting_pages_.begin();
|
| + int page_num = front->first;
|
| + const GURL url = front->second;
|
| +
|
| + waiting_pages_.erase(front);
|
| DCHECK(url.is_valid());
|
| - DCHECK_LT(article_proto_->pages_size(), kMaxPagesInArticle);
|
| + DCHECK(started_pages_index_.find(page_num) == started_pages_index_.end());
|
| + DCHECK(finished_pages_index_.find(page_num) == finished_pages_index_.end());
|
| + seen_urls_.insert(url.spec());
|
| + pages_.push_back(new DistilledPageData());
|
| + started_pages_index_[page_num] = pages_.size() - 1;
|
| page_distiller_->DistillPage(
|
| url,
|
| base::Bind(&DistillerImpl::OnPageDistillationFinished,
|
| base::Unretained(this),
|
| + page_num,
|
| url));
|
| - } else {
|
| - RunDistillerCallbackIfDone();
|
| }
|
| }
|
|
|
| void DistillerImpl::OnPageDistillationFinished(
|
| + int page_num,
|
| const GURL& page_url,
|
| scoped_ptr<DistilledPageInfo> distilled_page,
|
| bool distillation_successful) {
|
| - DCHECK(distillation_in_progress_);
|
| DCHECK(distilled_page.get());
|
| - if (!distillation_successful) {
|
| - RunDistillerCallbackIfDone();
|
| - } else {
|
| - DistilledPageProto* current_page = article_proto_->add_pages();
|
| - // Set the title of the article as the title of the first page.
|
| - if (article_proto_->pages_size() == 1) {
|
| - article_proto_->set_title(distilled_page->title);
|
| - }
|
| + DCHECK(started_pages_index_.find(page_num) != started_pages_index_.end());
|
| + if (distillation_successful) {
|
| + DistilledPageData* page_data =
|
| + GetPageAtIndex(started_pages_index_[page_num]);
|
| + DistilledPageProto* current_page = new DistilledPageProto();
|
| + page_data->proto.reset(current_page);
|
| + page_data->page_num = page_num;
|
| + page_data->title = distilled_page->title;
|
|
|
| current_page->set_url(page_url.spec());
|
| current_page->set_html(distilled_page->html);
|
| @@ -107,59 +148,106 @@ void DistillerImpl::OnPageDistillationFinished(
|
| if (next_page_url.is_valid()) {
|
| // The pages should be in same origin.
|
| DCHECK_EQ(next_page_url.GetOrigin(), page_url.GetOrigin());
|
| + AddToDistillationQueue(page_num + 1, next_page_url);
|
| }
|
|
|
| - processed_urls_.insert(page_url.spec());
|
| - distillation_in_progress_ = false;
|
| - int page_number = article_proto_->pages_size();
|
| for (size_t img_num = 0; img_num < distilled_page->image_urls.size();
|
| ++img_num) {
|
| std::string image_id =
|
| - base::IntToString(page_number) + "_" + base::IntToString(img_num);
|
| - FetchImage(current_page, image_id, distilled_page->image_urls[img_num]);
|
| + base::IntToString(page_num + 1) + "_" + base::IntToString(img_num);
|
| + FetchImage(page_num, image_id, distilled_page->image_urls[img_num]);
|
| }
|
| - DistillPage(next_page_url);
|
| +
|
| + AddPageIfDone(page_num);
|
| + DistillNextPage();
|
| + } else {
|
| + started_pages_index_.erase(page_num);
|
| + RunDistillerCallbackIfDone();
|
| }
|
| }
|
|
|
| -void DistillerImpl::FetchImage(DistilledPageProto* distilled_page_proto,
|
| +void DistillerImpl::FetchImage(int page_num,
|
| const std::string& image_id,
|
| const std::string& item) {
|
| + DCHECK(started_pages_index_.find(page_num) != started_pages_index_.end());
|
| + DistilledPageData* page_data = GetPageAtIndex(started_pages_index_[page_num]);
|
| DistillerURLFetcher* fetcher =
|
| distiller_url_fetcher_factory_.CreateDistillerURLFetcher();
|
| - image_fetchers_.push_back(fetcher);
|
| + page_data->image_fetchers_.push_back(fetcher);
|
| +
|
| fetcher->FetchURL(item,
|
| base::Bind(&DistillerImpl::OnFetchImageDone,
|
| base::Unretained(this),
|
| - base::Unretained(distilled_page_proto),
|
| + page_num,
|
| base::Unretained(fetcher),
|
| image_id));
|
| }
|
|
|
| -void DistillerImpl::OnFetchImageDone(DistilledPageProto* distilled_page_proto,
|
| +void DistillerImpl::OnFetchImageDone(int page_num,
|
| DistillerURLFetcher* url_fetcher,
|
| const std::string& id,
|
| const std::string& response) {
|
| - DCHECK_GT(article_proto_->pages_size(), 0);
|
| - DCHECK(distilled_page_proto);
|
| + DCHECK(started_pages_index_.find(page_num) != started_pages_index_.end());
|
| + DistilledPageData* page_data = GetPageAtIndex(started_pages_index_[page_num]);
|
| + DCHECK(page_data->proto);
|
| DCHECK(url_fetcher);
|
| ScopedVector<DistillerURLFetcher>::iterator fetcher_it =
|
| - std::find(image_fetchers_.begin(), image_fetchers_.end(), url_fetcher);
|
| + std::find(page_data->image_fetchers_.begin(),
|
| + page_data->image_fetchers_.end(),
|
| + url_fetcher);
|
|
|
| - DCHECK(fetcher_it != image_fetchers_.end());
|
| + DCHECK(fetcher_it != page_data->image_fetchers_.end());
|
| // Delete the |url_fetcher| by DeleteSoon since the OnFetchImageDone
|
| // callback is invoked by the |url_fetcher|.
|
| - image_fetchers_.weak_erase(fetcher_it);
|
| + page_data->image_fetchers_.weak_erase(fetcher_it);
|
| base::MessageLoop::current()->DeleteSoon(FROM_HERE, url_fetcher);
|
| - DistilledPageProto_Image* image = distilled_page_proto->add_image();
|
| +
|
| + DistilledPageProto_Image* image = page_data->proto->add_image();
|
| image->set_name(id);
|
| image->set_data(response);
|
| - RunDistillerCallbackIfDone();
|
| +
|
| + AddPageIfDone(page_num);
|
| +}
|
| +
|
| +// Moves |page_num| from the started set to the finished set once it has no
|
| +// outstanding image fetchers, then checks whether the article is complete.
|
| +// Does nothing while image fetches for the page are still pending.
|
| +void DistillerImpl::AddPageIfDone(int page_num) {
|
| +  DCHECK(started_pages_index_.find(page_num) != started_pages_index_.end());
|
| +  DCHECK(finished_pages_index_.find(page_num) == finished_pages_index_.end());
|
| +  const size_t page_index = started_pages_index_[page_num];
|
| +  if (!GetPageAtIndex(page_index)->image_fetchers_.empty())
|
| +    return;  // Images are still being fetched for this page.
|
| +
|
| +  finished_pages_index_[page_num] = page_index;
|
| +  started_pages_index_.erase(page_num);
|
| +  RunDistillerCallbackIfDone();
|
| }
|
|
|
| +// Once no page is waiting or in flight, stitches the finished pages into a
|
| +// single DistilledArticleProto, clears the per-article page bookkeeping and
|
| +// hands the article to the stored callback. Does nothing otherwise.
|
| void DistillerImpl::RunDistillerCallbackIfDone() {
|
| - if (image_fetchers_.empty() && !distillation_in_progress_) {
|
| - distillation_cb_.Run(article_proto_.Pass());
|
| +  DCHECK(!distillation_cb_.is_null());
|
| +  if (AreAllPagesFinished()) {
|
| +    scoped_ptr<DistilledArticleProto> article_proto(
|
| +        new DistilledArticleProto());
|
| +    // Stitch the pages back into the article. std::map iterates in key
|
| +    // order, so pages are appended by ascending page number.
|
| +    for (std::map<int, size_t>::const_iterator it =
|
| +             finished_pages_index_.begin();
|
| +         it != finished_pages_index_.end(); ++it) {
|
| +      DistilledPageData* page_data = GetPageAtIndex(it->second);
|
| +      *(article_proto->add_pages()) = *(page_data->proto);
|
| +
|
| +      // The article takes its title from the first stitched page.
|
| +      if (it == finished_pages_index_.begin())
|
| +        article_proto->set_title(page_data->title);
|
| +    }
|
| +
|
| +    finished_pages_index_.clear();
|
| +    pages_.clear();
|
| +    DCHECK_LE(static_cast<size_t>(article_proto->pages_size()),
|
| +              max_pages_in_article_);
|
| +
|
| +    // Detach the callback before running it. If the callback calls
|
| +    // DistillPage() again, that call installs a new |distillation_cb_|; a
|
| +    // Reset() issued after Run() returned would wipe the new callback.
|
| +    DistillerCallback callback = distillation_cb_;
|
| +    distillation_cb_.Reset();
|
| +    callback.Run(article_proto.Pass());
|
| }
|
| }
|
|
|
|
|