Chromium Code Reviews | Index: components/dom_distiller/core/distiller.cc |
| diff --git a/components/dom_distiller/core/distiller.cc b/components/dom_distiller/core/distiller.cc |
| index 8e155ebc2788356c16cdd2fcfde01df6df3a457b..18d8693ea9569411890abe394647691312df7556 100644 |
| --- a/components/dom_distiller/core/distiller.cc |
| +++ b/components/dom_distiller/core/distiller.cc |
| @@ -21,7 +21,7 @@ |
| namespace { |
| // Maximum number of distilled pages in an article. |
| -const int kMaxPagesInArticle = 32; |
| +const size_t kMaxPagesInArticle = 32; |
| } |
| namespace dom_distiller { |
| @@ -41,64 +41,105 @@ scoped_ptr<Distiller> DistillerFactoryImpl::CreateDistiller() { |
| return distiller.PassAs<Distiller>(); |
| } |
| +DistillerImpl::DistilledPageData::DistilledPageData() {} |
| + |
| +DistillerImpl::DistilledPageData::~DistilledPageData() {} |
| + |
| DistillerImpl::DistillerImpl( |
| const DistillerPageFactory& distiller_page_factory, |
| const DistillerURLFetcherFactory& distiller_url_fetcher_factory) |
| : distiller_url_fetcher_factory_(distiller_url_fetcher_factory), |
| - distillation_in_progress_(false) { |
| + max_pages_in_article_(kMaxPagesInArticle) { |
| page_distiller_.reset(new PageDistiller(distiller_page_factory)); |
| } |
| -DistillerImpl::~DistillerImpl() { |
| - DCHECK(image_fetchers_.empty()); |
| - DCHECK(!distillation_in_progress_); |
| -} |
| +DistillerImpl::~DistillerImpl() { DCHECK(AreAllPagesFinished()); } |
| void DistillerImpl::Init() { |
| - DCHECK(!distillation_in_progress_); |
| + DCHECK(AreAllPagesFinished()); |
| page_distiller_->Init(); |
| - article_proto_.reset(new DistilledArticleProto()); |
| +} |
| + |
| +void DistillerImpl::SetMaxNumPagesInArticle(size_t max_num_pages) { |
| + max_pages_in_article_ = max_num_pages; |
| +} |
| + |
| +bool DistillerImpl::AreAllPagesFinished() const { |
| + return started_pages_index_.empty() && waiting_pages_.empty(); |
| +} |
| + |
| +size_t DistillerImpl::TotalPageCount() const { |
| + return waiting_pages_.size() + started_pages_index_.size() + |
| + finished_pages_index_.size(); |
| +} |
| + |
| +void DistillerImpl::AddToDistillationQueue(int page_num, const GURL& url) { |
| + if (!IsPageNumberInUse(page_num) && url.is_valid() && |
| + TotalPageCount() < max_pages_in_article_ && |
| + seen_urls_.find(url.spec()) == seen_urls_.end()) { |
| + waiting_pages_[page_num] = url; |
| + } |
| +} |
| + |
| +bool DistillerImpl::IsPageNumberInUse(int page_num) const { |
| + return waiting_pages_.find(page_num) != waiting_pages_.end() || |
| + started_pages_index_.find(page_num) != started_pages_index_.end() || |
| + finished_pages_index_.find(page_num) != finished_pages_index_.end(); |
| +} |
| + |
| +DistillerImpl::DistilledPageData* DistillerImpl::GetPageAtIndex(size_t index) |
| + const { |
| + DCHECK_LT(index, pages_.size()); |
| + DistilledPageData* page_data = pages_[index]; |
| + DCHECK(page_data); |
| + return page_data; |
| } |
| void DistillerImpl::DistillPage(const GURL& url, |
| const DistillerCallback& distillation_cb) { |
| - DCHECK(!distillation_in_progress_); |
| + DCHECK(AreAllPagesFinished()); |
| distillation_cb_ = distillation_cb; |
| - DistillPage(url); |
| + |
| + AddToDistillationQueue(0, url); |
| + DistillNextPage(); |
| } |
| -void DistillerImpl::DistillPage(const GURL& url) { |
| - DCHECK(!distillation_in_progress_); |
| - if (url.is_valid() && article_proto_->pages_size() < kMaxPagesInArticle && |
| - processed_urls_.find(url.spec()) == processed_urls_.end()) { |
| - distillation_in_progress_ = true; |
| - // Distill the next page. |
| +void DistillerImpl::DistillNextPage() { |
| + if (!waiting_pages_.empty()) { |
| + std::map<int, GURL>::iterator front = waiting_pages_.begin(); |
| + int page_num = front->first; |
| + const GURL url = front->second; |
| + |
| + waiting_pages_.erase(front); |
| DCHECK(url.is_valid()); |
| - DCHECK_LT(article_proto_->pages_size(), kMaxPagesInArticle); |
| + DCHECK(started_pages_index_.find(page_num) == started_pages_index_.end()); |
| + DCHECK(finished_pages_index_.find(page_num) == finished_pages_index_.end()); |
| + seen_urls_.insert(url.spec()); |
| + pages_.push_back(new DistilledPageData()); |
| + started_pages_index_[page_num] = pages_.size() - 1; |
| page_distiller_->DistillPage( |
| url, |
| base::Bind(&DistillerImpl::OnPageDistillationFinished, |
| base::Unretained(this), |
| + page_num, |
| url)); |
| - } else { |
| - RunDistillerCallbackIfDone(); |
| } |
| } |
| void DistillerImpl::OnPageDistillationFinished( |
| + int page_num, |
| const GURL& page_url, |
| scoped_ptr<DistilledPageInfo> distilled_page, |
| bool distillation_successful) { |
| - DCHECK(distillation_in_progress_); |
| DCHECK(distilled_page.get()); |
| - if (!distillation_successful) { |
| - RunDistillerCallbackIfDone(); |
| - } else { |
| - DistilledPageProto* current_page = article_proto_->add_pages(); |
| - // Set the title of the article as the title of the first page. |
| - if (article_proto_->pages_size() == 1) { |
| - article_proto_->set_title(distilled_page->title); |
| - } |
| + DCHECK(started_pages_index_.find(page_num) != started_pages_index_.end()); |
| + if (distillation_successful) { |
| + DistilledPageData* page_data = |
| + GetPageAtIndex(started_pages_index_[page_num]); |
| + DistilledPageProto* current_page = new DistilledPageProto(); |
| + page_data->proto.reset(current_page); |
| + page_data->page_num = page_num; |
| + page_data->title = distilled_page->title; |
| current_page->set_url(page_url.spec()); |
| current_page->set_html(distilled_page->html); |
| @@ -109,57 +150,104 @@ void DistillerImpl::OnPageDistillationFinished( |
| DCHECK_EQ(next_page_url.GetOrigin(), page_url.GetOrigin()); |
| } |
| - processed_urls_.insert(page_url.spec()); |
| - distillation_in_progress_ = false; |
| - int page_number = article_proto_->pages_size(); |
| for (size_t img_num = 0; img_num < distilled_page->image_urls.size(); |
| ++img_num) { |
| std::string image_id = |
| - base::IntToString(page_number) + "_" + base::IntToString(img_num); |
| - FetchImage(current_page, image_id, distilled_page->image_urls[img_num]); |
| + base::IntToString(page_num + 1) + "_" + base::IntToString(img_num); |
| + FetchImage(page_num, image_id, distilled_page->image_urls[img_num]); |
| } |
| - DistillPage(next_page_url); |
| + |
| + AddToDistillationQueue(page_num + 1, next_page_url); |
|
cjhopman
2014/02/15 02:44:15
Nit: why not do this in the `if (next_page_url.is_valid())` … [remainder of comment truncated in the extracted page]
shashi
2014/02/15 03:15:36
Done.
|
| + AddPageIfDone(page_num); |
| + DistillNextPage(); |
| + } else { |
| + started_pages_index_.erase(page_num); |
| + RunDistillerCallbackIfDone(); |
| } |
| } |
| -void DistillerImpl::FetchImage(DistilledPageProto* distilled_page_proto, |
| +void DistillerImpl::FetchImage(int page_num, |
| const std::string& image_id, |
| const std::string& item) { |
| + DCHECK(started_pages_index_.find(page_num) != started_pages_index_.end()); |
| + DistilledPageData* page_data = GetPageAtIndex(started_pages_index_[page_num]); |
| DistillerURLFetcher* fetcher = |
| distiller_url_fetcher_factory_.CreateDistillerURLFetcher(); |
| - image_fetchers_.push_back(fetcher); |
| + page_data->image_fetchers_.push_back(fetcher); |
| + |
| fetcher->FetchURL(item, |
| base::Bind(&DistillerImpl::OnFetchImageDone, |
| base::Unretained(this), |
| - base::Unretained(distilled_page_proto), |
| + page_num, |
| base::Unretained(fetcher), |
| image_id)); |
| } |
| -void DistillerImpl::OnFetchImageDone(DistilledPageProto* distilled_page_proto, |
| +void DistillerImpl::OnFetchImageDone(int page_num, |
| DistillerURLFetcher* url_fetcher, |
| const std::string& id, |
| const std::string& response) { |
| - DCHECK_GT(article_proto_->pages_size(), 0); |
| - DCHECK(distilled_page_proto); |
| + DCHECK(started_pages_index_.find(page_num) != started_pages_index_.end()); |
| + DistilledPageData* page_data = GetPageAtIndex(started_pages_index_[page_num]); |
| + DCHECK(page_data->proto); |
| DCHECK(url_fetcher); |
| ScopedVector<DistillerURLFetcher>::iterator fetcher_it = |
| - std::find(image_fetchers_.begin(), image_fetchers_.end(), url_fetcher); |
| + std::find(page_data->image_fetchers_.begin(), |
| + page_data->image_fetchers_.end(), |
| + url_fetcher); |
| - DCHECK(fetcher_it != image_fetchers_.end()); |
| + DCHECK(fetcher_it != page_data->image_fetchers_.end()); |
| // Delete the |url_fetcher| by DeleteSoon since the OnFetchImageDone |
| // callback is invoked by the |url_fetcher|. |
| - image_fetchers_.weak_erase(fetcher_it); |
| + page_data->image_fetchers_.weak_erase(fetcher_it); |
| base::MessageLoop::current()->DeleteSoon(FROM_HERE, url_fetcher); |
| - DistilledPageProto_Image* image = distilled_page_proto->add_image(); |
| + |
| + DistilledPageProto_Image* image = page_data->proto->add_image(); |
| image->set_name(id); |
| image->set_data(response); |
| - RunDistillerCallbackIfDone(); |
| + |
| + AddPageIfDone(page_num); |
| +} |
| + |
| +void DistillerImpl::AddPageIfDone(int page_num) { |
| + DCHECK(started_pages_index_.find(page_num) != started_pages_index_.end()); |
| + DCHECK(finished_pages_index_.find(page_num) == finished_pages_index_.end()); |
| + DistilledPageData* page_data = GetPageAtIndex(started_pages_index_[page_num]); |
| + if (page_data->image_fetchers_.empty()) { |
| + finished_pages_index_[page_num] = started_pages_index_[page_num]; |
| + started_pages_index_.erase(page_num); |
| + RunDistillerCallbackIfDone(); |
| + } |
| } |
| void DistillerImpl::RunDistillerCallbackIfDone() { |
| - if (image_fetchers_.empty() && !distillation_in_progress_) { |
| - distillation_cb_.Run(article_proto_.Pass()); |
| + DCHECK(!distillation_cb_.is_null()); |
| + if (AreAllPagesFinished()) { |
| + bool first_page = true; |
| + scoped_ptr<DistilledArticleProto> article_proto( |
| + new DistilledArticleProto()); |
| + // Stitch the pages back into the article. |
| + for (std::map<int, size_t>::iterator it = finished_pages_index_.begin(); |
| + it != finished_pages_index_.end();) { |
| + DistilledPageData* page_data = GetPageAtIndex(it->second); |
| + *(article_proto->add_pages()) = *(page_data->proto); |
| + |
| + if (first_page) { |
| + article_proto->set_title(page_data->title); |
| + first_page = false; |
| + } |
| + |
| + finished_pages_index_.erase(it++); |
| + } |
| + |
| + pages_.clear(); |
| + DCHECK_LE(static_cast<size_t>(article_proto->pages_size()), |
| + max_pages_in_article_); |
| + |
| + DCHECK(pages_.empty()); |
| + DCHECK(finished_pages_index_.empty()); |
| + distillation_cb_.Run(article_proto.Pass()); |
| + distillation_cb_.Reset(); |
| } |
| } |