Index: components/dom_distiller/core/distiller.cc |
diff --git a/components/dom_distiller/core/distiller.cc b/components/dom_distiller/core/distiller.cc |
index 8e155ebc2788356c16cdd2fcfde01df6df3a457b..6e1edb026c68c3313d3eac83c02d2d9bfd6d420d 100644 |
--- a/components/dom_distiller/core/distiller.cc |
+++ b/components/dom_distiller/core/distiller.cc |
@@ -21,7 +21,7 @@ |
namespace { |
// Maximum number of distilled pages in an article. |
-const int kMaxPagesInArticle = 32; |
+const size_t kMaxPagesInArticle = 32; |
} |
namespace dom_distiller { |
@@ -41,64 +41,91 @@ scoped_ptr<Distiller> DistillerFactoryImpl::CreateDistiller() { |
return distiller.PassAs<Distiller>(); |
} |
+DistillerImpl::DistilledPageData::DistilledPageData() {} |
+ |
+DistillerImpl::DistilledPageData::~DistilledPageData() {} |
+ |
DistillerImpl::DistillerImpl( |
const DistillerPageFactory& distiller_page_factory, |
const DistillerURLFetcherFactory& distiller_url_fetcher_factory) |
- : distiller_url_fetcher_factory_(distiller_url_fetcher_factory), |
- distillation_in_progress_(false) { |
+ : distiller_url_fetcher_factory_(distiller_url_fetcher_factory) { |
page_distiller_.reset(new PageDistiller(distiller_page_factory)); |
} |
-DistillerImpl::~DistillerImpl() { |
- DCHECK(image_fetchers_.empty()); |
- DCHECK(!distillation_in_progress_); |
-} |
+DistillerImpl::~DistillerImpl() { DCHECK(NoPendingPages()); } |
void DistillerImpl::Init() { |
- DCHECK(!distillation_in_progress_); |
+ DCHECK(NoPendingPages()); |
page_distiller_->Init(); |
- article_proto_.reset(new DistilledArticleProto()); |
+} |
+ |
+size_t DistillerImpl::GetMaxNumPagesInArticle() const { |
+ return kMaxPagesInArticle; |
+} |
+ |
+bool DistillerImpl::NoPendingPages() const { |
+ return started_pages_.empty() && waiting_pages_.empty(); |
+} |
+ |
+size_t DistillerImpl::TotalPageCount() const { |
+ return waiting_pages_.size() + started_pages_.size() + finished_pages_.size(); |
+} |
+ |
+void DistillerImpl::AddToDistillationQueue(int page_no, const GURL& url) { |
+ if (!IsPageNumberInUse(page_no) && url.is_valid() && |
+ TotalPageCount() < GetMaxNumPagesInArticle() && |
+ seen_urls_.find(url.spec()) == seen_urls_.end()) { |
+ waiting_pages_[page_no] = url; |
+ } |
+} |
+ |
+bool DistillerImpl::IsPageNumberInUse(int page_no) const { |
+ return waiting_pages_.find(page_no) != waiting_pages_.end() || |
+ started_pages_.find(page_no) != started_pages_.end() || |
+ finished_pages_index_.find(page_no) != finished_pages_index_.end(); |
} |
void DistillerImpl::DistillPage(const GURL& url, |
const DistillerCallback& distillation_cb) { |
- DCHECK(!distillation_in_progress_); |
+ DCHECK(NoPendingPages()); |
distillation_cb_ = distillation_cb; |
- DistillPage(url); |
+ |
+ AddToDistillationQueue(0, url); |
+ DistillNextPage(); |
} |
-void DistillerImpl::DistillPage(const GURL& url) { |
- DCHECK(!distillation_in_progress_); |
- if (url.is_valid() && article_proto_->pages_size() < kMaxPagesInArticle && |
- processed_urls_.find(url.spec()) == processed_urls_.end()) { |
- distillation_in_progress_ = true; |
- // Distill the next page. |
+void DistillerImpl::DistillNextPage() { |
+ if (!waiting_pages_.empty()) { |
+ std::map<int, GURL>::iterator front = waiting_pages_.begin(); |
+ int page_no = front->first; |
+ const GURL url = front->second; |
+ |
+ waiting_pages_.erase(front); |
DCHECK(url.is_valid()); |
- DCHECK_LT(article_proto_->pages_size(), kMaxPagesInArticle); |
+ DCHECK(started_pages_.find(page_no) == started_pages_.end()); |
+ started_pages_.insert(page_no); |
page_distiller_->DistillPage( |
url, |
base::Bind(&DistillerImpl::OnPageDistillationFinished, |
base::Unretained(this), |
+ page_no, |
url)); |
- } else { |
- RunDistillerCallbackIfDone(); |
} |
} |
void DistillerImpl::OnPageDistillationFinished( |
+ int page_no, |
const GURL& page_url, |
scoped_ptr<DistilledPageInfo> distilled_page, |
bool distillation_successful) { |
- DCHECK(distillation_in_progress_); |
DCHECK(distilled_page.get()); |
- if (!distillation_successful) { |
- RunDistillerCallbackIfDone(); |
- } else { |
- DistilledPageProto* current_page = article_proto_->add_pages(); |
- // Set the title of the article as the title of the first page. |
- if (article_proto_->pages_size() == 1) { |
- article_proto_->set_title(distilled_page->title); |
- } |
+ DCHECK(IsPageNumberInUse(page_no)); |
cjhopman
2014/02/14 20:53:52
This could be more specific and check that page_no … [comment truncated in export]
shashi
2014/02/14 23:25:29
Done.
|
+ if (distillation_successful) { |
+ DistilledPageData* page_data = new DistilledPageData(); |
+ DistilledPageProto* current_page = new DistilledPageProto(); |
+ page_data->proto.reset(current_page); |
+ page_data->page_no = page_no; |
+ page_data->title = distilled_page->title; |
current_page->set_url(page_url.spec()); |
current_page->set_html(distilled_page->html); |
@@ -109,57 +136,106 @@ void DistillerImpl::OnPageDistillationFinished( |
DCHECK_EQ(next_page_url.GetOrigin(), page_url.GetOrigin()); |
} |
- processed_urls_.insert(page_url.spec()); |
- distillation_in_progress_ = false; |
- int page_number = article_proto_->pages_size(); |
+ seen_urls_.insert(page_url.spec()); |
cjhopman
2014/02/14 20:53:52
I think this should be done in ::AddToDistillationQueue … [comment truncated in export]
shashi
2014/02/14 23:25:29
Done.
|
for (size_t img_num = 0; img_num < distilled_page->image_urls.size(); |
++img_num) { |
std::string image_id = |
- base::IntToString(page_number) + "_" + base::IntToString(img_num); |
- FetchImage(current_page, image_id, distilled_page->image_urls[img_num]); |
+ base::IntToString(page_no + 1) + "_" + base::IntToString(img_num); |
+ FetchImage(page_data, image_id, distilled_page->image_urls[img_num]); |
} |
- DistillPage(next_page_url); |
+ |
+ AddToDistillationQueue(page_no + 1, next_page_url); |
+ CheckAndAddPageIfDone(page_data); |
+ DistillNextPage(); |
+ } else { |
+ started_pages_.erase(page_no); |
+ RunDistillerCallbackIfDone(); |
} |
} |
-void DistillerImpl::FetchImage(DistilledPageProto* distilled_page_proto, |
+void DistillerImpl::FetchImage(DistilledPageData* distilled_page_data, |
const std::string& image_id, |
const std::string& item) { |
+ DCHECK(distilled_page_data); |
DistillerURLFetcher* fetcher = |
distiller_url_fetcher_factory_.CreateDistillerURLFetcher(); |
- image_fetchers_.push_back(fetcher); |
+ distilled_page_data->image_fetchers_.push_back(fetcher); |
+ |
fetcher->FetchURL(item, |
base::Bind(&DistillerImpl::OnFetchImageDone, |
base::Unretained(this), |
- base::Unretained(distilled_page_proto), |
+ base::Unretained(distilled_page_data), |
base::Unretained(fetcher), |
image_id)); |
} |
-void DistillerImpl::OnFetchImageDone(DistilledPageProto* distilled_page_proto, |
+void DistillerImpl::OnFetchImageDone(DistilledPageData* distilled_page_data, |
DistillerURLFetcher* url_fetcher, |
const std::string& id, |
const std::string& response) { |
- DCHECK_GT(article_proto_->pages_size(), 0); |
- DCHECK(distilled_page_proto); |
+ DCHECK(distilled_page_data); |
+ DCHECK(distilled_page_data->proto); |
DCHECK(url_fetcher); |
ScopedVector<DistillerURLFetcher>::iterator fetcher_it = |
- std::find(image_fetchers_.begin(), image_fetchers_.end(), url_fetcher); |
+ std::find(distilled_page_data->image_fetchers_.begin(), |
+ distilled_page_data->image_fetchers_.end(), |
+ url_fetcher); |
- DCHECK(fetcher_it != image_fetchers_.end()); |
+ DCHECK(fetcher_it != distilled_page_data->image_fetchers_.end()); |
// Delete the |url_fetcher| by DeleteSoon since the OnFetchImageDone |
// callback is invoked by the |url_fetcher|. |
- image_fetchers_.weak_erase(fetcher_it); |
+ distilled_page_data->image_fetchers_.weak_erase(fetcher_it); |
base::MessageLoop::current()->DeleteSoon(FROM_HERE, url_fetcher); |
- DistilledPageProto_Image* image = distilled_page_proto->add_image(); |
+ |
+ DistilledPageProto_Image* image = distilled_page_data->proto->add_image(); |
image->set_name(id); |
image->set_data(response); |
- RunDistillerCallbackIfDone(); |
+ |
+ CheckAndAddPageIfDone(distilled_page_data); |
+} |
+ |
+void DistillerImpl::CheckAndAddPageIfDone( |
+ DistilledPageData* distilled_page_data) { |
+ DCHECK(distilled_page_data); |
+ int page_no = distilled_page_data->page_no; |
+ DCHECK(started_pages_.find(page_no) != started_pages_.end()); |
+ DCHECK(finished_pages_index_.find(page_no) == finished_pages_index_.end()); |
+ if (distilled_page_data->image_fetchers_.empty()) { |
+ started_pages_.erase(page_no); |
+ finished_pages_.push_back(distilled_page_data); |
+ finished_pages_index_[page_no] = finished_pages_.size() - 1; |
+ RunDistillerCallbackIfDone(); |
+ } |
} |
void DistillerImpl::RunDistillerCallbackIfDone() { |
- if (image_fetchers_.empty() && !distillation_in_progress_) { |
- distillation_cb_.Run(article_proto_.Pass()); |
+ DCHECK(!distillation_cb_.is_null()); |
+ if (NoPendingPages()) { |
+ bool first_page = true; |
+ scoped_ptr<DistilledArticleProto> article_proto( |
+ new DistilledArticleProto()); |
+ // Stitch the pages back into the article. |
+ for (std::map<int, size_t>::iterator it = finished_pages_index_.begin(); |
+ it != finished_pages_index_.end();) { |
+ const DistilledPageData* page_data = finished_pages_[it->second]; |
+ *(article_proto->add_pages()) = *(page_data->proto); |
+ |
+ if (first_page) { |
+ article_proto->set_title(page_data->title); |
+ first_page = false; |
+ } |
+ |
+ finished_pages_index_.erase(it++); |
+ } |
+ |
+ finished_pages_.clear(); |
+ DCHECK_LE(static_cast<size_t>(article_proto->pages_size()), |
+ GetMaxNumPagesInArticle()); |
+ |
+ DCHECK(finished_pages_.empty()); |
+ DCHECK(finished_pages_index_.empty()); |
+ distillation_cb_.Run(article_proto.Pass()); |
+ distillation_cb_.Reset(); |
} |
} |