Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(387)

Unified Diff: components/dom_distiller/core/distiller.cc

Issue 130543003: Store page no for distilled pages undergoing distillation. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Address comments. Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: components/dom_distiller/core/distiller.cc
diff --git a/components/dom_distiller/core/distiller.cc b/components/dom_distiller/core/distiller.cc
index 8e155ebc2788356c16cdd2fcfde01df6df3a457b..18d8693ea9569411890abe394647691312df7556 100644
--- a/components/dom_distiller/core/distiller.cc
+++ b/components/dom_distiller/core/distiller.cc
@@ -21,7 +21,7 @@
namespace {
// Maximum number of distilled pages in an article.
-const int kMaxPagesInArticle = 32;
+const size_t kMaxPagesInArticle = 32;
}
namespace dom_distiller {
@@ -41,64 +41,105 @@ scoped_ptr<Distiller> DistillerFactoryImpl::CreateDistiller() {
return distiller.PassAs<Distiller>();
}
+DistillerImpl::DistilledPageData::DistilledPageData() {}
+
+DistillerImpl::DistilledPageData::~DistilledPageData() {}
+
DistillerImpl::DistillerImpl(
const DistillerPageFactory& distiller_page_factory,
const DistillerURLFetcherFactory& distiller_url_fetcher_factory)
: distiller_url_fetcher_factory_(distiller_url_fetcher_factory),
- distillation_in_progress_(false) {
+ max_pages_in_article_(kMaxPagesInArticle) {
page_distiller_.reset(new PageDistiller(distiller_page_factory));
}
-DistillerImpl::~DistillerImpl() {
- DCHECK(image_fetchers_.empty());
- DCHECK(!distillation_in_progress_);
-}
+DistillerImpl::~DistillerImpl() { DCHECK(AreAllPagesFinished()); }
void DistillerImpl::Init() {
- DCHECK(!distillation_in_progress_);
+ DCHECK(AreAllPagesFinished());
page_distiller_->Init();
- article_proto_.reset(new DistilledArticleProto());
+}
+
+void DistillerImpl::SetMaxNumPagesInArticle(size_t max_num_pages) {
+ max_pages_in_article_ = max_num_pages;
+}
+
+bool DistillerImpl::AreAllPagesFinished() const {
+ return started_pages_index_.empty() && waiting_pages_.empty();
+}
+
+size_t DistillerImpl::TotalPageCount() const {
+ return waiting_pages_.size() + started_pages_index_.size() +
+ finished_pages_index_.size();
+}
+
+void DistillerImpl::AddToDistillationQueue(int page_num, const GURL& url) {
+ if (!IsPageNumberInUse(page_num) && url.is_valid() &&
+ TotalPageCount() < max_pages_in_article_ &&
+ seen_urls_.find(url.spec()) == seen_urls_.end()) {
+ waiting_pages_[page_num] = url;
+ }
+}
+
+bool DistillerImpl::IsPageNumberInUse(int page_num) const {
+ return waiting_pages_.find(page_num) != waiting_pages_.end() ||
+ started_pages_index_.find(page_num) != started_pages_index_.end() ||
+ finished_pages_index_.find(page_num) != finished_pages_index_.end();
+}
+
+DistillerImpl::DistilledPageData* DistillerImpl::GetPageAtIndex(size_t index)
+ const {
+ DCHECK_LT(index, pages_.size());
+ DistilledPageData* page_data = pages_[index];
+ DCHECK(page_data);
+ return page_data;
}
void DistillerImpl::DistillPage(const GURL& url,
const DistillerCallback& distillation_cb) {
- DCHECK(!distillation_in_progress_);
+ DCHECK(AreAllPagesFinished());
distillation_cb_ = distillation_cb;
- DistillPage(url);
+
+ AddToDistillationQueue(0, url);
+ DistillNextPage();
}
-void DistillerImpl::DistillPage(const GURL& url) {
- DCHECK(!distillation_in_progress_);
- if (url.is_valid() && article_proto_->pages_size() < kMaxPagesInArticle &&
- processed_urls_.find(url.spec()) == processed_urls_.end()) {
- distillation_in_progress_ = true;
- // Distill the next page.
+void DistillerImpl::DistillNextPage() {
+ if (!waiting_pages_.empty()) {
+ std::map<int, GURL>::iterator front = waiting_pages_.begin();
+ int page_num = front->first;
+ const GURL url = front->second;
+
+ waiting_pages_.erase(front);
DCHECK(url.is_valid());
- DCHECK_LT(article_proto_->pages_size(), kMaxPagesInArticle);
+ DCHECK(started_pages_index_.find(page_num) == started_pages_index_.end());
+ DCHECK(finished_pages_index_.find(page_num) == finished_pages_index_.end());
+ seen_urls_.insert(url.spec());
+ pages_.push_back(new DistilledPageData());
+ started_pages_index_[page_num] = pages_.size() - 1;
page_distiller_->DistillPage(
url,
base::Bind(&DistillerImpl::OnPageDistillationFinished,
base::Unretained(this),
+ page_num,
url));
- } else {
- RunDistillerCallbackIfDone();
}
}
void DistillerImpl::OnPageDistillationFinished(
+ int page_num,
const GURL& page_url,
scoped_ptr<DistilledPageInfo> distilled_page,
bool distillation_successful) {
- DCHECK(distillation_in_progress_);
DCHECK(distilled_page.get());
- if (!distillation_successful) {
- RunDistillerCallbackIfDone();
- } else {
- DistilledPageProto* current_page = article_proto_->add_pages();
- // Set the title of the article as the title of the first page.
- if (article_proto_->pages_size() == 1) {
- article_proto_->set_title(distilled_page->title);
- }
+ DCHECK(started_pages_index_.find(page_num) != started_pages_index_.end());
+ if (distillation_successful) {
+ DistilledPageData* page_data =
+ GetPageAtIndex(started_pages_index_[page_num]);
+ DistilledPageProto* current_page = new DistilledPageProto();
+ page_data->proto.reset(current_page);
+ page_data->page_num = page_num;
+ page_data->title = distilled_page->title;
current_page->set_url(page_url.spec());
current_page->set_html(distilled_page->html);
@@ -109,57 +150,104 @@ void DistillerImpl::OnPageDistillationFinished(
DCHECK_EQ(next_page_url.GetOrigin(), page_url.GetOrigin());
}
- processed_urls_.insert(page_url.spec());
- distillation_in_progress_ = false;
- int page_number = article_proto_->pages_size();
for (size_t img_num = 0; img_num < distilled_page->image_urls.size();
++img_num) {
std::string image_id =
- base::IntToString(page_number) + "_" + base::IntToString(img_num);
- FetchImage(current_page, image_id, distilled_page->image_urls[img_num]);
+ base::IntToString(page_num + 1) + "_" + base::IntToString(img_num);
+ FetchImage(page_num, image_id, distilled_page->image_urls[img_num]);
}
- DistillPage(next_page_url);
+
+ AddToDistillationQueue(page_num + 1, next_page_url);
cjhopman 2014/02/15 02:44:15 Nit: why not do this in the `if (next_page_url.is_valid())` block above? [comment preview truncated]
shashi 2014/02/15 03:15:36 Done.
+ AddPageIfDone(page_num);
+ DistillNextPage();
+ } else {
+ started_pages_index_.erase(page_num);
+ RunDistillerCallbackIfDone();
}
}
-void DistillerImpl::FetchImage(DistilledPageProto* distilled_page_proto,
+void DistillerImpl::FetchImage(int page_num,
const std::string& image_id,
const std::string& item) {
+ DCHECK(started_pages_index_.find(page_num) != started_pages_index_.end());
+ DistilledPageData* page_data = GetPageAtIndex(started_pages_index_[page_num]);
DistillerURLFetcher* fetcher =
distiller_url_fetcher_factory_.CreateDistillerURLFetcher();
- image_fetchers_.push_back(fetcher);
+ page_data->image_fetchers_.push_back(fetcher);
+
fetcher->FetchURL(item,
base::Bind(&DistillerImpl::OnFetchImageDone,
base::Unretained(this),
- base::Unretained(distilled_page_proto),
+ page_num,
base::Unretained(fetcher),
image_id));
}
-void DistillerImpl::OnFetchImageDone(DistilledPageProto* distilled_page_proto,
+void DistillerImpl::OnFetchImageDone(int page_num,
DistillerURLFetcher* url_fetcher,
const std::string& id,
const std::string& response) {
- DCHECK_GT(article_proto_->pages_size(), 0);
- DCHECK(distilled_page_proto);
+ DCHECK(started_pages_index_.find(page_num) != started_pages_index_.end());
+ DistilledPageData* page_data = GetPageAtIndex(started_pages_index_[page_num]);
+ DCHECK(page_data->proto);
DCHECK(url_fetcher);
ScopedVector<DistillerURLFetcher>::iterator fetcher_it =
- std::find(image_fetchers_.begin(), image_fetchers_.end(), url_fetcher);
+ std::find(page_data->image_fetchers_.begin(),
+ page_data->image_fetchers_.end(),
+ url_fetcher);
- DCHECK(fetcher_it != image_fetchers_.end());
+ DCHECK(fetcher_it != page_data->image_fetchers_.end());
// Delete the |url_fetcher| by DeleteSoon since the OnFetchImageDone
// callback is invoked by the |url_fetcher|.
- image_fetchers_.weak_erase(fetcher_it);
+ page_data->image_fetchers_.weak_erase(fetcher_it);
base::MessageLoop::current()->DeleteSoon(FROM_HERE, url_fetcher);
- DistilledPageProto_Image* image = distilled_page_proto->add_image();
+
+ DistilledPageProto_Image* image = page_data->proto->add_image();
image->set_name(id);
image->set_data(response);
- RunDistillerCallbackIfDone();
+
+ AddPageIfDone(page_num);
+}
+
+void DistillerImpl::AddPageIfDone(int page_num) {
+ DCHECK(started_pages_index_.find(page_num) != started_pages_index_.end());
+ DCHECK(finished_pages_index_.find(page_num) == finished_pages_index_.end());
+ DistilledPageData* page_data = GetPageAtIndex(started_pages_index_[page_num]);
+ if (page_data->image_fetchers_.empty()) {
+ finished_pages_index_[page_num] = started_pages_index_[page_num];
+ started_pages_index_.erase(page_num);
+ RunDistillerCallbackIfDone();
+ }
}
void DistillerImpl::RunDistillerCallbackIfDone() {
- if (image_fetchers_.empty() && !distillation_in_progress_) {
- distillation_cb_.Run(article_proto_.Pass());
+ DCHECK(!distillation_cb_.is_null());
+ if (AreAllPagesFinished()) {
+ bool first_page = true;
+ scoped_ptr<DistilledArticleProto> article_proto(
+ new DistilledArticleProto());
+ // Stitch the pages back into the article.
+ for (std::map<int, size_t>::iterator it = finished_pages_index_.begin();
+ it != finished_pages_index_.end();) {
+ DistilledPageData* page_data = GetPageAtIndex(it->second);
+ *(article_proto->add_pages()) = *(page_data->proto);
+
+ if (first_page) {
+ article_proto->set_title(page_data->title);
+ first_page = false;
+ }
+
+ finished_pages_index_.erase(it++);
+ }
+
+ pages_.clear();
+ DCHECK_LE(static_cast<size_t>(article_proto->pages_size()),
+ max_pages_in_article_);
+
+ DCHECK(pages_.empty());
+ DCHECK(finished_pages_index_.empty());
+ distillation_cb_.Run(article_proto.Pass());
+ distillation_cb_.Reset();
}
}

Powered by Google App Engine
This is Rietveld 408576698