Index: components/dom_distiller/core/distiller.h |
diff --git a/components/dom_distiller/core/distiller.h b/components/dom_distiller/core/distiller.h |
index 54907500557dc2f6886a27976e7bec086e59bbe9..a42f87323c7e0479b17a361a99524936fb6dd702 100644 |
--- a/components/dom_distiller/core/distiller.h |
+++ b/components/dom_distiller/core/distiller.h |
@@ -68,36 +68,95 @@ class DistillerImpl : public Distiller { |
virtual void DistillPage(const GURL& url, |
const DistillerCallback& callback) OVERRIDE; |
+ protected: |
+ // Returns the maximum number of pages in an article. |
+ // Overridden by tests to verify the limit on pages in an article. |
+ virtual size_t GetMaxNumPagesInArticle() const; |
cjhopman
2014/02/14 20:53:52
Overriding in tests for something like this is bad
shashi
2014/02/14 23:25:29
Done.
|
+ |
private: |
- void OnFetchImageDone(DistilledPageProto* distilled_page_proto, |
+ // In case of multiple pages, the Distiller maintains state of multiple pages |
+ // as relative page numbers. E.g. if distillation starts at page 2 for a 3 |
+ // page article, the relative page numbers assigned to pages will be [-1,0,1]. |
+ |
+ // Class representing the state of a page under distillation. |
+ struct DistilledPageData { |
+ DistilledPageData(); |
+ virtual ~DistilledPageData(); |
+ // Relative page number of the page. |
+ int page_no; |
cjhopman
2014/02/14 20:53:52
Consider doing `s/page_no/page_number` throughout.
shashi
2014/02/14 23:25:29
Done.
|
+ std::string title; |
+ ScopedVector<DistillerURLFetcher> image_fetchers_; |
+ scoped_ptr<DistilledPageProto> proto; |
+ |
+ private: |
+ DISALLOW_COPY_AND_ASSIGN(DistilledPageData); |
+ }; |
+ |
+ void OnFetchImageDone(DistilledPageData* distilled_page_data, |
cjhopman
2014/02/14 20:53:52
So a bunch of functions are called to distill a pa
shashi
2014/02/14 23:25:29
Change to use page_num throughout.
On 2014/02/14
|
DistillerURLFetcher* url_fetcher, |
const std::string& id, |
const std::string& response); |
- void OnPageDistillationFinished(const GURL& page_url, |
+ void OnPageDistillationFinished(int page_no, |
+ const GURL& page_url, |
scoped_ptr<DistilledPageInfo> distilled_page, |
bool distillation_successful); |
- virtual void FetchImage(DistilledPageProto* distilled_page_proto, |
+ virtual void FetchImage(DistilledPageData* distilled_page_data, |
const std::string& image_id, |
const std::string& item); |
- // Distills the page and adds the new page to |article_proto|. |
- void DistillPage(const GURL& url); |
+ // Distills the next page. |
+ void DistillNextPage(); |
+ |
+ // Adds the |url| to |waiting_pages_| if |page_no| is a valid relative |
+ // page number and |url| is valid. Ignores duplicate pages and urls. |
+ void AddToDistillationQueue(int page_no, const GURL& url); |
+ |
+ // Checks if |page_no| is a valid relative page number, i.e. page with |
+ // |page_no| is either under distillation or has already completed |
+ // distillation. |
+ bool IsPageNumberInUse(int page_no) const; |
+ |
+ bool NoPendingPages() const; |
cjhopman
2014/02/14 20:53:52
This name doesn't really imply a question... how a
shashi
2014/02/14 23:25:29
Done.
|
+ |
+ // Total number of pages in the article that the distiller knows of; this |
+ // includes pages that are pending distillation. |
+ size_t TotalPageCount() const; |
// Runs |distillation_cb_| if all distillation callbacks and image fetches are |
// complete. |
void RunDistillerCallbackIfDone(); |
+ // Checks if page |distilled_page_data| has finished distillation, including |
+ // all image fetches. |
+ void CheckAndAddPageIfDone(DistilledPageData* distilled_page_data); |
cjhopman
2014/02/14 20:53:52
Does this mean `(check and add) if done` or `check
shashi
2014/02/14 23:25:29
Done.
|
+ |
const DistillerURLFetcherFactory& distiller_url_fetcher_factory_; |
scoped_ptr<PageDistiller> page_distiller_; |
DistillerCallback distillation_cb_; |
- ScopedVector<DistillerURLFetcher> image_fetchers_; |
- scoped_ptr<DistilledArticleProto> article_proto_; |
- bool distillation_in_progress_; |
- // Set to keep track of which urls are already seen by the distiller. |
- base::hash_set<std::string> processed_urls_; |
+ // Set of pages which have finished distillation. |
+ // |finished_pages_index_| maintains the mapping from page number to the |
+ // index in |finished_pages_|. |
+ ScopedVector<DistilledPageData> finished_pages_; |
+ |
+ // Maps page number to the index in |finished_pages_|. |
+ std::map<int, size_t> finished_pages_index_; |
+ |
+ // The list of pages that are still waiting for distillation to start. |
+ // This is a map, to make distiller prefer distilling lower page numbers |
+ // first. |
+ std::map<int, GURL> waiting_pages_; |
+ |
+ // The page number of pages that are either waiting for distillation or image |
+ // fetches. If a page is in |started_pages_| that means it is still waiting |
+ // for an action (distillation or image fetch) to finish. |
cjhopman
2014/02/14 20:53:52
These sentences seem redundant. Remove one of them
shashi
2014/02/14 23:25:29
Done.
|
+ base::hash_set<int> started_pages_; |
+ |
+ // Set to keep track of which urls are already seen by the distiller. Used to |
+ // prevent distiller from distilling the same url twice. |
+ base::hash_set<std::string> seen_urls_; |
DISALLOW_COPY_AND_ASSIGN(DistillerImpl); |
}; |