Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ | 5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ |
| 6 #define COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ | 6 #define COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ |
| 7 | 7 |
| 8 #include <string> | 8 #include <string> |
| 9 | 9 |
| 10 #include "base/callback.h" | 10 #include "base/callback.h" |
| (...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 61 const DistillerURLFetcherFactory& distiller_url_fetcher_factory); | 61 const DistillerURLFetcherFactory& distiller_url_fetcher_factory); |
| 62 virtual ~DistillerImpl(); | 62 virtual ~DistillerImpl(); |
| 63 | 63 |
| 64 // Creates an execution context. This must be called once before any calls are | 64 // Creates an execution context. This must be called once before any calls are |
| 65 // made to distill the page. | 65 // made to distill the page. |
| 66 virtual void Init(); | 66 virtual void Init(); |
| 67 | 67 |
| 68 virtual void DistillPage(const GURL& url, | 68 virtual void DistillPage(const GURL& url, |
| 69 const DistillerCallback& callback) OVERRIDE; | 69 const DistillerCallback& callback) OVERRIDE; |
| 70 | 70 |
| 71 protected: | |
| 72 // Returns the maximum number of pages in an article. | |
| 73 // Overridden by tests to verify the limit on pages in an article. | |
| 74 virtual size_t GetMaxNumPagesInArticle() const; | |
|
cjhopman
2014/02/14 20:53:52
Overriding in tests for something like this is bad
shashi
2014/02/14 23:25:29
Done.
| |
| 75 | |
| 71 private: | 76 private: |
| 72 void OnFetchImageDone(DistilledPageProto* distilled_page_proto, | 77 // In case of multiple pages, the Distiller maintains state of multiple pages |
| 78 // as relative page numbers. E.g. if distillation starts at page 2 for a 3 | |
| 79 // page article, the relative page numbers assigned to pages will be [-1,0,1]. | |
| 80 | |
| 81 // Class representing the state of a page under distillation. | |
| 82 struct DistilledPageData { | |
| 83 DistilledPageData(); | |
| 84 virtual ~DistilledPageData(); | |
| 85 // Relative page number of the page. | |
| 86 int page_no; | |
|
cjhopman
2014/02/14 20:53:52
Consider doing `s/page_no/page_number` throughout.
shashi
2014/02/14 23:25:29
Done.
| |
| 87 std::string title; | |
| 88 ScopedVector<DistillerURLFetcher> image_fetchers_; | |
| 89 scoped_ptr<DistilledPageProto> proto; | |
| 90 | |
| 91 private: | |
| 92 DISALLOW_COPY_AND_ASSIGN(DistilledPageData); | |
| 93 }; | |
| 94 | |
| 95 void OnFetchImageDone(DistilledPageData* distilled_page_data, | |
|
cjhopman
2014/02/14 20:53:52
So a bunch of functions are called to distill a pa
shashi
2014/02/14 23:25:29
Change to use page_num throughout.
On 2014/02/14
| |
| 73 DistillerURLFetcher* url_fetcher, | 96 DistillerURLFetcher* url_fetcher, |
| 74 const std::string& id, | 97 const std::string& id, |
| 75 const std::string& response); | 98 const std::string& response); |
| 76 | 99 |
| 77 void OnPageDistillationFinished(const GURL& page_url, | 100 void OnPageDistillationFinished(int page_no, |
| 101 const GURL& page_url, | |
| 78 scoped_ptr<DistilledPageInfo> distilled_page, | 102 scoped_ptr<DistilledPageInfo> distilled_page, |
| 79 bool distillation_successful); | 103 bool distillation_successful); |
| 80 | 104 |
| 81 virtual void FetchImage(DistilledPageProto* distilled_page_proto, | 105 virtual void FetchImage(DistilledPageData* distilled_page_data, |
| 82 const std::string& image_id, | 106 const std::string& image_id, |
| 83 const std::string& item); | 107 const std::string& item); |
| 84 | 108 |
| 85 // Distills the page and adds the new page to |article_proto|. | 109 // Distills the next page. |
| 86 void DistillPage(const GURL& url); | 110 void DistillNextPage(); |
| 111 | |
| 112 // Adds the |url| to |pages_to_be_distilled| if |page_no| is a valid relative | |
| 113 // page number and |url| is valid. Ignores duplicate pages and urls. | |
| 114 void AddToDistillationQueue(int page_no, const GURL& url); | |
| 115 | |
| 116 // Check if |page_no| is a valid relative page number, i.e. page with | |
| 117 // |page_no| is either under distillation or has already completed | |
| 118 // distillation. | |
| 119 bool IsPageNumberInUse(int page_no) const; | |
| 120 | |
| 121 bool NoPendingPages() const; | |
|
cjhopman
2014/02/14 20:53:52
This name doesn't really imply a question... how a
shashi
2014/02/14 23:25:29
Done.
| |
| 122 | |
| 123 // Total number of pages in the article that the distiller knows of, this | |
| 124 // includes pages that are pending distillation. | |
| 125 size_t TotalPageCount() const; | |
| 87 | 126 |
| 88 // Runs |distillation_cb_| if all distillation callbacks and image fetches are | 127 // Runs |distillation_cb_| if all distillation callbacks and image fetches are |
| 89 // complete. | 128 // complete. |
| 90 void RunDistillerCallbackIfDone(); | 129 void RunDistillerCallbackIfDone(); |
| 91 | 130 |
| 131 // Checks if page |distilled_page_data| has finished distillation, including | |
| 132 // all image fetches. | |
| 133 void CheckAndAddPageIfDone(DistilledPageData* distilled_page_data); | |
|
cjhopman
2014/02/14 20:53:52
Does this mean `(check and add) if done` or `check
shashi
2014/02/14 23:25:29
Done.
| |
| 134 | |
| 92 const DistillerURLFetcherFactory& distiller_url_fetcher_factory_; | 135 const DistillerURLFetcherFactory& distiller_url_fetcher_factory_; |
| 93 scoped_ptr<PageDistiller> page_distiller_; | 136 scoped_ptr<PageDistiller> page_distiller_; |
| 94 DistillerCallback distillation_cb_; | 137 DistillerCallback distillation_cb_; |
| 95 | 138 |
| 96 ScopedVector<DistillerURLFetcher> image_fetchers_; | 139 // Set of pages which have finished distillation. |
| 97 scoped_ptr<DistilledArticleProto> article_proto_; | 140 // |finished_pages_index_| maintains the mapping from page number to the |
| 98 bool distillation_in_progress_; | 141 // index in |finished_pages_|. |
| 99 // Set to keep track of which urls are already seen by the distiller. | 142 ScopedVector<DistilledPageData> finished_pages_; |
| 100 base::hash_set<std::string> processed_urls_; | 143 |
| 144 // Maps page number to the index in |finished_pages_|. | |
| 145 std::map<int, size_t> finished_pages_index_; | |
| 146 | |
| 147 // The list of pages that are still waiting for distillation to start. | |
| 148 // This is a map, to make distiller prefer distilling lower page numbers | |
| 149 // first. | |
| 150 std::map<int, GURL> waiting_pages_; | |
| 151 | |
| 152 // The page number of pages that are either waiting for distillation or image | |
| 153 // fetches. If a page is in |started_pages_| that means it is still waiting | |
| 154 // for an action (distillation or image fetch) to finish. | |
|
cjhopman
2014/02/14 20:53:52
These sentences seem redundant. Remove one of them
shashi
2014/02/14 23:25:29
Done.
| |
| 155 base::hash_set<int> started_pages_; | |
| 156 | |
| 157 // Set to keep track of which urls are already seen by the distiller. Used to | |
| 158 // prevent distiller from distilling the same url twice. | |
| 159 base::hash_set<std::string> seen_urls_; | |
| 101 | 160 |
| 102 DISALLOW_COPY_AND_ASSIGN(DistillerImpl); | 161 DISALLOW_COPY_AND_ASSIGN(DistillerImpl); |
| 103 }; | 162 }; |
| 104 | 163 |
| 105 } // namespace dom_distiller | 164 } // namespace dom_distiller |
| 106 | 165 |
| 107 #endif // COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ | 166 #endif // COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ |
| OLD | NEW |