OLD | NEW |
---|---|
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ | 5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ |
6 #define COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ | 6 #define COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ |
7 | 7 |
8 #include <string> | 8 #include <string> |
9 | 9 |
10 #include "base/callback.h" | 10 #include "base/callback.h" |
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
61 const DistillerURLFetcherFactory& distiller_url_fetcher_factory); | 61 const DistillerURLFetcherFactory& distiller_url_fetcher_factory); |
62 virtual ~DistillerImpl(); | 62 virtual ~DistillerImpl(); |
63 | 63 |
64 // Creates an execution context. This must be called once before any calls are | 64 // Creates an execution context. This must be called once before any calls are |
65 // made to distill the page. | 65 // made to distill the page. |
66 virtual void Init(); | 66 virtual void Init(); |
67 | 67 |
68 virtual void DistillPage(const GURL& url, | 68 virtual void DistillPage(const GURL& url, |
69 const DistillerCallback& callback) OVERRIDE; | 69 const DistillerCallback& callback) OVERRIDE; |
70 | 70 |
71 protected: | |
72 // Returns the maximum number of pages in an article. | |
73 // Overridden by tests to verify the limit on pages in an article. | |
74 virtual size_t GetMaxNumPagesInArticle() const; | |
cjhopman
2014/02/14 20:53:52
Overriding in tests for something like this is bad
shashi
2014/02/14 23:25:29
Done.
| |
75 | |
71 private: | 76 private: |
72 void OnFetchImageDone(DistilledPageProto* distilled_page_proto, | 77 // In case of multiple pages, the Distiller maintains state of multiple pages |
78 // as relative page numbers. E.g. if distillation starts at page 2 of a 3 | |
79 // page article, the relative page numbers assigned to pages will be [-1,0,1]. | |
80 | |
81 // Class representing the state of a page under distillation. | |
82 struct DistilledPageData { | |
83 DistilledPageData(); | |
84 virtual ~DistilledPageData(); | |
85 // Relative page number of the page. | |
86 int page_no; | |
cjhopman
2014/02/14 20:53:52
Consider doing `s/page_no/page_number` throughout.
shashi
2014/02/14 23:25:29
Done.
| |
87 std::string title; | |
88 ScopedVector<DistillerURLFetcher> image_fetchers_; | |
89 scoped_ptr<DistilledPageProto> proto; | |
90 | |
91 private: | |
92 DISALLOW_COPY_AND_ASSIGN(DistilledPageData); | |
93 }; | |
94 | |
95 void OnFetchImageDone(DistilledPageData* distilled_page_data, | |
cjhopman
2014/02/14 20:53:52
So a bunch of functions are called to distill a pa
shashi
2014/02/14 23:25:29
Change to use page_num throughout.
On 2014/02/14
| |
73 DistillerURLFetcher* url_fetcher, | 96 DistillerURLFetcher* url_fetcher, |
74 const std::string& id, | 97 const std::string& id, |
75 const std::string& response); | 98 const std::string& response); |
76 | 99 |
77 void OnPageDistillationFinished(const GURL& page_url, | 100 void OnPageDistillationFinished(int page_no, |
101 const GURL& page_url, | |
78 scoped_ptr<DistilledPageInfo> distilled_page, | 102 scoped_ptr<DistilledPageInfo> distilled_page, |
79 bool distillation_successful); | 103 bool distillation_successful); |
80 | 104 |
81 virtual void FetchImage(DistilledPageProto* distilled_page_proto, | 105 virtual void FetchImage(DistilledPageData* distilled_page_data, |
82 const std::string& image_id, | 106 const std::string& image_id, |
83 const std::string& item); | 107 const std::string& item); |
84 | 108 |
85 // Distills the page and adds the new page to |article_proto|. | 109 // Distills the next page. |
86 void DistillPage(const GURL& url); | 110 void DistillNextPage(); |
111 | |
112 // Adds the |url| to |waiting_pages_| if |page_no| is a valid relative | |
113 // page number and |url| is valid. Ignores duplicate pages and urls. | |
114 void AddToDistillationQueue(int page_no, const GURL& url); | |
115 | |
116 // Check if |page_no| is a valid relative page number, i.e. page with | |
117 // |page_no| is either under distillation or has already completed | |
118 // distillation. | |
119 bool IsPageNumberInUse(int page_no) const; | |
120 | |
121 bool NoPendingPages() const; | |
cjhopman
2014/02/14 20:53:52
This name doesn't really imply a question... how a
shashi
2014/02/14 23:25:29
Done.
| |
122 | |
123 // Total number of pages in the article that the distiller knows of, this | |
124 // includes pages that are pending distillation. | |
125 size_t TotalPageCount() const; | |
87 | 126 |
88 // Runs |distillation_cb_| if all distillation callbacks and image fetches are | 127 // Runs |distillation_cb_| if all distillation callbacks and image fetches are |
89 // complete. | 128 // complete. |
90 void RunDistillerCallbackIfDone(); | 129 void RunDistillerCallbackIfDone(); |
91 | 130 |
131 // Checks if page |distilled_page_data| has finished distillation, including | |
132 // all image fetches. | |
133 void CheckAndAddPageIfDone(DistilledPageData* distilled_page_data); | |
cjhopman
2014/02/14 20:53:52
Does this mean `(check and add) if done` or `check
shashi
2014/02/14 23:25:29
Done.
| |
134 | |
92 const DistillerURLFetcherFactory& distiller_url_fetcher_factory_; | 135 const DistillerURLFetcherFactory& distiller_url_fetcher_factory_; |
93 scoped_ptr<PageDistiller> page_distiller_; | 136 scoped_ptr<PageDistiller> page_distiller_; |
94 DistillerCallback distillation_cb_; | 137 DistillerCallback distillation_cb_; |
95 | 138 |
96 ScopedVector<DistillerURLFetcher> image_fetchers_; | 139 // Set of pages which have finished distillation. |
97 scoped_ptr<DistilledArticleProto> article_proto_; | 140 // |finished_pages_index_| maintains the mapping from page number to the |
98 bool distillation_in_progress_; | 141 // index in |finished_pages_|. |
99 // Set to keep track of which urls are already seen by the distiller. | 142 ScopedVector<DistilledPageData> finished_pages_; |
100 base::hash_set<std::string> processed_urls_; | 143 |
144 // Maps page number to the index in |finished_pages_|. | |
145 std::map<int, size_t> finished_pages_index_; | |
146 | |
147 // The list of pages that are still waiting for distillation to start. | |
148 // This is a map, to make distiller prefer distilling lower page numbers | |
149 // first. | |
150 std::map<int, GURL> waiting_pages_; | |
151 | |
152 // The page numbers of pages that are either waiting for distillation or image | |
153 // fetches. If a page is in |started_pages_| that means it is still waiting | |
154 // for an action (distillation or image fetch) to finish. | |
cjhopman
2014/02/14 20:53:52
These sentences seem redundant. Remove one of them
shashi
2014/02/14 23:25:29
Done.
| |
155 base::hash_set<int> started_pages_; | |
156 | |
157 // Set to keep track of which urls are already seen by the distiller. Used to | |
158 // prevent distiller from distilling the same url twice. | |
159 base::hash_set<std::string> seen_urls_; | |
101 | 160 |
102 DISALLOW_COPY_AND_ASSIGN(DistillerImpl); | 161 DISALLOW_COPY_AND_ASSIGN(DistillerImpl); |
103 }; | 162 }; |
104 | 163 |
105 } // namespace dom_distiller | 164 } // namespace dom_distiller |
106 | 165 |
107 #endif // COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ | 166 #endif // COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ |
OLD | NEW |