OLD | NEW |
---|---|
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ | 5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ |
6 #define COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ | 6 #define COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ |
7 | 7 |
8 #include <string> | 8 #include <string> |
9 | 9 |
10 #include "base/callback.h" | 10 #include "base/callback.h" |
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
61 const DistillerURLFetcherFactory& distiller_url_fetcher_factory); | 61 const DistillerURLFetcherFactory& distiller_url_fetcher_factory); |
62 virtual ~DistillerImpl(); | 62 virtual ~DistillerImpl(); |
63 | 63 |
64 // Creates an execution context. This must be called once before any calls are | 64 // Creates an execution context. This must be called once before any calls are |
65 // made to distill the page. | 65 // made to distill the page. |
66 virtual void Init(); | 66 virtual void Init(); |
67 | 67 |
68 virtual void DistillPage(const GURL& url, | 68 virtual void DistillPage(const GURL& url, |
69 const DistillerCallback& callback) OVERRIDE; | 69 const DistillerCallback& callback) OVERRIDE; |
70 | 70 |
71 void SetMaxNumPagesInArticle(size_t max_num_pages); | |
72 | |
71 private: | 73 private: |
72 void OnFetchImageDone(DistilledPageProto* distilled_page_proto, | 74 // In case of multiple pages, the Distiller maintains state of multiple pages |
75 // as relative page numbers. E.g. if distillation starts at page 2 for a 3 | |
cjhopman
2014/02/15 02:44:15
nit: relative to what? Might not be needed since i
shashi
2014/02/15 03:15:36
Done.
| |
76 // page article, the relative page numbers assigned to pages will be [-1,0,1]. | |
77 | |
78 // Class representing the state of a page under distillation. | |
79 struct DistilledPageData { | |
80 DistilledPageData(); | |
81 virtual ~DistilledPageData(); | |
82 // Relative page number of the page. | |
83 int page_num; | |
84 std::string title; | |
85 ScopedVector<DistillerURLFetcher> image_fetchers_; | |
86 scoped_ptr<DistilledPageProto> proto; | |
87 | |
88 private: | |
89 DISALLOW_COPY_AND_ASSIGN(DistilledPageData); | |
90 }; | |
91 | |
92 void OnFetchImageDone(int page_num, | |
73 DistillerURLFetcher* url_fetcher, | 93 DistillerURLFetcher* url_fetcher, |
74 const std::string& id, | 94 const std::string& id, |
75 const std::string& response); | 95 const std::string& response); |
76 | 96 |
77 void OnPageDistillationFinished(const GURL& page_url, | 97 void OnPageDistillationFinished(int page_num, |
98 const GURL& page_url, | |
78 scoped_ptr<DistilledPageInfo> distilled_page, | 99 scoped_ptr<DistilledPageInfo> distilled_page, |
79 bool distillation_successful); | 100 bool distillation_successful); |
80 | 101 |
81 virtual void FetchImage(DistilledPageProto* distilled_page_proto, | 102 virtual void FetchImage(int page_num, |
82 const std::string& image_id, | 103 const std::string& image_id, |
83 const std::string& item); | 104 const std::string& item); |
84 | 105 |
85 // Distills the page and adds the new page to |article_proto|. | 106 // Distills the next page. |
86 void DistillPage(const GURL& url); | 107 void DistillNextPage(); |
108 | |
109 // Adds the |url| to |waiting_pages_| if |page_num| is a valid relative | |
110 // page number and |url| is valid. Ignores duplicate pages and urls. | |
111 void AddToDistillationQueue(int page_num, const GURL& url); | |
112 | |
113 // Check if |page_num| is a valid relative page number, i.e. page with | |
114 // |page_num| is either under distillation or has already completed | |
115 // distillation. | |
116 bool IsPageNumberInUse(int page_num) const; | |
117 | |
118 bool AreAllPagesFinished() const; | |
119 | |
120 // Total number of pages in the article that the distiller knows of. This | |
121 // includes pages that are pending distillation. | |
122 size_t TotalPageCount() const; | |
87 | 123 |
88 // Runs |distillation_cb_| if all distillation callbacks and image fetches are | 124 // Runs |distillation_cb_| if all distillation callbacks and image fetches are |
89 // complete. | 125 // complete. |
90 void RunDistillerCallbackIfDone(); | 126 void RunDistillerCallbackIfDone(); |
91 | 127 |
128 // Checks if the page with |page_num| has finished distillation, including | |
129 // all image fetches. | |
130 void AddPageIfDone(int page_num); | |
131 | |
132 DistilledPageData* GetPageAtIndex(size_t index) const; | |
133 | |
92 const DistillerURLFetcherFactory& distiller_url_fetcher_factory_; | 134 const DistillerURLFetcherFactory& distiller_url_fetcher_factory_; |
93 scoped_ptr<PageDistiller> page_distiller_; | 135 scoped_ptr<PageDistiller> page_distiller_; |
94 DistillerCallback distillation_cb_; | 136 DistillerCallback distillation_cb_; |
95 | 137 |
96 ScopedVector<DistillerURLFetcher> image_fetchers_; | 138 // Set of pages that are under distillation or have finished distillation. |
97 scoped_ptr<DistilledArticleProto> article_proto_; | 139 // |started_pages_index_| and |finished_pages_index_| maintain the mapping |
98 bool distillation_in_progress_; | 140 // from page number to the indices in |pages_|. |
99 // Set to keep track of which urls are already seen by the distiller. | 141 ScopedVector<DistilledPageData> pages_; |
100 base::hash_set<std::string> processed_urls_; | 142 |
143 // Maps page numbers of finished pages to the indices in |pages_|. | |
144 std::map<int, size_t> finished_pages_index_; | |
145 | |
146 // The list of pages that are still waiting for distillation to start. | |
cjhopman
2014/02/15 02:44:15
Nit: The order of declaration here should be finis
shashi
2014/02/15 03:15:36
Done.
shashi
2014/02/15 03:15:36
Done.
| |
147 // This is a map, to make distiller prefer distilling lower page numbers | |
148 // first. | |
149 std::map<int, GURL> waiting_pages_; | |
150 | |
151 // Maps page numbers of pages under distillation to the indices in |pages_|. | |
152 // If a page is in |started_pages_index_|, it is still waiting for an action | |
153 // (distillation or image fetch) to finish. | |
154 base::hash_map<int, size_t> started_pages_index_; | |
155 | |
156 // Set to keep track of which urls are already seen by the distiller. Used to | |
157 // prevent distiller from distilling the same url twice. | |
158 base::hash_set<std::string> seen_urls_; | |
159 | |
160 size_t max_pages_in_article_; | |
101 | 161 |
102 DISALLOW_COPY_AND_ASSIGN(DistillerImpl); | 162 DISALLOW_COPY_AND_ASSIGN(DistillerImpl); |
103 }; | 163 }; |
104 | 164 |
105 } // namespace dom_distiller | 165 } // namespace dom_distiller |
106 | 166 |
107 #endif // COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ | 167 #endif // COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ |
OLD | NEW |