OLD | NEW |
---|---|
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ | 5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ |
6 #define COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ | 6 #define COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ |
7 | 7 |
8 #include <string> | 8 #include <string> |
9 | 9 |
10 #include "base/callback.h" | 10 #include "base/callback.h" |
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
62 virtual ~DistillerImpl(); | 62 virtual ~DistillerImpl(); |
63 | 63 |
64 // Creates an execution context. This must be called once before any calls are | 64 // Creates an execution context. This must be called once before any calls are |
65 // made to distill the page. | 65 // made to distill the page. |
66 virtual void Init(); | 66 virtual void Init(); |
67 | 67 |
68 virtual void DistillPage(const GURL& url, | 68 virtual void DistillPage(const GURL& url, |
69 const DistillerCallback& callback) OVERRIDE; | 69 const DistillerCallback& callback) OVERRIDE; |
70 | 70 |
71 private: | 71 private: |
72 void OnFetchImageDone(DistilledPageProto* distilled_page_proto, | 72 // In case of multiple pages, the Distiller maintains state of multiple pages |
73 // as relative page numbers. E.g. if distillation starts at page 2 for a 3 | |
74 // page article, the relative page numbers assigned to pages will be [-1,0,1]. | |
75 | |
76 // Class representing the state of a page under distillation. | |
77 struct DistilledPageData { | |
78 DistilledPageData(); | |
79 virtual ~DistilledPageData(); | |
80 // Relative page number of the page. | |
81 int page_no; | |
82 std::string title; | |
83 ScopedVector<DistillerURLFetcher> image_fetchers_; | |
84 scoped_ptr<DistilledPageProto> proto; | |
85 | |
86 private: | |
87 DISALLOW_COPY_AND_ASSIGN(DistilledPageData); | |
88 }; | |
89 | |
90 void OnFetchImageDone(DistilledPageData* distilled_page_data, | |
73 DistillerURLFetcher* url_fetcher, | 91 DistillerURLFetcher* url_fetcher, |
74 const std::string& id, | 92 const std::string& id, |
75 const std::string& response); | 93 const std::string& response); |
76 | 94 |
77 void OnPageDistillationFinished(const GURL& page_url, | 95 void OnPageDistillationFinished(int page_no, |
78 scoped_ptr<DistilledPageInfo> distilled_page, | 96 scoped_ptr<DistilledPageInfo> distilled_page, |
79 bool distillation_successful); | 97 bool distillation_successful); |
80 | 98 |
81 virtual void FetchImage(DistilledPageProto* distilled_page_proto, | 99 virtual void FetchImage(DistilledPageData* distilled_page_data, |
82 const std::string& image_id, | 100 const std::string& image_id, |
83 const std::string& item); | 101 const std::string& item); |
84 | 102 |
85 // Distills the page and adds the new page to |article_proto|. | 103 // Distills the next page. |
86 void DistillPage(const GURL& url); | 104 void DistillNextPage(); |
105 | |
106 // Adds the |url| to |pages_to_be_distilled_| if |page_no| is a valid relative | |
107 // page number and |url| is valid. Ignores duplicate pages and urls. | |
108 void AddToDistillationQueue(int page_no, const GURL& url); | |
109 | |
110 // Checks if |page_no| is a valid relative page number, i.e. page with | |
111 // |page_no| is either under distillation or has already completed | |
112 // distillation. | |
113 bool IsValidPageNo(int page_no) const; | |
87 | 114 |
88 // Runs |distillation_cb_| if all distillation callbacks and image fetches are | 115 // Runs |distillation_cb_| if all distillation callbacks and image fetches are |
89 // complete. | 116 // complete. |
90 void RunDistillerCallbackIfDone(); | 117 void RunDistillerCallbackIfDone(); |
91 | 118 |
119 // Checks if page |distilled_page_data| has finished distillation, including | |
120 // all image fetches. | |
121 void CheckIfPageDone(const DistilledPageData* distilled_page_data); | |
122 | |
92 const DistillerURLFetcherFactory& distiller_url_fetcher_factory_; | 123 const DistillerURLFetcherFactory& distiller_url_fetcher_factory_; |
93 scoped_ptr<PageDistiller> page_distiller_; | 124 scoped_ptr<PageDistiller> page_distiller_; |
94 DistillerCallback distillation_cb_; | 125 DistillerCallback distillation_cb_; |
95 | 126 |
96 ScopedVector<DistillerURLFetcher> image_fetchers_; | 127 // Set of pages which have finished distillation. Note: some pages may be |
97 scoped_ptr<DistilledArticleProto> article_proto_; | 128 // waiting for image fetches to be complete. |
98 bool distillation_in_progress_; | 129 // |distilled_pages_index_| maintains the mapping from page number to the |
130 // index in |distilled_pages_|. | |
131 ScopedVector<DistilledPageData> distilled_pages_; | |
132 | |
133 // Maps page number to the index in |distilled_pages_|. | |
134 std::map<int, size_t> distilled_pages_index_; | |
cjhopman
2014/02/12 20:39:09
Couldn't this just point right to the DPD in disti
shashi
2014/02/13 01:03:11
It could but then I have to manually manage the li
cjhopman
2014/02/13 02:48:48
I mean it would point into distilled_pages. I.e. t
shashi
2014/02/13 20:09:51
When I will add incremental updates in my later pa
| |
135 | |
136 // The list of pages that are still waiting for distillation to finish. | |
137 std::map<int, GURL> pages_to_be_distilled_; | |
138 | |
139 // The page numbers of pages that are either waiting for distillation or image | |
cjhopman
2014/02/12 20:39:09
This includes pages waiting in pages_to_be_distill
shashi
2014/02/13 01:03:11
Yes, it does, any unfinished pages.
| |
140 // fetches. If a page is in |in_progress_pages_| that means it is still waiting | |
141 // for an action (distillation or image fetch) to finish. | |
142 base::hash_set<int> in_progress_pages_; | |
cjhopman
2014/02/12 20:39:09
It's unclear how a page distillation works through
shashi
2014/02/13 01:03:11
Done, hopefully more clear now.
On 2014/02/12 20:3
cjhopman
2014/02/13 02:48:48
This isn't really more clear, particularly because
shashi
2014/02/13 20:09:51
in_progress_pages_ = all pages that are not finish
cjhopman
2014/02/13 20:43:19
I would much prefer that. It was difficult reading
shashi
2014/02/13 21:57:25
Done.
| |
143 | |
99 // Set to keep track of which urls are already seen by the distiller. | 144 // Set to keep track of which urls are already seen by the distiller. |
cjhopman
2014/02/12 20:39:09
This comment should say what this is used for. Als
shashi
2014/02/13 01:03:11
Done.
| |
100 base::hash_set<std::string> processed_urls_; | 145 base::hash_set<std::string> processed_urls_; |
101 | 146 |
147 scoped_ptr<DistilledArticleProto> article_proto_; | |
cjhopman
2014/02/12 20:39:09
This is only actually created and used in RunDisti
shashi
2014/02/13 01:03:11
Because I was concerned about the change in lifeti
cjhopman
2014/02/13 02:48:48
Ownership of it is passed to the DistillerCallback
shashi
2014/02/13 20:09:51
Duh! Done, that file is task_tracker :).
On 2014/
| |
148 | |
102 DISALLOW_COPY_AND_ASSIGN(DistillerImpl); | 149 DISALLOW_COPY_AND_ASSIGN(DistillerImpl); |
103 }; | 150 }; |
104 | 151 |
105 } // namespace dom_distiller | 152 } // namespace dom_distiller |
106 | 153 |
107 #endif // COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ | 154 #endif // COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ |
OLD | NEW |