Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ | 5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ |
| 6 #define COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ | 6 #define COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ |
| 7 | 7 |
| 8 #include <string> | 8 #include <string> |
| 9 | 9 |
| 10 #include "base/callback.h" | 10 #include "base/callback.h" |
| (...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 62 virtual ~DistillerImpl(); | 62 virtual ~DistillerImpl(); |
| 63 | 63 |
| 64 // Creates an execution context. This must be called once before any calls are | 64 // Creates an execution context. This must be called once before any calls are |
| 65 // made to distill the page. | 65 // made to distill the page. |
| 66 virtual void Init(); | 66 virtual void Init(); |
| 67 | 67 |
| 68 virtual void DistillPage(const GURL& url, | 68 virtual void DistillPage(const GURL& url, |
| 69 const DistillerCallback& callback) OVERRIDE; | 69 const DistillerCallback& callback) OVERRIDE; |
| 70 | 70 |
| 71 private: | 71 private: |
| 72 void OnFetchImageDone(DistilledPageProto* distilled_page_proto, | 72 // In case of multiple pages, the Distiller maintains state of multiple pages |
| 73 // as relative page numbers. E.g. if distillation starts at page 2 for a 3 | |
| 74 // page article, the relative page numbers assigned to pages will be [-1,0,1]. | |
| 75 | |
| 76 // Class representing the state of a page under distillation. | |
| 77 struct DistilledPageData { | |
| 78 DistilledPageData(); | |
| 79 virtual ~DistilledPageData(); | |
| 80 // Relative page number of the page. | |
| 81 int page_no; | |
| 82 std::string title; | |
| 83 ScopedVector<DistillerURLFetcher> image_fetchers_; | |
| 84 scoped_ptr<DistilledPageProto> proto; | |
| 85 | |
| 86 private: | |
| 87 DISALLOW_COPY_AND_ASSIGN(DistilledPageData); | |
| 88 }; | |
| 89 | |
| 90 void OnFetchImageDone(DistilledPageData* distilled_page_data, | |
| 73 DistillerURLFetcher* url_fetcher, | 91 DistillerURLFetcher* url_fetcher, |
| 74 const std::string& id, | 92 const std::string& id, |
| 75 const std::string& response); | 93 const std::string& response); |
| 76 | 94 |
| 77 void OnPageDistillationFinished(const GURL& page_url, | 95 void OnPageDistillationFinished(int page_no, |
| 78 scoped_ptr<DistilledPageInfo> distilled_page, | 96 scoped_ptr<DistilledPageInfo> distilled_page, |
| 79 bool distillation_successful); | 97 bool distillation_successful); |
| 80 | 98 |
| 81 virtual void FetchImage(DistilledPageProto* distilled_page_proto, | 99 virtual void FetchImage(DistilledPageData* distilled_page_data, |
| 82 const std::string& image_id, | 100 const std::string& image_id, |
| 83 const std::string& item); | 101 const std::string& item); |
| 84 | 102 |
| 85 // Distills the page and adds the new page to |article_proto|. | 103 // Distills the next page. |
| 86 void DistillPage(const GURL& url); | 104 void DistillNextPage(); |
| 105 | |
| 106 // Adds the |url| to |pages_to_be_distilled_| if |page_no| is a valid relative | |
| 107 // page number and |url| is valid. Ignores duplicate pages and urls. | |
| 108 void AddToDistillationQueue(int page_no, const GURL& url); | |
| 109 | |
| 110 // Check if |page_no| is a valid relative page number, i.e. page with | |
| 111 // |page_no| is either under distillation or has already completed | |
| 112 // distillation. | |
| 113 bool IsValidPageNo(int page_no) const; | |
| 87 | 114 |
| 88 // Runs |distillation_cb_| if all distillation callbacks and image fetches are | 115 // Runs |distillation_cb_| if all distillation callbacks and image fetches are |
| 89 // complete. | 116 // complete. |
| 90 void RunDistillerCallbackIfDone(); | 117 void RunDistillerCallbackIfDone(); |
| 91 | 118 |
| 119 // Checks if page |distilled_page_data| has finished distillation, including | |
| 120 // all image fetches. | |
| 121 void CheckIfPageDone(const DistilledPageData* distilled_page_data); | |
| 122 | |
| 92 const DistillerURLFetcherFactory& distiller_url_fetcher_factory_; | 123 const DistillerURLFetcherFactory& distiller_url_fetcher_factory_; |
| 93 scoped_ptr<PageDistiller> page_distiller_; | 124 scoped_ptr<PageDistiller> page_distiller_; |
| 94 DistillerCallback distillation_cb_; | 125 DistillerCallback distillation_cb_; |
| 95 | 126 |
| 96 ScopedVector<DistillerURLFetcher> image_fetchers_; | 127 // Set of pages which have finished distillation. Note: some pages may be |
| 97 scoped_ptr<DistilledArticleProto> article_proto_; | 128 // waiting for image fetches to be complete. |
| 98 bool distillation_in_progress_; | 129 // |distilled_pages_index_| maintains the mapping from page number to the |
| 130 // index in |distilled_pages_|. | |
| 131 ScopedVector<DistilledPageData> distilled_pages_; | |
| 132 | |
| 133 // Maps page number to the index in |distilled_pages_|. | |
| 134 std::map<int, size_t> distilled_pages_index_; | |
|
cjhopman
2014/02/12 20:39:09
Couldn't this just point right to the DPD in disti
shashi
2014/02/13 01:03:11
It could but then I have to manually manage the li
cjhopman
2014/02/13 02:48:48
I mean it would point into distilled_pages. I.e. t
shashi
2014/02/13 20:09:51
When I will add incremental updates in my later pa
| |
| 135 | |
| 136 // The list of pages that are still waiting for distillation to finish. | |
| 137 std::map<int, GURL> pages_to_be_distilled_; | |
| 138 | |
| 139 // The page number of pages that are either waiting for distillation or image | |
|
cjhopman
2014/02/12 20:39:09
This includes pages waiting in pages_to_be_distill
shashi
2014/02/13 01:03:11
Yes, it does, any unfinished pages.
| |
| 140 // fetches. If a page is in |in_progress_pages_|, that means it is still waiting | |
| 141 // for an action (distillation or image fetch) to finish. | |
| 142 base::hash_set<int> in_progress_pages_; | |
|
cjhopman
2014/02/12 20:39:09
It's unclear how a page distillation works through
shashi
2014/02/13 01:03:11
Done, hopefully more clear now.
On 2014/02/12 20:3
cjhopman
2014/02/13 02:48:48
This isn't really more clear, particularly because
shashi
2014/02/13 20:09:51
in_progress_pages_ = all pages that are not finish
cjhopman
2014/02/13 20:43:19
I would much prefer that. It was difficult reading
shashi
2014/02/13 21:57:25
Done.
| |
| 143 | |
| 99 // Set to keep track of which urls are already seen by the distiller. | 144 // Set to keep track of which urls are already seen by the distiller. |
|
cjhopman
2014/02/12 20:39:09
This comment should say what this is used for. Als
shashi
2014/02/13 01:03:11
Done.
| |
| 100 base::hash_set<std::string> processed_urls_; | 145 base::hash_set<std::string> processed_urls_; |
| 101 | 146 |
| 147 scoped_ptr<DistilledArticleProto> article_proto_; | |
|
cjhopman
2014/02/12 20:39:09
This is only actually created and used in RunDisti
shashi
2014/02/13 01:03:11
Because I was concerned about the change in lifeti
cjhopman
2014/02/13 02:48:48
Ownership of it is passed to the DistillerCallback
shashi
2014/02/13 20:09:51
Duh! Done, that file is task_tracker :).
On 2014/
| |
| 148 | |
| 102 DISALLOW_COPY_AND_ASSIGN(DistillerImpl); | 149 DISALLOW_COPY_AND_ASSIGN(DistillerImpl); |
| 103 }; | 150 }; |
| 104 | 151 |
| 105 } // namespace dom_distiller | 152 } // namespace dom_distiller |
| 106 | 153 |
| 107 #endif // COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ | 154 #endif // COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ |
| OLD | NEW |