Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(710)

Side by Side Diff: components/dom_distiller/core/distiller.h

Issue 130543003: Store page no for distilled pages undergoing distillation. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Address comments. Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ 5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_
6 #define COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ 6 #define COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_
7 7
8 #include <string> 8 #include <string>
9 9
10 #include "base/callback.h" 10 #include "base/callback.h"
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after
61 const DistillerURLFetcherFactory& distiller_url_fetcher_factory); 61 const DistillerURLFetcherFactory& distiller_url_fetcher_factory);
62 virtual ~DistillerImpl(); 62 virtual ~DistillerImpl();
63 63
64 // Creates an execution context. This must be called once before any calls are 64 // Creates an execution context. This must be called once before any calls are
65 // made to distill the page. 65 // made to distill the page.
66 virtual void Init(); 66 virtual void Init();
67 67
68 virtual void DistillPage(const GURL& url, 68 virtual void DistillPage(const GURL& url,
69 const DistillerCallback& callback) OVERRIDE; 69 const DistillerCallback& callback) OVERRIDE;
70 70
71 void SetMaxNumPagesInArticle(size_t max_num_pages);
72
71 private: 73 private:
72 void OnFetchImageDone(DistilledPageProto* distilled_page_proto, 74 // In case of multiple pages, the Distiller maintains state of multiple pages
75 // as relative page numbers. E.g. if distillation starts at page 2 for a 3
cjhopman 2014/02/15 02:44:15 nit: relative to what? Might not be needed since i
shashi 2014/02/15 03:15:36 Done.
76 // page article, the relative page numbers assigned to pages will be [-1,0,1].
77
78 // Class representing the state of a page under distillation.
79 struct DistilledPageData {
80 DistilledPageData();
81 virtual ~DistilledPageData();
82 // Relative page number of the page.
83 int page_num;
84 std::string title;
85 ScopedVector<DistillerURLFetcher> image_fetchers_;
86 scoped_ptr<DistilledPageProto> proto;
87
88 private:
89 DISALLOW_COPY_AND_ASSIGN(DistilledPageData);
90 };
91
92 void OnFetchImageDone(int page_num,
73 DistillerURLFetcher* url_fetcher, 93 DistillerURLFetcher* url_fetcher,
74 const std::string& id, 94 const std::string& id,
75 const std::string& response); 95 const std::string& response);
76 96
77 void OnPageDistillationFinished(const GURL& page_url, 97 void OnPageDistillationFinished(int page_num,
98 const GURL& page_url,
78 scoped_ptr<DistilledPageInfo> distilled_page, 99 scoped_ptr<DistilledPageInfo> distilled_page,
79 bool distillation_successful); 100 bool distillation_successful);
80 101
81 virtual void FetchImage(DistilledPageProto* distilled_page_proto, 102 virtual void FetchImage(int page_num,
82 const std::string& image_id, 103 const std::string& image_id,
83 const std::string& item); 104 const std::string& item);
84 105
85 // Distills the page and adds the new page to |article_proto|. 106 // Distills the next page.
86 void DistillPage(const GURL& url); 107 void DistillNextPage();
108
109 // Adds the |url| to |waiting_pages_| if |page_num| is a valid relative
110 // page number and |url| is valid. Ignores duplicate pages and urls.
111 void AddToDistillationQueue(int page_num, const GURL& url);
112
113 // Check if |page_num| is a valid relative page number, i.e. page with
114 // |page_num| is either under distillation or has already completed
115 // distillation.
116 bool IsPageNumberInUse(int page_num) const;
117
118 bool AreAllPagesFinished() const;
119
120 // Total number of pages in the article that the distiller knows of; this
121 // includes pages that are pending distillation.
122 size_t TotalPageCount() const;
87 123
88 // Runs |distillation_cb_| if all distillation callbacks and image fetches are 124 // Runs |distillation_cb_| if all distillation callbacks and image fetches are
89 // complete. 125 // complete.
90 void RunDistillerCallbackIfDone(); 126 void RunDistillerCallbackIfDone();
91 127
128 // Checks if the page numbered |page_num| has finished distillation, including
129 // all image fetches.
130 void AddPageIfDone(int page_num);
131
132 DistilledPageData* GetPageAtIndex(size_t index) const;
133
92 const DistillerURLFetcherFactory& distiller_url_fetcher_factory_; 134 const DistillerURLFetcherFactory& distiller_url_fetcher_factory_;
93 scoped_ptr<PageDistiller> page_distiller_; 135 scoped_ptr<PageDistiller> page_distiller_;
94 DistillerCallback distillation_cb_; 136 DistillerCallback distillation_cb_;
95 137
96 ScopedVector<DistillerURLFetcher> image_fetchers_; 138 // Set of pages that are under distillation or have finished distillation.
97 scoped_ptr<DistilledArticleProto> article_proto_; 139 // |started_pages_index_| and |finished_pages_index_| maintain the mapping
98 bool distillation_in_progress_; 140 // from page number to the indices in |pages_|.
99 // Set to keep track of which urls are already seen by the distiller. 141 ScopedVector<DistilledPageData> pages_;
100 base::hash_set<std::string> processed_urls_; 142
143 // Maps page numbers of finished pages to the indices in |pages_|.
144 std::map<int, size_t> finished_pages_index_;
145
146 // The list of pages that are still waiting for distillation to start.
cjhopman 2014/02/15 02:44:15 Nit: The order of declaration here should be finis
shashi 2014/02/15 03:15:36 Done.
shashi 2014/02/15 03:15:36 Done.
147 // This is a map, to make distiller prefer distilling lower page numbers
148 // first.
149 std::map<int, GURL> waiting_pages_;
150
151 // Maps page numbers of pages under distillation to the indices in |pages_|.
152 // If a page is in |started_pages_index_|, that means it is still waiting for an action
153 // (distillation or image fetch) to finish.
154 base::hash_map<int, size_t> started_pages_index_;
155
156 // Set to keep track of which urls are already seen by the distiller. Used to
157 // prevent distiller from distilling the same url twice.
158 base::hash_set<std::string> seen_urls_;
159
160 size_t max_pages_in_article_;
101 161
102 DISALLOW_COPY_AND_ASSIGN(DistillerImpl); 162 DISALLOW_COPY_AND_ASSIGN(DistillerImpl);
103 }; 163 };
104 164
105 } // namespace dom_distiller 165 } // namespace dom_distiller
106 166
107 #endif // COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ 167 #endif // COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_
OLDNEW
« no previous file with comments | « no previous file | components/dom_distiller/core/distiller.cc » ('j') | components/dom_distiller/core/distiller.cc » ('J')

Powered by Google App Engine
This is Rietveld 408576698