Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(419)

Side by Side Diff: components/dom_distiller/core/distiller.h

Issue 130543003: Store page no for distilled pages undergoing distillation. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: fix nitz. Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | components/dom_distiller/core/distiller.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ 5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_
6 #define COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ 6 #define COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_
7 7
8 #include <string> 8 #include <string>
9 9
10 #include "base/callback.h" 10 #include "base/callback.h"
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after
61 const DistillerURLFetcherFactory& distiller_url_fetcher_factory); 61 const DistillerURLFetcherFactory& distiller_url_fetcher_factory);
62 virtual ~DistillerImpl(); 62 virtual ~DistillerImpl();
63 63
64 // Creates an execution context. This must be called once before any calls are 64 // Creates an execution context. This must be called once before any calls are
65 // made to distill the page. 65 // made to distill the page.
66 virtual void Init(); 66 virtual void Init();
67 67
68 virtual void DistillPage(const GURL& url, 68 virtual void DistillPage(const GURL& url,
69 const DistillerCallback& callback) OVERRIDE; 69 const DistillerCallback& callback) OVERRIDE;
70 70
71 void SetMaxNumPagesInArticle(size_t max_num_pages);
72
71 private: 73 private:
72 void OnFetchImageDone(DistilledPageProto* distilled_page_proto, 74 // In case of multiple pages, the Distiller maintains state of multiple pages
75 // as page numbers relative to the page number where distillation started.
76 // E.g. if distillation starts at page 2 of a 3 page article, the relative
77 // page numbers assigned to pages will be [-1,0,1].
78
79 // Class representing the state of a page under distillation.
80 struct DistilledPageData {
81 DistilledPageData();
82 virtual ~DistilledPageData();
83 // Relative page number of the page.
84 int page_num;
85 std::string title;
86 ScopedVector<DistillerURLFetcher> image_fetchers_;
87 scoped_ptr<DistilledPageProto> proto;
88
89 private:
90 DISALLOW_COPY_AND_ASSIGN(DistilledPageData);
91 };
92
93 void OnFetchImageDone(int page_num,
73 DistillerURLFetcher* url_fetcher, 94 DistillerURLFetcher* url_fetcher,
74 const std::string& id, 95 const std::string& id,
75 const std::string& response); 96 const std::string& response);
76 97
77 void OnPageDistillationFinished(const GURL& page_url, 98 void OnPageDistillationFinished(int page_num,
99 const GURL& page_url,
78 scoped_ptr<DistilledPageInfo> distilled_page, 100 scoped_ptr<DistilledPageInfo> distilled_page,
79 bool distillation_successful); 101 bool distillation_successful);
80 102
81 virtual void FetchImage(DistilledPageProto* distilled_page_proto, 103 virtual void FetchImage(int page_num,
82 const std::string& image_id, 104 const std::string& image_id,
83 const std::string& item); 105 const std::string& item);
84 106
85 // Distills the page and adds the new page to |article_proto|. 107 // Distills the next page.
86 void DistillPage(const GURL& url); 108 void DistillNextPage();
109
110 // Adds the |url| to |waiting_pages_| if |page_num| is a valid relative
111 // page number and |url| is valid. Ignores duplicate pages and urls.
112 void AddToDistillationQueue(int page_num, const GURL& url);
113
114 // Check if |page_num| is a valid relative page number, i.e. page with
115 // |page_num| is either under distillation or has already completed
116 // distillation.
117 bool IsPageNumberInUse(int page_num) const;
118
119 bool AreAllPagesFinished() const;
120
121 // Total number of pages in the article that the distiller knows of; this
122 // includes pages that are pending distillation.
123 size_t TotalPageCount() const;
87 124
88 // Runs |distillation_cb_| if all distillation callbacks and image fetches are 125 // Runs |distillation_cb_| if all distillation callbacks and image fetches are
89 // complete. 126 // complete.
90 void RunDistillerCallbackIfDone(); 127 void RunDistillerCallbackIfDone();
91 128
129 // Checks if the page with |page_num| has finished distillation, including
130 // all image fetches.
131 void AddPageIfDone(int page_num);
132
133 DistilledPageData* GetPageAtIndex(size_t index) const;
134
92 const DistillerURLFetcherFactory& distiller_url_fetcher_factory_; 135 const DistillerURLFetcherFactory& distiller_url_fetcher_factory_;
93 scoped_ptr<PageDistiller> page_distiller_; 136 scoped_ptr<PageDistiller> page_distiller_;
94 DistillerCallback distillation_cb_; 137 DistillerCallback distillation_cb_;
95 138
96 ScopedVector<DistillerURLFetcher> image_fetchers_; 139 // Set of pages that are under distillation or have finished distillation.
97 scoped_ptr<DistilledArticleProto> article_proto_; 140 // |started_pages_index_| and |finished_pages_index_| maintain the mapping
98 bool distillation_in_progress_; 141 // from page number to the indices in |pages_|.
99 // Set to keep track of which urls are already seen by the distiller. 142 ScopedVector<DistilledPageData> pages_;
100 base::hash_set<std::string> processed_urls_; 143
144 // Maps page numbers of finished pages to the indices in |pages_|.
145 std::map<int, size_t> finished_pages_index_;
146
147 // Maps page numbers of pages under distillation to the indices in |pages_|.
148 // If a page is in |started_pages_index_|, it is still waiting for an action
149 // (distillation or image fetch) to finish.
150 base::hash_map<int, size_t> started_pages_index_;
151
152 // The list of pages that are still waiting for distillation to start.
153 // This is a map, to make distiller prefer distilling lower page numbers
154 // first.
155 std::map<int, GURL> waiting_pages_;
156
157 // Set to keep track of which urls are already seen by the distiller. Used to
158 // prevent distiller from distilling the same url twice.
159 base::hash_set<std::string> seen_urls_;
160
161 size_t max_pages_in_article_;
101 162
102 DISALLOW_COPY_AND_ASSIGN(DistillerImpl); 163 DISALLOW_COPY_AND_ASSIGN(DistillerImpl);
103 }; 164 };
104 165
105 } // namespace dom_distiller 166 } // namespace dom_distiller
106 167
107 #endif // COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ 168 #endif // COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_
OLDNEW
« no previous file with comments | « no previous file | components/dom_distiller/core/distiller.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698