Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(202)

Side by Side Diff: components/dom_distiller/core/distiller.h

Issue 130543003: Store page no for distilled pages undergoing distillation. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Partition internal states into 3 sets: started, pending, finished. Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ 5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_
6 #define COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ 6 #define COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_
7 7
8 #include <string> 8 #include <string>
9 9
10 #include "base/callback.h" 10 #include "base/callback.h"
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after
61 const DistillerURLFetcherFactory& distiller_url_fetcher_factory); 61 const DistillerURLFetcherFactory& distiller_url_fetcher_factory);
62 virtual ~DistillerImpl(); 62 virtual ~DistillerImpl();
63 63
64 // Creates an execution context. This must be called once before any calls are 64 // Creates an execution context. This must be called once before any calls are
65 // made to distill the page. 65 // made to distill the page.
66 virtual void Init(); 66 virtual void Init();
67 67
68 virtual void DistillPage(const GURL& url, 68 virtual void DistillPage(const GURL& url,
69 const DistillerCallback& callback) OVERRIDE; 69 const DistillerCallback& callback) OVERRIDE;
70 70
71 protected:
72 // Returns the maximum number of pages in an article.
73 // Overridden by tests to verify the limit on pages in an article.
74 virtual size_t GetMaxNumPagesInArticle() const;
cjhopman 2014/02/14 20:53:52 Overriding in tests for something like this is bad
shashi 2014/02/14 23:25:29 Done.
75
71 private: 76 private:
72 void OnFetchImageDone(DistilledPageProto* distilled_page_proto, 77 // In case of multiple pages, the Distiller maintains state of multiple pages
78 // as relative page numbers. E.g. if distillation starts at page 2 of a 3
79 // page article, the relative page numbers assigned to pages will be [-1,0,1].
80
81 // Class representing the state of a page under distillation.
82 struct DistilledPageData {
83 DistilledPageData();
84 virtual ~DistilledPageData();
85 // Relative page number of the page.
86 int page_no;
cjhopman 2014/02/14 20:53:52 Consider doing `s/page_no/page_number` throughout.
shashi 2014/02/14 23:25:29 Done.
87 std::string title;
88 ScopedVector<DistillerURLFetcher> image_fetchers_;
89 scoped_ptr<DistilledPageProto> proto;
90
91 private:
92 DISALLOW_COPY_AND_ASSIGN(DistilledPageData);
93 };
94
95 void OnFetchImageDone(DistilledPageData* distilled_page_data,
cjhopman 2014/02/14 20:53:52 So a bunch of functions are called to distill a pa
shashi 2014/02/14 23:25:29 Change to use page_num throughout. On 2014/02/14
73 DistillerURLFetcher* url_fetcher, 96 DistillerURLFetcher* url_fetcher,
74 const std::string& id, 97 const std::string& id,
75 const std::string& response); 98 const std::string& response);
76 99
77 void OnPageDistillationFinished(const GURL& page_url, 100 void OnPageDistillationFinished(int page_no,
101 const GURL& page_url,
78 scoped_ptr<DistilledPageInfo> distilled_page, 102 scoped_ptr<DistilledPageInfo> distilled_page,
79 bool distillation_successful); 103 bool distillation_successful);
80 104
81 virtual void FetchImage(DistilledPageProto* distilled_page_proto, 105 virtual void FetchImage(DistilledPageData* distilled_page_data,
82 const std::string& image_id, 106 const std::string& image_id,
83 const std::string& item); 107 const std::string& item);
84 108
85 // Distills the page and adds the new page to |article_proto|. 109 // Distills the next page.
86 void DistillPage(const GURL& url); 110 void DistillNextPage();
111
112 // Adds the |url| to |waiting_pages_| if |page_no| is a valid relative
113 // page number and |url| is valid. Ignores duplicate pages and urls.
114 void AddToDistillationQueue(int page_no, const GURL& url);
115
116 // Checks if |page_no| is a valid relative page number, i.e. page with
117 // |page_no| is either under distillation or has already completed
118 // distillation.
119 bool IsPageNumberInUse(int page_no) const;
120
121 bool NoPendingPages() const;
cjhopman 2014/02/14 20:53:52 This name doesn't really imply a question... how a
shashi 2014/02/14 23:25:29 Done.
122
123 // Total number of pages in the article that the distiller knows of. This
124 // includes pages that are pending distillation.
125 size_t TotalPageCount() const;
87 126
88 // Runs |distillation_cb_| if all distillation callbacks and image fetches are 127 // Runs |distillation_cb_| if all distillation callbacks and image fetches are
89 // complete. 128 // complete.
90 void RunDistillerCallbackIfDone(); 129 void RunDistillerCallbackIfDone();
91 130
131 // Checks if page |distilled_page_data| has finished distillation, including
132 // all image fetches.
133 void CheckAndAddPageIfDone(DistilledPageData* distilled_page_data);
cjhopman 2014/02/14 20:53:52 Does this mean `(check and add) if done` or `check
shashi 2014/02/14 23:25:29 Done.
134
92 const DistillerURLFetcherFactory& distiller_url_fetcher_factory_; 135 const DistillerURLFetcherFactory& distiller_url_fetcher_factory_;
93 scoped_ptr<PageDistiller> page_distiller_; 136 scoped_ptr<PageDistiller> page_distiller_;
94 DistillerCallback distillation_cb_; 137 DistillerCallback distillation_cb_;
95 138
96 ScopedVector<DistillerURLFetcher> image_fetchers_; 139 // Set of pages which have finished distillation.
97 scoped_ptr<DistilledArticleProto> article_proto_; 140 // |finished_pages_index_| maintains the mapping from page number to the
98 bool distillation_in_progress_; 141 // index in |finished_pages_|.
99 // Set to keep track of which urls are already seen by the distiller. 142 ScopedVector<DistilledPageData> finished_pages_;
100 base::hash_set<std::string> processed_urls_; 143
144 // Maps page number to the index in |finished_pages_|.
145 std::map<int, size_t> finished_pages_index_;
146
147 // The list of pages that are still waiting for distillation to start.
148 // This is a map, to make distiller prefer distilling lower page numbers
149 // first.
150 std::map<int, GURL> waiting_pages_;
151
152 // The page number of pages that are either waiting for distillation or image
153 // fetches. If a page is in |started_pages_| that means it is still waiting
154 // for an action (distillation or image fetch) to finish.
cjhopman 2014/02/14 20:53:52 These sentences seem redundant. Remove one of them
shashi 2014/02/14 23:25:29 Done.
155 base::hash_set<int> started_pages_;
156
157 // Set to keep track of which urls are already seen by the distiller. Used to
158 // prevent distiller from distilling the same url twice.
159 base::hash_set<std::string> seen_urls_;
101 160
102 DISALLOW_COPY_AND_ASSIGN(DistillerImpl); 161 DISALLOW_COPY_AND_ASSIGN(DistillerImpl);
103 }; 162 };
104 163
105 } // namespace dom_distiller 164 } // namespace dom_distiller
106 165
107 #endif // COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ 166 #endif // COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_
OLDNEW
« no previous file with comments | « no previous file | components/dom_distiller/core/distiller.cc » ('j') | components/dom_distiller/core/distiller.cc » ('J')

Powered by Google App Engine
This is Rietveld 408576698