Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(151)

Side by Side Diff: components/dom_distiller/core/distiller.h

Issue 146843010: Add support for multipage distillation. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ 5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_
6 #define COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ 6 #define COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_
7 7
8 #include <map>
9 #include <string> 8 #include <string>
10 9
11 #include "base/callback.h" 10 #include "base/callback.h"
12 #include "base/gtest_prod_util.h" 11 #include "base/containers/hash_tables.h"
13 #include "base/memory/ref_counted.h" 12 #include "base/memory/scoped_ptr.h"
14 #include "base/values.h"
15 #include "components/dom_distiller/core/distiller_page.h"
16 #include "components/dom_distiller/core/distiller_url_fetcher.h" 13 #include "components/dom_distiller/core/distiller_url_fetcher.h"
17 #include "components/dom_distiller/core/proto/distilled_page.pb.h" 14 #include "components/dom_distiller/core/page_distiller.h"
15 #include "components/dom_distiller/core/proto/distilled_article.pb.h"
18 #include "net/url_request/url_request_context_getter.h" 16 #include "net/url_request/url_request_context_getter.h"
19 #include "url/gurl.h" 17 #include "url/gurl.h"
20 18
21 namespace dom_distiller { 19 namespace dom_distiller {
22 20
23 class DistillerImpl; 21 class DistillerImpl;
24 22
25 class Distiller { 23 class Distiller {
26 public: 24 public:
27 typedef base::Callback<void( 25 typedef base::Callback<void(scoped_ptr<DistilledArticleProto>)>
28 scoped_ptr<DistilledPageProto>)> DistillerCallback; 26 DistillerCallback;
29 virtual ~Distiller() {} 27 virtual ~Distiller() {}
30 28
31 // Distills a page, and asynchronously returns the article HTML to the 29 // Distills a page, and asynchronously returns the article HTML to the
32 // supplied callback. 30 // supplied callback.
33 virtual void DistillPage(const GURL& url, 31 virtual void DistillPage(const GURL& url,
34 const DistillerCallback& callback) = 0; 32 const DistillerCallback& callback) = 0;
35 }; 33 };
36 34
37 class DistillerFactory { 35 class DistillerFactory {
38 public: 36 public:
39 virtual scoped_ptr<Distiller> CreateDistiller() = 0; 37 virtual scoped_ptr<Distiller> CreateDistiller() = 0;
40 virtual ~DistillerFactory() {} 38 virtual ~DistillerFactory() {}
41 }; 39 };
42 40
43 // Factory for creating a Distiller. 41 // Factory for creating a Distiller.
44 class DistillerFactoryImpl : public DistillerFactory { 42 class DistillerFactoryImpl : public DistillerFactory {
45 public: 43 public:
46 DistillerFactoryImpl( 44 DistillerFactoryImpl(
47 scoped_ptr<DistillerPageFactory> distiller_page_factory, 45 scoped_ptr<DistillerPageFactory> distiller_page_factory,
48 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory); 46 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory);
49 virtual ~DistillerFactoryImpl(); 47 virtual ~DistillerFactoryImpl();
50 virtual scoped_ptr<Distiller> CreateDistiller() OVERRIDE; 48 virtual scoped_ptr<Distiller> CreateDistiller() OVERRIDE;
51 49
52 private: 50 private:
53 scoped_ptr<DistillerPageFactory> distiller_page_factory_; 51 scoped_ptr<DistillerPageFactory> distiller_page_factory_;
54 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory_; 52 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory_;
55 }; 53 };
56 54
57 // Distills an article from a page and associated pages. 55 // Distills an article from a page and associated pages.
58 class DistillerImpl : public Distiller, 56 class DistillerImpl : public Distiller {
59 public DistillerPage::Delegate {
60 public: 57 public:
61 DistillerImpl( 58 DistillerImpl(
62 const DistillerPageFactory& distiller_page_factory, 59 const DistillerPageFactory& distiller_page_factory,
63 const DistillerURLFetcherFactory& distiller_url_fetcher_factory); 60 const DistillerURLFetcherFactory& distiller_url_fetcher_factory);
64 virtual ~DistillerImpl(); 61 virtual ~DistillerImpl();
65 62
66 // Creates an execution context. This must be called once before any calls are 63 // Creates an execution context. This must be called once before any calls are
67 // made to distill the page. 64 // made to distill the page.
68 virtual void Init(); 65 virtual void Init();
69 66
70 virtual void DistillPage(const GURL& url, 67 virtual void DistillPage(const GURL& url,
71 const DistillerCallback& callback) OVERRIDE; 68 const DistillerCallback& callback) OVERRIDE;
72 69
73 // PageDistillerContext::Delegate 70 private:
74 virtual void OnLoadURLDone() OVERRIDE; 71 void OnFetchImageDone(DistilledPageProto* distilled_page_proto,
75 virtual void OnExecuteJavaScriptDone(const base::Value* value) OVERRIDE; 72 const std::string& id,
73 const std::string& response);
76 74
77 void OnFetchImageDone(const std::string& id, const std::string& response); 75 void OnPageDistillationFinished(const GURL& page_url,
76 scoped_ptr<DistilledPageInfo> distilled_page,
77 bool distillation_successful);
78 78
79 private: 79 virtual void FetchImage(DistilledPageProto* distilled_page_proto,
80 virtual void LoadURL(const GURL& url); 80 const std::string& image_id,
81 virtual void FetchImage(const std::string& image_id, const std::string& item); 81 const std::string& item);
82 82
83 // Injects JavaScript to distill a loaded page down to its important content, 83 // Distills the page and adds the new page to |article_proto|.
84 // e.g., extracting a news article from its surrounding boilerplate. 84 void DistillPage(const GURL& url);
85 void GetDistilledContent();
86 85
87 const DistillerPageFactory& distiller_page_factory_; 86 // Runs |distillation_cb_| if all distillation callbacks and image fetches are
87 // complete.
88 void RunDistillerCallbackIfDone();
89
88 const DistillerURLFetcherFactory& distiller_url_fetcher_factory_; 90 const DistillerURLFetcherFactory& distiller_url_fetcher_factory_;
89 scoped_ptr<DistillerPage> distiller_page_; 91 scoped_ptr<PageDistiller> page_distiller_;
90 DistillerCallback distillation_cb_; 92 DistillerCallback distillation_cb_;
91 93
92 std::map<std::string, DistillerURLFetcher* > image_fetchers_; 94 base::hash_map<std::string, DistillerURLFetcher*> image_fetchers_;
93 95 scoped_ptr<DistilledArticleProto> article_proto_;
94 scoped_ptr<DistilledPageProto> proto_; 96 bool distillation_in_progress_;
97 // Set to keep track of which urls are already seen by the distiller.
98 base::hash_set<std::string> processed_urls_;
95 99
96 DISALLOW_COPY_AND_ASSIGN(DistillerImpl); 100 DISALLOW_COPY_AND_ASSIGN(DistillerImpl);
97 }; 101 };
98 102
99 } // namespace dom_distiller 103 } // namespace dom_distiller
100 104
101 #endif // COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ 105 #endif // COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_
OLDNEW
« no previous file with comments | « components/dom_distiller/content/dom_distiller_viewer_source.cc ('k') | components/dom_distiller/core/distiller.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698