| OLD | NEW |
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ | 5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ |
| 6 #define COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ | 6 #define COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ |
| 7 | 7 |
| 8 #include <map> | |
| 9 #include <string> | 8 #include <string> |
| 10 | 9 |
| 11 #include "base/callback.h" | 10 #include "base/callback.h" |
| 12 #include "base/gtest_prod_util.h" | 11 #include "base/containers/hash_tables.h" |
| 13 #include "base/memory/ref_counted.h" | 12 #include "base/memory/scoped_ptr.h" |
| 14 #include "base/values.h" | |
| 15 #include "components/dom_distiller/core/distiller_page.h" | |
| 16 #include "components/dom_distiller/core/distiller_url_fetcher.h" | 13 #include "components/dom_distiller/core/distiller_url_fetcher.h" |
| 17 #include "components/dom_distiller/core/proto/distilled_page.pb.h" | 14 #include "components/dom_distiller/core/page_distiller.h" |
| 15 #include "components/dom_distiller/core/proto/distilled_article.pb.h" |
| 18 #include "net/url_request/url_request_context_getter.h" | 16 #include "net/url_request/url_request_context_getter.h" |
| 19 #include "url/gurl.h" | 17 #include "url/gurl.h" |
| 20 | 18 |
| 21 namespace dom_distiller { | 19 namespace dom_distiller { |
| 22 | 20 |
| 23 class DistillerImpl; | 21 class DistillerImpl; |
| 24 | 22 |
| 25 class Distiller { | 23 class Distiller { |
| 26 public: | 24 public: |
| 27 typedef base::Callback<void( | 25 typedef base::Callback<void(scoped_ptr<DistilledArticleProto>)> |
| 28 scoped_ptr<DistilledPageProto>)> DistillerCallback; | 26 DistillerCallback; |
| 29 virtual ~Distiller() {} | 27 virtual ~Distiller() {} |
| 30 | 28 |
| 31 // Distills a page, and asynchronously returns the article HTML to the | 29 // Distills a page, and asynchronously returns the article HTML to the |
| 32 // supplied callback. | 30 // supplied callback. |
| 33 virtual void DistillPage(const GURL& url, | 31 virtual void DistillPage(const GURL& url, |
| 34 const DistillerCallback& callback) = 0; | 32 const DistillerCallback& callback) = 0; |
| 35 }; | 33 }; |
| 36 | 34 |
| 37 class DistillerFactory { | 35 class DistillerFactory { |
| 38 public: | 36 public: |
| 39 virtual scoped_ptr<Distiller> CreateDistiller() = 0; | 37 virtual scoped_ptr<Distiller> CreateDistiller() = 0; |
| 40 virtual ~DistillerFactory() {} | 38 virtual ~DistillerFactory() {} |
| 41 }; | 39 }; |
| 42 | 40 |
| 43 // DistillerFactory implementation holding page and URL fetcher factories. | 41 // DistillerFactory implementation holding page and URL fetcher factories. |
| 44 class DistillerFactoryImpl : public DistillerFactory { | 42 class DistillerFactoryImpl : public DistillerFactory { |
| 45 public: | 43 public: |
| 46 DistillerFactoryImpl( | 44 DistillerFactoryImpl( |
| 47 scoped_ptr<DistillerPageFactory> distiller_page_factory, | 45 scoped_ptr<DistillerPageFactory> distiller_page_factory, |
| 48 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory); | 46 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory); |
| 49 virtual ~DistillerFactoryImpl(); | 47 virtual ~DistillerFactoryImpl(); |
| 50 virtual scoped_ptr<Distiller> CreateDistiller() OVERRIDE; | 48 virtual scoped_ptr<Distiller> CreateDistiller() OVERRIDE; |
| 51 | 49 |
| 52 private: | 50 private: |
| 53 scoped_ptr<DistillerPageFactory> distiller_page_factory_; | 51 scoped_ptr<DistillerPageFactory> distiller_page_factory_; |
| 54 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory_; | 52 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory_; |
| 55 }; | 53 }; |
| 56 | 54 |
| 57 // Distills an article from a page and associated pages. | 55 // Distills an article from a page and associated pages. |
| 58 class DistillerImpl : public Distiller, | 56 class DistillerImpl : public Distiller { |
| 59 public DistillerPage::Delegate { | |
| 60 public: | 57 public: |
| 61 DistillerImpl( | 58 DistillerImpl( |
| 62 const DistillerPageFactory& distiller_page_factory, | 59 const DistillerPageFactory& distiller_page_factory, |
| 63 const DistillerURLFetcherFactory& distiller_url_fetcher_factory); | 60 const DistillerURLFetcherFactory& distiller_url_fetcher_factory); |
| 64 virtual ~DistillerImpl(); | 61 virtual ~DistillerImpl(); |
| 65 | 62 |
| 66 // Creates an execution context. This must be called once before any calls are | 63 // Creates an execution context. This must be called once before any calls are |
| 67 // made to distill the page. | 64 // made to distill the page. |
| 68 virtual void Init(); | 65 virtual void Init(); |
| 69 | 66 |
| 70 virtual void DistillPage(const GURL& url, | 67 virtual void DistillPage(const GURL& url, |
| 71 const DistillerCallback& callback) OVERRIDE; | 68 const DistillerCallback& callback) OVERRIDE; |
| 72 | 69 |
| 73 // PageDistillerContext::Delegate | 70 private: |
| 74 virtual void OnLoadURLDone() OVERRIDE; | 71 void OnFetchImageDone(DistilledPageProto* distilled_page_proto, |
| 75 virtual void OnExecuteJavaScriptDone(const base::Value* value) OVERRIDE; | 72 const std::string& id, |
| 73 const std::string& response); |
| 76 | 74 |
| 77 void OnFetchImageDone(const std::string& id, const std::string& response); | 75 void OnPageDistillationFinished(const GURL& page_url, |
| 76 scoped_ptr<DistilledPageInfo> distilled_page, |
| 77 bool distillation_successful); |
| 78 | 78 |
| 79 private: | 79 virtual void FetchImage(DistilledPageProto* distilled_page_proto, |
| 80 virtual void LoadURL(const GURL& url); | 80 const std::string& image_id, |
| 81 virtual void FetchImage(const std::string& image_id, const std::string& item); | 81 const std::string& item); |
| 82 | 82 |
| 83 // Injects JavaScript to distill a loaded page down to its important content, | 83 // Distills the page and adds the new page to |article_proto_|. |
| 84 // e.g., extracting a news article from its surrounding boilerplate. | 84 void DistillPage(const GURL& url); |
| 85 void GetDistilledContent(); | |
| 86 | 85 |
| 87 const DistillerPageFactory& distiller_page_factory_; | 86 // Runs |distillation_cb_| if all distillation callbacks and image fetches are |
| 87 // complete. |
| 88 void RunDistillerCallbackIfDone(); |
| 89 |
| 88 const DistillerURLFetcherFactory& distiller_url_fetcher_factory_; | 90 const DistillerURLFetcherFactory& distiller_url_fetcher_factory_; |
| 89 scoped_ptr<DistillerPage> distiller_page_; | 91 scoped_ptr<PageDistiller> page_distiller_; |
| 90 DistillerCallback distillation_cb_; | 92 DistillerCallback distillation_cb_; |
| 91 | 93 |
| 92 std::map<std::string, DistillerURLFetcher* > image_fetchers_; | 94 base::hash_map<std::string, DistillerURLFetcher*> image_fetchers_; |
| 93 | 95 scoped_ptr<DistilledArticleProto> article_proto_; |
| 94 scoped_ptr<DistilledPageProto> proto_; | 96 bool distillation_in_progress_; |
| 97 // Set to keep track of which urls are already seen by the distiller. |
| 98 base::hash_set<std::string> processed_urls_; |
| 95 | 99 |
| 96 DISALLOW_COPY_AND_ASSIGN(DistillerImpl); | 100 DISALLOW_COPY_AND_ASSIGN(DistillerImpl); |
| 97 }; | 101 }; |
| 98 | 102 |
| 99 } // namespace dom_distiller | 103 } // namespace dom_distiller |
| 100 | 104 |
| 101 #endif // COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ | 105 #endif // COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ |
| OLD | NEW |