OLD | NEW |
1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include <sstream> | 5 #include <sstream> |
6 | 6 |
7 #include "base/command_line.h" | 7 #include "base/command_line.h" |
| 8 #include "base/files/file_path.h" |
| 9 #include "base/files/file_util.h" |
8 #include "base/files/scoped_temp_dir.h" | 10 #include "base/files/scoped_temp_dir.h" |
9 #include "base/id_map.h" | 11 #include "base/id_map.h" |
10 #include "base/message_loop/message_loop.h" | 12 #include "base/message_loop/message_loop.h" |
11 #include "base/path_service.h" | 13 #include "base/path_service.h" |
12 #include "base/run_loop.h" | 14 #include "base/run_loop.h" |
13 #include "base/strings/string_number_conversions.h" | 15 #include "base/strings/string_number_conversions.h" |
14 #include "base/strings/string_split.h" | 16 #include "base/strings/string_split.h" |
15 #include "components/dom_distiller/content/distiller_page_web_contents.h" | 17 #include "components/dom_distiller/content/distiller_page_web_contents.h" |
16 #include "components/dom_distiller/core/article_entry.h" | 18 #include "components/dom_distiller/core/article_entry.h" |
17 #include "components/dom_distiller/core/distilled_page_prefs.h" | 19 #include "components/dom_distiller/core/distilled_page_prefs.h" |
18 #include "components/dom_distiller/core/distiller.h" | 20 #include "components/dom_distiller/core/distiller.h" |
19 #include "components/dom_distiller/core/dom_distiller_service.h" | 21 #include "components/dom_distiller/core/dom_distiller_service.h" |
20 #include "components/dom_distiller/core/dom_distiller_store.h" | 22 #include "components/dom_distiller/core/dom_distiller_store.h" |
21 #include "components/dom_distiller/core/proto/distilled_article.pb.h" | 23 #include "components/dom_distiller/core/proto/distilled_article.pb.h" |
22 #include "components/dom_distiller/core/proto/distilled_page.pb.h" | 24 #include "components/dom_distiller/core/proto/distilled_page.pb.h" |
23 #include "components/dom_distiller/core/task_tracker.h" | 25 #include "components/dom_distiller/core/task_tracker.h" |
24 #include "components/leveldb_proto/proto_database.h" | 26 #include "components/leveldb_proto/proto_database.h" |
25 #include "components/leveldb_proto/proto_database_impl.h" | 27 #include "components/leveldb_proto/proto_database_impl.h" |
26 #include "components/pref_registry/testing_pref_service_syncable.h" | 28 #include "components/pref_registry/testing_pref_service_syncable.h" |
27 #include "content/public/browser/browser_context.h" | 29 #include "content/public/browser/browser_context.h" |
28 #include "content/public/browser/browser_thread.h" | 30 #include "content/public/browser/browser_thread.h" |
29 #include "content/public/test/content_browser_test.h" | 31 #include "content/public/test/content_browser_test.h" |
30 #include "content/shell/browser/shell.h" | 32 #include "content/shell/browser/shell.h" |
31 #include "google/protobuf/io/coded_stream.h" | 33 #include "google/protobuf/io/coded_stream.h" |
32 #include "google/protobuf/io/zero_copy_stream_impl_lite.h" | 34 #include "google/protobuf/io/zero_copy_stream_impl_lite.h" |
| 35 #include "grit/components_resources.h" |
33 #include "net/dns/mock_host_resolver.h" | 36 #include "net/dns/mock_host_resolver.h" |
34 #include "third_party/dom_distiller_js/dom_distiller.pb.h" | 37 #include "third_party/dom_distiller_js/dom_distiller.pb.h" |
35 #include "ui/base/resource/resource_bundle.h" | 38 #include "ui/base/resource/resource_bundle.h" |
36 | 39 |
37 using content::ContentBrowserTest; | 40 using content::ContentBrowserTest; |
38 | 41 |
39 namespace dom_distiller { | 42 namespace dom_distiller { |
40 | 43 |
41 namespace { | 44 namespace { |
42 | 45 |
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
104 // The original URL of the page if |kUrlSwitch| is a file. | 107 // The original URL of the page if |kUrlSwitch| is a file. |
105 const char* kOriginalUrl = "original-url"; | 108 const char* kOriginalUrl = "original-url"; |
106 | 109 |
107 // A semi-colon-separated (i.e. ';') list of original URLs corresponding to | 110 // A semi-colon-separated (i.e. ';') list of original URLs corresponding to |
108 // "kUrlsSwitch". | 111 // "kUrlsSwitch". |
109 const char* kOriginalUrls = "original-urls"; | 112 const char* kOriginalUrls = "original-urls"; |
110 | 113 |
111 // Maximum number of extractor requests that may be started concurrently. | 114 // Maximum number of extractor requests that may be started concurrently. |
112 const int kMaxExtractorTasks = 8; | 115 const int kMaxExtractorTasks = 8; |
113 | 116 |
| 117 // Path to an external content-extraction script (domdistiller.js). When this |
| 118 // switch is present, the file it names is read and used in place of the |
| 119 // bundled version of the script. |
| 120 const char* kExternalDomDistillerJs = "external-dom-distiller-js"; |
| 121 |
114 scoped_ptr<DomDistillerService> CreateDomDistillerService( | 122 scoped_ptr<DomDistillerService> CreateDomDistillerService( |
115 content::BrowserContext* context, | 123 content::BrowserContext* context, |
116 const base::FilePath& db_path, | 124 const base::FilePath& db_path, |
117 const FileToUrlMap& file_to_url_map) { | 125 const FileToUrlMap& file_to_url_map) { |
118 scoped_refptr<base::SequencedTaskRunner> background_task_runner = | 126 scoped_refptr<base::SequencedTaskRunner> background_task_runner = |
119 content::BrowserThread::GetBlockingPool()->GetSequencedTaskRunner( | 127 content::BrowserThread::GetBlockingPool()->GetSequencedTaskRunner( |
120 content::BrowserThread::GetBlockingPool()->GetSequenceToken()); | 128 content::BrowserThread::GetBlockingPool()->GetSequenceToken()); |
121 | 129 |
122 // TODO(cjhopman): use an in-memory database instead of an on-disk one with | 130 // TODO(cjhopman): use an in-memory database instead of an on-disk one with |
123 // temporary directory. | 131 // temporary directory. |
124 scoped_ptr<leveldb_proto::ProtoDatabaseImpl<ArticleEntry> > db( | 132 scoped_ptr<leveldb_proto::ProtoDatabaseImpl<ArticleEntry> > db( |
125 new leveldb_proto::ProtoDatabaseImpl<ArticleEntry>( | 133 new leveldb_proto::ProtoDatabaseImpl<ArticleEntry>( |
126 background_task_runner)); | 134 background_task_runner)); |
127 scoped_ptr<DomDistillerStore> dom_distiller_store( | 135 scoped_ptr<DomDistillerStore> dom_distiller_store( |
128 new DomDistillerStore(db.Pass(), db_path)); | 136 new DomDistillerStore(db.Pass(), db_path)); |
129 | 137 |
130 scoped_ptr<DistillerPageFactory> distiller_page_factory( | 138 scoped_ptr<DistillerPageFactory> distiller_page_factory; |
131 new DistillerPageWebContentsFactory(context)); | 139 if (base::CommandLine::ForCurrentProcess()->HasSwitch( |
| 140 kExternalDomDistillerJs)) { |
| 141 std::string external_script_path = |
| 142 base::CommandLine::ForCurrentProcess()->GetSwitchValueASCII( |
| 143 kExternalDomDistillerJs); |
| 144 std::string script_content; |
| 145 if (!base::ReadFileToString(base::FilePath(external_script_path), |
| 146 &script_content)) { |
| 147 ADD_FAILURE() << "Failed to read external script for distillation."; |
| 148 return nullptr; |
| 149 } |
| 150 distiller_page_factory.reset( |
| 151 new DistillerPageWebContentsFactory(context, script_content)); |
| 152 } else { |
| 153 const std::string distiller_js_script = |
| 154 ResourceBundle::GetSharedInstance() |
| 155 .GetRawDataResource(IDR_DISTILLER_JS) |
| 156 .as_string(); |
| 157 distiller_page_factory.reset( |
| 158 new DistillerPageWebContentsFactory(context, distiller_js_script)); |
| 159 } |
| 160 |
132 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory( | 161 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory( |
133 new DistillerURLFetcherFactory(context->GetRequestContext())); | 162 new DistillerURLFetcherFactory(context->GetRequestContext())); |
134 | 163 |
135 dom_distiller::proto::DomDistillerOptions options; | 164 dom_distiller::proto::DomDistillerOptions options; |
136 if (base::CommandLine::ForCurrentProcess()->HasSwitch(kExtractTextOnly)) { | 165 if (base::CommandLine::ForCurrentProcess()->HasSwitch(kExtractTextOnly)) { |
137 options.set_extract_text_only(true); | 166 options.set_extract_text_only(true); |
138 } | 167 } |
139 int debug_level = 0; | 168 int debug_level = 0; |
140 if (base::CommandLine::ForCurrentProcess()->HasSwitch(kDebugLevel) && | 169 if (base::CommandLine::ForCurrentProcess()->HasSwitch(kDebugLevel) && |
141 base::StringToInt( | 170 base::StringToInt( |
(...skipping 271 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
413 std::string output_data_; | 442 std::string output_data_; |
414 scoped_ptr<google::protobuf::io::StringOutputStream> protobuf_output_stream_; | 443 scoped_ptr<google::protobuf::io::StringOutputStream> protobuf_output_stream_; |
415 }; | 444 }; |
416 | 445 |
// Browser-test entry point for the ContentExtractor fixture: calls Start()
// (defined outside this excerpt; presumably a ContentExtractor member) and
// then blocks in base::RunLoop().Run(). What quits the loop is not visible
// here.
// NOTE(review): the MANUAL_ prefix presumably keeps this test out of automated
// runs so that it only executes when requested explicitly -- confirm against
// the test launcher's conventions.
417 IN_PROC_BROWSER_TEST_F(ContentExtractor, MANUAL_ExtractUrl) { | 446 IN_PROC_BROWSER_TEST_F(ContentExtractor, MANUAL_ExtractUrl) { |
418 Start(); | 447 Start(); |
419 base::RunLoop().Run(); | 448 base::RunLoop().Run(); |
420 } | 449 } |
421 | 450 |
422 } // namespace dom_distiller | 451 } // namespace dom_distiller |
OLD | NEW |