OLD | NEW |
1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include <sstream> | 5 #include <sstream> |
6 | 6 |
7 #include "base/command_line.h" | 7 #include "base/command_line.h" |
| 8 #include "base/files/file_path.h" |
| 9 #include "base/files/file_util.h" |
8 #include "base/files/scoped_temp_dir.h" | 10 #include "base/files/scoped_temp_dir.h" |
9 #include "base/id_map.h" | 11 #include "base/id_map.h" |
10 #include "base/message_loop/message_loop.h" | 12 #include "base/message_loop/message_loop.h" |
11 #include "base/path_service.h" | 13 #include "base/path_service.h" |
12 #include "base/run_loop.h" | 14 #include "base/run_loop.h" |
13 #include "base/strings/string_number_conversions.h" | 15 #include "base/strings/string_number_conversions.h" |
14 #include "base/strings/string_split.h" | 16 #include "base/strings/string_split.h" |
15 #include "components/dom_distiller/content/distiller_page_web_contents.h" | 17 #include "components/dom_distiller/content/distiller_page_web_contents.h" |
16 #include "components/dom_distiller/core/article_entry.h" | 18 #include "components/dom_distiller/core/article_entry.h" |
17 #include "components/dom_distiller/core/distilled_page_prefs.h" | 19 #include "components/dom_distiller/core/distilled_page_prefs.h" |
(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
// NOTE(review): the `const char*` names below are non-const pointers to
// string literals; consider `const char kName[]` so the names cannot be
// re-pointed -- confirm against how the other switch names in this file are declared.
102 // The original domain of the page if |kUrlSwitch| is a file. | 104 // The original domain of the page if |kUrlSwitch| is a file. |
103 const char* kOriginalDomain = "original-domain"; | 105 const char* kOriginalDomain = "original-domain"; |
104 | 106 |
105 // A semi-colon-separated (i.e. ';') list of original domains corresponding to | 107 // A semi-colon-separated (i.e. ';') list of original domains corresponding to |
106 // "kUrlsSwitch". | 108 // "kUrlsSwitch". |
107 const char* kOriginalDomains = "original-domains"; | 109 const char* kOriginalDomains = "original-domains"; |
108 | 110 |
109 // Maximum number of concurrent started extractor requests. | 111 // Maximum number of concurrent started extractor requests. |
110 const int kMaxExtractorTasks = 8; | 112 const int kMaxExtractorTasks = 8; |
111 | 113 |
| 114 // A path to a script for extracting content (domdistiller.js). If this argument |
| 115 // is passed in, the script will be used instead of using the bundled version |
| 116 // of the script. |
// CreateDomDistillerService() checks this switch with HasSwitch(), reads its
// value with GetSwitchValueASCII(), loads the file via base::ReadFileToString()
// and hands the contents to DistillerPageWebContentsFactory; a failed read
// reports ADD_FAILURE() and returns nullptr.
// NOTE(review): the value is a file path but is fetched as an ASCII string and
// then wrapped in base::FilePath -- presumably GetSwitchValuePath() is the
// better fit (non-ASCII paths, platform string type); confirm.
| 117 const char* kExternalDomDistillerJs = "external-dom-distiller-js"; |
| 118 |
112 scoped_ptr<DomDistillerService> CreateDomDistillerService( | 119 scoped_ptr<DomDistillerService> CreateDomDistillerService( |
113 content::BrowserContext* context, | 120 content::BrowserContext* context, |
114 const base::FilePath& db_path, | 121 const base::FilePath& db_path, |
115 const UrlToDomainMap& url_to_domain_map) { | 122 const UrlToDomainMap& url_to_domain_map) { |
116 scoped_refptr<base::SequencedTaskRunner> background_task_runner = | 123 scoped_refptr<base::SequencedTaskRunner> background_task_runner = |
117 content::BrowserThread::GetBlockingPool()->GetSequencedTaskRunner( | 124 content::BrowserThread::GetBlockingPool()->GetSequencedTaskRunner( |
118 content::BrowserThread::GetBlockingPool()->GetSequenceToken()); | 125 content::BrowserThread::GetBlockingPool()->GetSequenceToken()); |
119 | 126 |
120 // TODO(cjhopman): use an in-memory database instead of an on-disk one with | 127 // TODO(cjhopman): use an in-memory database instead of an on-disk one with |
121 // temporary directory. | 128 // temporary directory. |
122 scoped_ptr<leveldb_proto::ProtoDatabaseImpl<ArticleEntry> > db( | 129 scoped_ptr<leveldb_proto::ProtoDatabaseImpl<ArticleEntry> > db( |
123 new leveldb_proto::ProtoDatabaseImpl<ArticleEntry>( | 130 new leveldb_proto::ProtoDatabaseImpl<ArticleEntry>( |
124 background_task_runner)); | 131 background_task_runner)); |
125 scoped_ptr<DomDistillerStore> dom_distiller_store( | 132 scoped_ptr<DomDistillerStore> dom_distiller_store( |
126 new DomDistillerStore(db.Pass(), db_path)); | 133 new DomDistillerStore(db.Pass(), db_path)); |
127 | 134 |
128 scoped_ptr<DistillerPageFactory> distiller_page_factory( | 135 scoped_ptr<DistillerPageFactory> distiller_page_factory; |
129 new DistillerPageWebContentsFactory(context)); | 136 if (base::CommandLine::ForCurrentProcess()->HasSwitch( |
| 137 kExternalDomDistillerJs)) { |
| 138 std::string external_script_path = |
| 139 base::CommandLine::ForCurrentProcess()->GetSwitchValueASCII( |
| 140 kExternalDomDistillerJs); |
| 141 std::string script_content; |
| 142 if (!base::ReadFileToString(base::FilePath(external_script_path), |
| 143 &script_content)) { |
| 144 ADD_FAILURE() << "Failed to read external script for distillation."; |
| 145 return nullptr; |
| 146 } |
| 147 distiller_page_factory.reset( |
| 148 new DistillerPageWebContentsFactory(context, script_content)); |
| 149 } else { |
| 150 distiller_page_factory.reset(new DistillerPageWebContentsFactory(context)); |
| 151 } |
| 152 |
130 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory( | 153 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory( |
131 new DistillerURLFetcherFactory(context->GetRequestContext())); | 154 new DistillerURLFetcherFactory(context->GetRequestContext())); |
132 | 155 |
133 dom_distiller::proto::DomDistillerOptions options; | 156 dom_distiller::proto::DomDistillerOptions options; |
134 if (base::CommandLine::ForCurrentProcess()->HasSwitch(kExtractTextOnly)) { | 157 if (base::CommandLine::ForCurrentProcess()->HasSwitch(kExtractTextOnly)) { |
135 options.set_extract_text_only(true); | 158 options.set_extract_text_only(true); |
136 } | 159 } |
137 int debug_level = 0; | 160 int debug_level = 0; |
138 if (base::CommandLine::ForCurrentProcess()->HasSwitch(kDebugLevel) && | 161 if (base::CommandLine::ForCurrentProcess()->HasSwitch(kDebugLevel) && |
139 base::StringToInt( | 162 base::StringToInt( |
(...skipping 269 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
409 std::string output_data_; | 432 std::string output_data_; |
410 scoped_ptr<google::protobuf::io::StringOutputStream> protobuf_output_stream_; | 433 scoped_ptr<google::protobuf::io::StringOutputStream> protobuf_output_stream_; |
411 }; | 434 }; |
412 | 435 |
// Entry point for running the ContentExtractor fixture; the test name carries
// a MANUAL_ prefix. The body calls Start() (defined outside this excerpt) and
// then blocks in base::RunLoop().Run().
// NOTE(review): presumably the fixture quits the message loop once all
// extraction requests have finished -- confirm in the elided part of the file.
413 IN_PROC_BROWSER_TEST_F(ContentExtractor, MANUAL_ExtractUrl) { | 436 IN_PROC_BROWSER_TEST_F(ContentExtractor, MANUAL_ExtractUrl) { |
414 Start(); | 437 Start(); |
415 base::RunLoop().Run(); | 438 base::RunLoop().Run(); |
416 } | 439 } |
417 | 440 |
418 } // namespace dom_distiller | 441 } // namespace dom_distiller |
OLD | NEW |