Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(881)

Side by Side Diff: components/dom_distiller/standalone/content_extractor_browsertest.cc

Issue 901793002: Add support for providing an external file for extracting content. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: "Moved injection to constructor" (created 5 years, 9 months ago)
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <sstream> 5 #include <sstream>
6 6
7 #include "base/command_line.h" 7 #include "base/command_line.h"
8 #include "base/files/file_path.h"
9 #include "base/files/file_util.h"
8 #include "base/files/scoped_temp_dir.h" 10 #include "base/files/scoped_temp_dir.h"
9 #include "base/id_map.h" 11 #include "base/id_map.h"
10 #include "base/message_loop/message_loop.h" 12 #include "base/message_loop/message_loop.h"
11 #include "base/path_service.h" 13 #include "base/path_service.h"
12 #include "base/run_loop.h" 14 #include "base/run_loop.h"
13 #include "base/strings/string_number_conversions.h" 15 #include "base/strings/string_number_conversions.h"
14 #include "base/strings/string_split.h" 16 #include "base/strings/string_split.h"
15 #include "components/dom_distiller/content/distiller_page_web_contents.h" 17 #include "components/dom_distiller/content/distiller_page_web_contents.h"
16 #include "components/dom_distiller/core/article_entry.h" 18 #include "components/dom_distiller/core/article_entry.h"
17 #include "components/dom_distiller/core/distilled_page_prefs.h" 19 #include "components/dom_distiller/core/distilled_page_prefs.h"
18 #include "components/dom_distiller/core/distiller.h" 20 #include "components/dom_distiller/core/distiller.h"
19 #include "components/dom_distiller/core/dom_distiller_service.h" 21 #include "components/dom_distiller/core/dom_distiller_service.h"
20 #include "components/dom_distiller/core/dom_distiller_store.h" 22 #include "components/dom_distiller/core/dom_distiller_store.h"
21 #include "components/dom_distiller/core/proto/distilled_article.pb.h" 23 #include "components/dom_distiller/core/proto/distilled_article.pb.h"
22 #include "components/dom_distiller/core/proto/distilled_page.pb.h" 24 #include "components/dom_distiller/core/proto/distilled_page.pb.h"
23 #include "components/dom_distiller/core/task_tracker.h" 25 #include "components/dom_distiller/core/task_tracker.h"
24 #include "components/leveldb_proto/proto_database.h" 26 #include "components/leveldb_proto/proto_database.h"
25 #include "components/leveldb_proto/proto_database_impl.h" 27 #include "components/leveldb_proto/proto_database_impl.h"
26 #include "components/pref_registry/testing_pref_service_syncable.h" 28 #include "components/pref_registry/testing_pref_service_syncable.h"
27 #include "content/public/browser/browser_context.h" 29 #include "content/public/browser/browser_context.h"
28 #include "content/public/browser/browser_thread.h" 30 #include "content/public/browser/browser_thread.h"
29 #include "content/public/test/content_browser_test.h" 31 #include "content/public/test/content_browser_test.h"
30 #include "content/shell/browser/shell.h" 32 #include "content/shell/browser/shell.h"
31 #include "google/protobuf/io/coded_stream.h" 33 #include "google/protobuf/io/coded_stream.h"
32 #include "google/protobuf/io/zero_copy_stream_impl_lite.h" 34 #include "google/protobuf/io/zero_copy_stream_impl_lite.h"
35 #include "grit/components_resources.h"
33 #include "net/dns/mock_host_resolver.h" 36 #include "net/dns/mock_host_resolver.h"
34 #include "third_party/dom_distiller_js/dom_distiller.pb.h" 37 #include "third_party/dom_distiller_js/dom_distiller.pb.h"
35 #include "ui/base/resource/resource_bundle.h" 38 #include "ui/base/resource/resource_bundle.h"
36 39
37 using content::ContentBrowserTest; 40 using content::ContentBrowserTest;
38 41
39 namespace dom_distiller { 42 namespace dom_distiller {
40 43
41 namespace { 44 namespace {
42 45
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after
104 // The original URL of the page if |kUrlSwitch| is a file. 107 // The original URL of the page if |kUrlSwitch| is a file.
105 const char* kOriginalUrl = "original-url"; 108 const char* kOriginalUrl = "original-url";
106 109
107 // A semi-colon-separated (i.e. ';') list of original URLs corresponding to 110 // A semi-colon-separated (i.e. ';') list of original URLs corresponding to
108 // "kUrlsSwitch". 111 // "kUrlsSwitch".
109 const char* kOriginalUrls = "original-urls"; 112 const char* kOriginalUrls = "original-urls";
110 113
111 // Maximum number of concurrent started extractor requests. 114 // Maximum number of concurrent started extractor requests.
112 const int kMaxExtractorTasks = 8; 115 const int kMaxExtractorTasks = 8;
113 116
117 // A path to a script for extracting content (domdistiller.js). If this argument
118 // is passed in, the script will be used instead of using the bundled version
119 // of the script.
120 const char* kExternalDomDistillerJs = "external-dom-distiller-js";
121
114 scoped_ptr<DomDistillerService> CreateDomDistillerService( 122 scoped_ptr<DomDistillerService> CreateDomDistillerService(
115 content::BrowserContext* context, 123 content::BrowserContext* context,
116 const base::FilePath& db_path, 124 const base::FilePath& db_path,
117 const FileToUrlMap& file_to_url_map) { 125 const FileToUrlMap& file_to_url_map) {
118 scoped_refptr<base::SequencedTaskRunner> background_task_runner = 126 scoped_refptr<base::SequencedTaskRunner> background_task_runner =
119 content::BrowserThread::GetBlockingPool()->GetSequencedTaskRunner( 127 content::BrowserThread::GetBlockingPool()->GetSequencedTaskRunner(
120 content::BrowserThread::GetBlockingPool()->GetSequenceToken()); 128 content::BrowserThread::GetBlockingPool()->GetSequenceToken());
121 129
122 // TODO(cjhopman): use an in-memory database instead of an on-disk one with 130 // TODO(cjhopman): use an in-memory database instead of an on-disk one with
123 // temporary directory. 131 // temporary directory.
124 scoped_ptr<leveldb_proto::ProtoDatabaseImpl<ArticleEntry> > db( 132 scoped_ptr<leveldb_proto::ProtoDatabaseImpl<ArticleEntry> > db(
125 new leveldb_proto::ProtoDatabaseImpl<ArticleEntry>( 133 new leveldb_proto::ProtoDatabaseImpl<ArticleEntry>(
126 background_task_runner)); 134 background_task_runner));
127 scoped_ptr<DomDistillerStore> dom_distiller_store( 135 scoped_ptr<DomDistillerStore> dom_distiller_store(
128 new DomDistillerStore(db.Pass(), db_path)); 136 new DomDistillerStore(db.Pass(), db_path));
129 137
130 scoped_ptr<DistillerPageFactory> distiller_page_factory( 138 scoped_ptr<DistillerPageFactory> distiller_page_factory;
131 new DistillerPageWebContentsFactory(context)); 139 if (base::CommandLine::ForCurrentProcess()->HasSwitch(
140 kExternalDomDistillerJs)) {
141 std::string external_script_path =
142 base::CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
143 kExternalDomDistillerJs);
144 std::string script_content;
145 if (!base::ReadFileToString(base::FilePath(external_script_path),
146 &script_content)) {
147 ADD_FAILURE() << "Failed to read external script for distillation.";
148 return nullptr;
149 }
150 distiller_page_factory.reset(
151 new DistillerPageWebContentsFactory(context, script_content));
152 } else {
153 const std::string distiller_js_script =
154 ResourceBundle::GetSharedInstance()
155 .GetRawDataResource(IDR_DISTILLER_JS)
156 .as_string();
157 distiller_page_factory.reset(
158 new DistillerPageWebContentsFactory(context, distiller_js_script));
159 }
160
132 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory( 161 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory(
133 new DistillerURLFetcherFactory(context->GetRequestContext())); 162 new DistillerURLFetcherFactory(context->GetRequestContext()));
134 163
135 dom_distiller::proto::DomDistillerOptions options; 164 dom_distiller::proto::DomDistillerOptions options;
136 if (base::CommandLine::ForCurrentProcess()->HasSwitch(kExtractTextOnly)) { 165 if (base::CommandLine::ForCurrentProcess()->HasSwitch(kExtractTextOnly)) {
137 options.set_extract_text_only(true); 166 options.set_extract_text_only(true);
138 } 167 }
139 int debug_level = 0; 168 int debug_level = 0;
140 if (base::CommandLine::ForCurrentProcess()->HasSwitch(kDebugLevel) && 169 if (base::CommandLine::ForCurrentProcess()->HasSwitch(kDebugLevel) &&
141 base::StringToInt( 170 base::StringToInt(
(...skipping 271 matching lines...) Expand 10 before | Expand all | Expand 10 after
413 std::string output_data_; 442 std::string output_data_;
414 scoped_ptr<google::protobuf::io::StringOutputStream> protobuf_output_stream_; 443 scoped_ptr<google::protobuf::io::StringOutputStream> protobuf_output_stream_;
415 }; 444 };
416 445
417 IN_PROC_BROWSER_TEST_F(ContentExtractor, MANUAL_ExtractUrl) { 446 IN_PROC_BROWSER_TEST_F(ContentExtractor, MANUAL_ExtractUrl) {
418 Start(); 447 Start();
419 base::RunLoop().Run(); 448 base::RunLoop().Run();
420 } 449 }
421 450
422 } // namespace dom_distiller 451 } // namespace dom_distiller
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698