| OLD | NEW |
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include <sstream> | 5 #include <sstream> |
| 6 | 6 |
| 7 #include "base/command_line.h" | 7 #include "base/command_line.h" |
| 8 #include "base/files/scoped_temp_dir.h" | 8 #include "base/files/scoped_temp_dir.h" |
| 9 #include "base/id_map.h" | 9 #include "base/id_map.h" |
| 10 #include "base/message_loop/message_loop.h" | 10 #include "base/message_loop/message_loop.h" |
| (...skipping 207 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 218 service->ViewUrl(this, | 218 service->ViewUrl(this, |
| 219 service->CreateDefaultDistillerPage(render_view_size), | 219 service->CreateDefaultDistillerPage(render_view_size), |
| 220 url_); | 220 url_); |
| 221 } | 221 } |
| 222 | 222 |
| 223 DistilledArticleProto GetArticleCopy() { | 223 DistilledArticleProto GetArticleCopy() { |
| 224 return *article_proto_; | 224 return *article_proto_; |
| 225 } | 225 } |
| 226 | 226 |
| 227 static ScopedVector<ContentExtractionRequest> CreateForCommandLine( | 227 static ScopedVector<ContentExtractionRequest> CreateForCommandLine( |
| 228 const CommandLine& command_line, | 228 const base::CommandLine& command_line, |
| 229 UrlToDomainMap* url_to_domain_map) { | 229 UrlToDomainMap* url_to_domain_map) { |
| 230 ScopedVector<ContentExtractionRequest> requests; | 230 ScopedVector<ContentExtractionRequest> requests; |
| 231 if (command_line.HasSwitch(kUrlSwitch)) { | 231 if (command_line.HasSwitch(kUrlSwitch)) { |
| 232 GURL url; | 232 GURL url; |
| 233 std::string url_string = command_line.GetSwitchValueASCII(kUrlSwitch); | 233 std::string url_string = command_line.GetSwitchValueASCII(kUrlSwitch); |
| 234 url = GURL(url_string); | 234 url = GURL(url_string); |
| 235 if (url.is_valid()) { | 235 if (url.is_valid()) { |
| 236 requests.push_back(new ContentExtractionRequest(url)); | 236 requests.push_back(new ContentExtractionRequest(url)); |
| 237 if (command_line.HasSwitch(kOriginalDomain)) { | 237 if (command_line.HasSwitch(kOriginalDomain)) { |
| 238 (*url_to_domain_map)[url.spec()] = | 238 (*url_to_domain_map)[url.spec()] = |
| (...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 297 : pending_tasks_(0), | 297 : pending_tasks_(0), |
| 298 max_tasks_(kMaxExtractorTasks), | 298 max_tasks_(kMaxExtractorTasks), |
| 299 next_request_(0), | 299 next_request_(0), |
| 300 output_data_(), | 300 output_data_(), |
| 301 protobuf_output_stream_( | 301 protobuf_output_stream_( |
| 302 new google::protobuf::io::StringOutputStream(&output_data_)) {} | 302 new google::protobuf::io::StringOutputStream(&output_data_)) {} |
| 303 | 303 |
| 304 // Change behavior of the default host resolver to avoid DNS lookup errors, so | 304 // Change behavior of the default host resolver to avoid DNS lookup errors, so |
| 305 // we can make network calls. | 305 // we can make network calls. |
| 306 void SetUpOnMainThread() override { | 306 void SetUpOnMainThread() override { |
| 307 if (!CommandLine::ForCurrentProcess()->HasSwitch(kDisableDnsSwitch)) { | 307 if (!base::CommandLine::ForCurrentProcess()->HasSwitch(kDisableDnsSwitch)) { |
| 308 EnableDNSLookupForThisTest(); | 308 EnableDNSLookupForThisTest(); |
| 309 } | 309 } |
| 310 CHECK(db_dir_.CreateUniqueTempDir()); | 310 CHECK(db_dir_.CreateUniqueTempDir()); |
| 311 AddComponentsResources(); | 311 AddComponentsResources(); |
| 312 } | 312 } |
| 313 | 313 |
| 314 void TearDownOnMainThread() override { DisableDNSLookupForThisTest(); } | 314 void TearDownOnMainThread() override { DisableDNSLookupForThisTest(); } |
| 315 | 315 |
| 316 protected: | 316 protected: |
| 317 // Creates the DomDistillerService and creates and starts the extraction | 317 // Creates the DomDistillerService and creates and starts the extraction |
| 318 // request. | 318 // request. |
| 319 void Start() { | 319 void Start() { |
| 320 const CommandLine& command_line = *CommandLine::ForCurrentProcess(); | 320 const base::CommandLine& command_line = |
| 321 *base::CommandLine::ForCurrentProcess(); |
| 321 UrlToDomainMap url_to_domain_map; | 322 UrlToDomainMap url_to_domain_map; |
| 322 requests_ = ContentExtractionRequest::CreateForCommandLine( | 323 requests_ = ContentExtractionRequest::CreateForCommandLine( |
| 323 command_line, &url_to_domain_map); | 324 command_line, &url_to_domain_map); |
| 324 content::BrowserContext* context = | 325 content::BrowserContext* context = |
| 325 shell()->web_contents()->GetBrowserContext(); | 326 shell()->web_contents()->GetBrowserContext(); |
| 326 service_ = CreateDomDistillerService(context, | 327 service_ = CreateDomDistillerService(context, |
| 327 db_dir_.path(), | 328 db_dir_.path(), |
| 328 url_to_domain_map); | 329 url_to_domain_map); |
| 329 PumpQueue(); | 330 PumpQueue(); |
| 330 } | 331 } |
| (...skipping 29 matching lines...) Expand all Loading... |
| 360 void FinishRequest() { | 361 void FinishRequest() { |
| 361 --pending_tasks_; | 362 --pending_tasks_; |
| 362 if (next_request_ == requests_.size() && pending_tasks_ == 0) { | 363 if (next_request_ == requests_.size() && pending_tasks_ == 0) { |
| 363 Finish(); | 364 Finish(); |
| 364 } else { | 365 } else { |
| 365 PumpQueue(); | 366 PumpQueue(); |
| 366 } | 367 } |
| 367 } | 368 } |
| 368 | 369 |
| 369 void DoArticleOutput() { | 370 void DoArticleOutput() { |
| 371 const base::CommandLine& command_line = |
| 372 *base::CommandLine::ForCurrentProcess(); |
| 370 for (size_t i = 0; i < requests_.size(); ++i) { | 373 for (size_t i = 0; i < requests_.size(); ++i) { |
| 371 const DistilledArticleProto& article = requests_[i]->GetArticleCopy(); | 374 const DistilledArticleProto& article = requests_[i]->GetArticleCopy(); |
| 372 if (CommandLine::ForCurrentProcess()->HasSwitch(kShouldOutputBinary)) { | 375 if (command_line.HasSwitch(kShouldOutputBinary)) { |
| 373 WriteProtobufWithSize(article, protobuf_output_stream_.get()); | 376 WriteProtobufWithSize(article, protobuf_output_stream_.get()); |
| 374 } else { | 377 } else { |
| 375 output_data_ += GetReadableArticleString(article) + "\n"; | 378 output_data_ += GetReadableArticleString(article) + "\n"; |
| 376 } | 379 } |
| 377 } | 380 } |
| 378 | 381 |
| 379 if (CommandLine::ForCurrentProcess()->HasSwitch(kOutputFile)) { | 382 if (command_line.HasSwitch(kOutputFile)) { |
| 380 base::FilePath filename = | 383 base::FilePath filename = command_line.GetSwitchValuePath(kOutputFile); |
| 381 CommandLine::ForCurrentProcess()->GetSwitchValuePath(kOutputFile); | |
| 382 ASSERT_EQ( | 384 ASSERT_EQ( |
| 383 (int)output_data_.size(), | 385 (int)output_data_.size(), |
| 384 base::WriteFile(filename, output_data_.c_str(), output_data_.size())); | 386 base::WriteFile(filename, output_data_.c_str(), output_data_.size())); |
| 385 } else { | 387 } else { |
| 386 VLOG(0) << output_data_; | 388 VLOG(0) << output_data_; |
| 387 } | 389 } |
| 388 } | 390 } |
| 389 | 391 |
| 390 void Finish() { | 392 void Finish() { |
| 391 DoArticleOutput(); | 393 DoArticleOutput(); |
| (...skipping 15 matching lines...) Expand all Loading... |
| 407 std::string output_data_; | 409 std::string output_data_; |
| 408 scoped_ptr<google::protobuf::io::StringOutputStream> protobuf_output_stream_; | 410 scoped_ptr<google::protobuf::io::StringOutputStream> protobuf_output_stream_; |
| 409 }; | 411 }; |
| 410 | 412 |
| 411 IN_PROC_BROWSER_TEST_F(ContentExtractor, MANUAL_ExtractUrl) { | 413 IN_PROC_BROWSER_TEST_F(ContentExtractor, MANUAL_ExtractUrl) { |
| 412 Start(); | 414 Start(); |
| 413 base::RunLoop().Run(); | 415 base::RunLoop().Run(); |
| 414 } | 416 } |
| 415 | 417 |
| 416 } // namespace dom_distiller | 418 } // namespace dom_distiller |
| OLD | NEW |