Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(177)

Side by Side Diff: net/tools/cachetool/cachetool.cc

Issue 2421583002: [CacheTool] Add a "list_dups" command (Closed)
Patch Set: Fixes Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2016 The Chromium Authors. All rights reserved. 1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <iostream> 5 #include <iostream>
6 #include <memory> 6 #include <memory>
7 #include <unordered_map>
7 8
8 #include "base/at_exit.h" 9 #include "base/at_exit.h"
9 #include "base/command_line.h" 10 #include "base/command_line.h"
10 #include "base/files/file_path.h" 11 #include "base/files/file_path.h"
12 #include "base/format_macros.h"
11 #include "base/logging.h" 13 #include "base/logging.h"
12 #include "base/macros.h" 14 #include "base/macros.h"
15 #include "base/md5.h"
gavinp 2016/12/08 18:15:04 We have SHA1 and SHA256 in our code tree. I guess
jkarlin 2016/12/08 18:23:50 Yes, it's because of the incremental interface. MD
gavinp 2016/12/09 15:01:45 Yeah, md5 is fine for this; no reason to rewrite.
13 #include "base/message_loop/message_loop.h" 16 #include "base/message_loop/message_loop.h"
14 #include "base/run_loop.h" 17 #include "base/run_loop.h"
15 #include "base/strings/string_number_conversions.h" 18 #include "base/strings/string_number_conversions.h"
19 #include "base/strings/string_piece.h"
20 #include "base/strings/stringprintf.h"
16 #include "net/base/io_buffer.h" 21 #include "net/base/io_buffer.h"
17 #include "net/base/test_completion_callback.h" 22 #include "net/base/test_completion_callback.h"
18 #include "net/disk_cache/disk_cache.h" 23 #include "net/disk_cache/disk_cache.h"
19 #include "net/http/http_cache.h" 24 #include "net/http/http_cache.h"
20 #include "net/http/http_response_headers.h" 25 #include "net/http/http_response_headers.h"
21 #include "net/http/http_util.h" 26 #include "net/http/http_util.h"
22 27
23 using disk_cache::Backend; 28 using disk_cache::Backend;
24 using disk_cache::Entry; 29 using disk_cache::Entry;
25 30
26 namespace { 31 namespace {
27 32
// Summary of one cache entry, recorded while looking for duplicate bodies.
struct EntryData {
  // Cache key of the entry (taken from Entry::GetKey()).
  std::string url;
  // MIME type from the cached response headers; empty when unavailable.
  std::string mime_type;
  // Size in bytes of the response body stream. Initialized so that a
  // default-constructed EntryData never carries an indeterminate value.
  int size = 0;
};
38
28 constexpr int kResponseInfoIndex = 0; 39 constexpr int kResponseInfoIndex = 0;
40 constexpr int kResponseContentIndex = 1;
29 41
30 const char* const kCommandNames[] = { 42 const char* const kCommandNames[] = {
31 "stop", "get_size", "list_keys", "get_stream_for_key", 43 "stop", "get_size", "list_keys", "get_stream_for_key",
32 "delete_stream", "delete_key", "update_raw_headers", 44 "delete_stream", "delete_key", "update_raw_headers", "list_dups",
33 }; 45 };
34 46
35 // Prints the command line help. 47 // Prints the command line help.
36 void PrintHelp() { 48 void PrintHelp() {
37 std::cout << "cachetool <cache_path> <cache_backend_type> <subcommand> " 49 std::cout << "cachetool <cache_path> <cache_backend_type> <subcommand> "
38 << std::endl 50 << std::endl
39 << std::endl; 51 << std::endl;
40 std::cout << "Available cache backend types: simple, blockfile" << std::endl; 52 std::cout << "Available cache backend types: simple, blockfile" << std::endl;
41 std::cout << "Available subcommands:" << std::endl; 53 std::cout << "Available subcommands:" << std::endl;
42 std::cout << " batch: Starts cachetool to process serialized commands " 54 std::cout << " batch: Starts cachetool to process serialized commands "
43 << "passed down by the standard input and return commands output " 55 << "passed down by the standard input and return commands output "
44 << "in the stdout until the stop command is received." << std::endl; 56 << "in the stdout until the stop command is received." << std::endl;
45 std::cout << " delete_key <key>: Delete key from cache." << std::endl; 57 std::cout << " delete_key <key>: Delete key from cache." << std::endl;
46 std::cout << " delete_stream <key> <index>: Delete a particular stream of a" 58 std::cout << " delete_stream <key> <index>: Delete a particular stream of a"
47 << " given key." << std::endl; 59 << " given key." << std::endl;
48 std::cout << " get_size: Calculate the total size of the cache in bytes." 60 std::cout << " get_size: Calculate the total size of the cache in bytes."
49 << std::endl; 61 << std::endl;
50 std::cout << " get_stream <key> <index>: Print a particular stream for a" 62 std::cout << " get_stream <key> <index>: Print a particular stream for a"
51 << " given key." << std::endl; 63 << " given key." << std::endl;
52 std::cout << " list_keys: List all keys in the cache." << std::endl; 64 std::cout << " list_keys: List all keys in the cache." << std::endl;
65 std::cout << " list_dups: List all resources with duplicate bodies in the "
66 << "cache." << std::endl;
53 std::cout << " update_raw_headers <key>: Update stdin as the key's raw " 67 std::cout << " update_raw_headers <key>: Update stdin as the key's raw "
54 << "response headers." << std::endl; 68 << "response headers." << std::endl;
55 std::cout << " stop: Verify that the cache can be opened and return, " 69 std::cout << " stop: Verify that the cache can be opened and return, "
56 << "confirming the cache exists and is of the right type." 70 << "confirming the cache exists and is of the right type."
57 << std::endl; 71 << std::endl;
58 std::cout << "Expected values of <index> are:" << std::endl; 72 std::cout << "Expected values of <index> are:" << std::endl;
59 std::cout << " 0 (HTTP response headers)" << std::endl; 73 std::cout << " 0 (HTTP response headers)" << std::endl;
60 std::cout << " 1 (transport encoded content)" << std::endl; 74 std::cout << " 1 (transport encoded content)" << std::endl;
61 std::cout << " 2 (compiled content)" << std::endl; 75 std::cout << " 2 (compiled content)" << std::endl;
62 } 76 }
63 77
64 // Generic command input/output. 78 // Generic command input/output.
65 class CommandMarshal { 79 class CommandMarshal {
66 public: 80 public:
67 CommandMarshal(Backend* cache_backend) 81 explicit CommandMarshal(Backend* cache_backend)
68 : command_failed_(false), cache_backend_(cache_backend){}; 82 : command_failed_(false), cache_backend_(cache_backend) {}
69 virtual ~CommandMarshal(){}; 83 virtual ~CommandMarshal() {}
70 84
71 // Reads the next command's name to execute. 85 // Reads the next command's name to execute.
72 virtual std::string ReadCommandName() = 0; 86 virtual std::string ReadCommandName() = 0;
73 87
74 // Reads the next parameter as an integer. 88 // Reads the next parameter as an integer.
75 virtual int ReadInt() = 0; 89 virtual int ReadInt() = 0;
76 90
77 // Reads the next parameter as stream index. 91 // Reads the next parameter as stream index.
78 int ReadStreamIndex() { 92 int ReadStreamIndex() {
79 if (has_failed()) 93 if (has_failed())
(...skipping 18 matching lines...) Expand all
98 // Communicates back a string. 112 // Communicates back a string.
99 virtual void ReturnString(const std::string& string) = 0; 113 virtual void ReturnString(const std::string& string) = 0;
100 114
101 // Communicates back a buffer. 115 // Communicates back a buffer.
102 virtual void ReturnBuffer(net::GrowableIOBuffer* buffer) = 0; 116 virtual void ReturnBuffer(net::GrowableIOBuffer* buffer) = 0;
103 117
104 // Communicates back command failure. 118 // Communicates back command failure.
105 virtual void ReturnFailure(const std::string& error_msg) = 0; 119 virtual void ReturnFailure(const std::string& error_msg) = 0;
106 120
107 // Communicates back command success. 121 // Communicates back command success.
108 virtual void ReturnSuccess() { DCHECK(!command_failed_); }; 122 virtual void ReturnSuccess() { DCHECK(!command_failed_); }
109 123
110 // Returns whether the command has failed. 124 // Returns whether the command has failed.
111 inline bool has_failed() { return command_failed_; } 125 inline bool has_failed() { return command_failed_; }
112 126
113 // Returns the opened cache backend. 127 // Returns the opened cache backend.
114 Backend* cache_backend() { return cache_backend_; } 128 Backend* cache_backend() { return cache_backend_; }
115 129
116 protected: 130 protected:
117 bool command_failed_; 131 bool command_failed_;
118 Backend* const cache_backend_; 132 Backend* const cache_backend_;
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after
192 private: 206 private:
193 const base::CommandLine::StringVector command_line_args_; 207 const base::CommandLine::StringVector command_line_args_;
194 size_t args_id_; 208 size_t args_id_;
195 }; 209 };
196 210
197 // Online command input/output that receives pickled commands from stdin and 211 // Online command input/output that receives pickled commands from stdin and
198 // returns their results back in stdout. Send the stop command to properly exit 212 // returns their results back in stdout. Send the stop command to properly exit
199 // cachetool's main loop. 213 // cachetool's main loop.
200 class StreamCommandMarshal final : public CommandMarshal { 214 class StreamCommandMarshal final : public CommandMarshal {
201 public: 215 public:
202 StreamCommandMarshal(Backend* cache_backend) 216 explicit StreamCommandMarshal(Backend* cache_backend)
203 : CommandMarshal(cache_backend) {} 217 : CommandMarshal(cache_backend) {}
204 218
205 // Implements CommandMarshal. 219 // Implements CommandMarshal.
206 std::string ReadCommandName() override { 220 std::string ReadCommandName() override {
207 if (has_failed()) 221 if (has_failed())
208 return ""; 222 return "";
209 std::cout.flush(); 223 std::cout.flush();
210 size_t command_id = static_cast<size_t>(std::cin.get()); 224 size_t command_id = static_cast<size_t>(std::cin.get());
211 if (command_id >= arraysize(kCommandNames)) { 225 if (command_id >= arraysize(kCommandNames)) {
212 ReturnFailure("Unknown command."); 226 ReturnFailure("Unknown command.");
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after
295 std::string url = entry->GetKey(); 309 std::string url = entry->GetKey();
296 command_marshal->ReturnString(url); 310 command_marshal->ReturnString(url);
297 entry->Close(); 311 entry->Close();
298 entry = nullptr; 312 entry = nullptr;
299 rv = entry_iterator->OpenNextEntry(&entry, cb.callback()); 313 rv = entry_iterator->OpenNextEntry(&entry, cb.callback());
300 } 314 }
301 command_marshal->ReturnString(""); 315 command_marshal->ReturnString("");
302 return true; 316 return true;
303 } 317 }
304 318
319 bool GetResponseInfoForEntry(disk_cache::Entry* entry,
320 net::HttpResponseInfo* response_info) {
321 int size = entry->GetDataSize(kResponseInfoIndex);
322 if (size == 0)
323 return false;
324 scoped_refptr<net::IOBuffer> buffer = new net::IOBufferWithSize(size);
325 net::TestCompletionCallback cb;
326
327 int bytes_read = 0;
328 while (true) {
329 int rv = entry->ReadData(kResponseInfoIndex, bytes_read, buffer.get(), size,
330 cb.callback());
331 rv = cb.GetResult(rv);
332 if (rv < 0) {
333 entry->Close();
334 return false;
335 }
336
337 if (rv == 0) {
338 bool truncated_response_info = false;
339 net::HttpCache::ParseResponseInfo(buffer->data(), size, response_info,
340 &truncated_response_info);
341 return !truncated_response_info;
342 }
343
344 bytes_read += rv;
345 }
346
347 NOTREACHED();
348 return false;
349 }
350
351 std::string GetMD5ForResponseBody(disk_cache::Entry* entry) {
352 if (entry->GetDataSize(kResponseContentIndex) == 0)
353 return "";
354
355 const int kInitBufferSize = 81920;
gavinp 2016/12/08 18:15:04 Why 81920?
jkarlin 2016/12/08 18:23:50 No good reason. Open to suggestions.
gavinp 2016/12/09 15:01:45 Seems fine; maybe just write it as 80 * 1024 ?
jkarlin 2016/12/14 12:50:01 Done.
356 scoped_refptr<net::IOBuffer> buffer =
357 new net::IOBufferWithSize(kInitBufferSize);
358 net::TestCompletionCallback cb;
359
360 base::MD5Context ctx;
361 base::MD5Init(&ctx);
362
363 int bytes_read = 0;
364 while (true) {
365 int rv = entry->ReadData(kResponseContentIndex, bytes_read, buffer.get(),
366 kInitBufferSize, cb.callback());
367 rv = cb.GetResult(rv);
368 if (rv < 0) {
369 entry->Close();
370 return "";
371 }
372
373 if (rv == 0) {
374 base::MD5Digest digest;
375 base::MD5Final(&digest, &ctx);
376 return base::MD5DigestToBase16(digest);
377 }
378
379 bytes_read += rv;
380 MD5Update(&ctx, base::StringPiece(buffer->data(), rv));
381 }
382
383 NOTREACHED();
384 return "";
385 }
386
387 void ListDups(CommandMarshal* command_marshal) {
388 std::unique_ptr<Backend::Iterator> entry_iterator =
389 command_marshal->cache_backend()->CreateIterator();
390 Entry* entry = nullptr;
391 net::TestCompletionCallback cb;
392 int rv = entry_iterator->OpenNextEntry(&entry, cb.callback());
393 command_marshal->ReturnSuccess();
394
395 std::unordered_map<std::string, std::vector<EntryData>> md5_entries;
396
397 int total_entries = 0;
398
399 while (cb.GetResult(rv) == net::OK) {
400 total_entries += 1;
401 net::HttpResponseInfo response_info;
402 if (!GetResponseInfoForEntry(entry, &response_info)) {
403 entry->Close();
404 entry = nullptr;
405 rv = entry_iterator->OpenNextEntry(&entry, cb.callback());
406 continue;
407 }
408
409 std::string hash = GetMD5ForResponseBody(entry);
410 if (hash.empty()) {
411 // Sparse entries and empty bodies are skipped.
412 entry->Close();
413 entry = nullptr;
414 rv = entry_iterator->OpenNextEntry(&entry, cb.callback());
415 continue;
416 }
417
418 EntryData entry_data;
419
420 entry_data.url = entry->GetKey();
421 entry_data.size = entry->GetDataSize(kResponseContentIndex);
422 if (response_info.headers)
423 response_info.headers->GetMimeType(&entry_data.mime_type);
424
425 auto iter = md5_entries.find(hash);
426 if (iter == md5_entries.end())
427 md5_entries.insert(
428 std::make_pair(hash, std::vector<EntryData>{entry_data}));
429 else
430 iter->second.push_back(entry_data);
431
432 entry->Close();
433 entry = nullptr;
434 rv = entry_iterator->OpenNextEntry(&entry, cb.callback());
435 }
436
437 // Print the duplicates and collect stats.
438 int total_duped_entries = 0;
439 int64_t total_duped_bytes = 0u;
440 for (const auto& hash_and_entries : md5_entries) {
441 if (hash_and_entries.second.size() == 1)
442 continue;
443
444 int dups = hash_and_entries.second.size() - 1;
445 total_duped_entries += dups;
446 total_duped_bytes += hash_and_entries.second[0].size * dups;
447
448 for (const auto& entry : hash_and_entries.second) {
449 std::string out =
450 base::StringPrintf("%d, %s, %s, ", entry.size, entry.url.c_str(),
451 entry.mime_type.c_str());
452 command_marshal->ReturnString(out);
453 }
454 }
455
456 // Print the stats.
457 rv = command_marshal->cache_backend()->CalculateSizeOfAllEntries(
458 cb.callback());
459 rv = cb.GetResult(rv);
460 LOG(ERROR) << "Wasted bytes = " << total_duped_bytes;
461 LOG(ERROR) << "Wasted entries = " << total_duped_entries;
462 LOG(ERROR) << "Total entries = " << total_entries;
463 LOG(ERROR) << "Cache size = " << rv;
464 LOG(ERROR) << "Percentage of cache wasted = " << total_duped_bytes * 100 / rv;
465 }
466
305 // Gets a key's stream to a buffer. 467 // Gets a key's stream to a buffer.
306 scoped_refptr<net::GrowableIOBuffer> GetStreamForKeyBuffer( 468 scoped_refptr<net::GrowableIOBuffer> GetStreamForKeyBuffer(
307 CommandMarshal* command_marshal, 469 CommandMarshal* command_marshal,
308 const std::string& key, 470 const std::string& key,
309 int index) { 471 int index) {
310 DCHECK(!command_marshal->has_failed()); 472 DCHECK(!command_marshal->has_failed());
311 Entry* cache_entry; 473 Entry* cache_entry;
312 net::TestCompletionCallback cb; 474 net::TestCompletionCallback cb;
313 int rv = command_marshal->cache_backend()->OpenEntry(key, &cache_entry, 475 int rv = command_marshal->cache_backend()->OpenEntry(key, &cache_entry,
314 cb.callback()); 476 cb.callback());
(...skipping 138 matching lines...) Expand 10 before | Expand all | Expand 10 after
453 } else if (subcommand == "delete_stream") { 615 } else if (subcommand == "delete_stream") {
454 DeleteStreamForKey(command_marshal); 616 DeleteStreamForKey(command_marshal);
455 } else if (subcommand == "get_size") { 617 } else if (subcommand == "get_size") {
456 GetSize(command_marshal); 618 GetSize(command_marshal);
457 } else if (subcommand == "get_stream") { 619 } else if (subcommand == "get_stream") {
458 GetStreamForKey(command_marshal); 620 GetStreamForKey(command_marshal);
459 } else if (subcommand == "list_keys") { 621 } else if (subcommand == "list_keys") {
460 ListKeys(command_marshal); 622 ListKeys(command_marshal);
461 } else if (subcommand == "update_raw_headers") { 623 } else if (subcommand == "update_raw_headers") {
462 UpdateRawResponseHeaders(command_marshal); 624 UpdateRawResponseHeaders(command_marshal);
625 } else if (subcommand == "list_dups") {
626 ListDups(command_marshal);
463 } else { 627 } else {
464 // The wrong subcommand is originated from the command line. 628 // The wrong subcommand is originated from the command line.
465 command_marshal->ReturnFailure("Unknown command."); 629 command_marshal->ReturnFailure("Unknown command.");
466 PrintHelp(); 630 PrintHelp();
467 } 631 }
468 } 632 }
469 return false; 633 return false;
470 } 634 }
471 635
472 } // namespace 636 } // namespace
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
511 ProgramArgumentCommandMarshal program_argument_marshal( 675 ProgramArgumentCommandMarshal program_argument_marshal(
512 cache_backend.get(), 676 cache_backend.get(),
513 base::CommandLine::StringVector(args.begin() + 2, args.end())); 677 base::CommandLine::StringVector(args.begin() + 2, args.end()));
514 bool successful_commands = ExecuteCommands(&program_argument_marshal); 678 bool successful_commands = ExecuteCommands(&program_argument_marshal);
515 679
516 base::RunLoop().RunUntilIdle(); 680 base::RunLoop().RunUntilIdle();
517 cache_backend = nullptr; 681 cache_backend = nullptr;
518 base::RunLoop().RunUntilIdle(); 682 base::RunLoop().RunUntilIdle();
519 return !successful_commands; 683 return !successful_commands;
520 } 684 }
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698