Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(299)

Side by Side Diff: content/browser/download/base_file.cc

Issue 1751603002: [Downloads] Rework how hashes are calculated for download files. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Rebase on top of https://codereview.chromium.org/1781983002 since that's going in first. Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « content/browser/download/base_file.h ('k') | content/browser/download/base_file_linux.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "content/browser/download/base_file.h" 5 #include "content/browser/download/base_file.h"
6 6
7 #include <utility> 7 #include <utility>
8 8
9 #include "base/bind.h" 9 #include "base/bind.h"
10 #include "base/files/file.h" 10 #include "base/files/file.h"
11 #include "base/files/file_util.h" 11 #include "base/files/file_util.h"
12 #include "base/format_macros.h" 12 #include "base/format_macros.h"
13 #include "base/logging.h" 13 #include "base/logging.h"
14 #include "base/pickle.h" 14 #include "base/pickle.h"
15 #include "base/strings/stringprintf.h" 15 #include "base/strings/stringprintf.h"
16 #include "base/threading/thread_restrictions.h" 16 #include "base/threading/thread_restrictions.h"
17 #include "build/build_config.h" 17 #include "build/build_config.h"
18 #include "content/browser/download/download_interrupt_reasons_impl.h" 18 #include "content/browser/download/download_interrupt_reasons_impl.h"
19 #include "content/browser/download/download_net_log_parameters.h" 19 #include "content/browser/download/download_net_log_parameters.h"
20 #include "content/browser/download/download_stats.h" 20 #include "content/browser/download/download_stats.h"
21 #include "content/public/browser/browser_thread.h" 21 #include "content/public/browser/browser_thread.h"
22 #include "content/public/browser/content_browser_client.h" 22 #include "content/public/browser/content_browser_client.h"
23 #include "crypto/secure_hash.h" 23 #include "crypto/secure_hash.h"
24 #include "net/base/net_errors.h" 24 #include "net/base/net_errors.h"
25 25
26 namespace content { 26 namespace content {
27 27
28 // This will initialize the entire array to zero. 28 BaseFile::BaseFile(const net::BoundNetLog& bound_net_log)
29 const unsigned char BaseFile::kEmptySha256Hash[] = { 0 }; 29 : bound_net_log_(bound_net_log) {}
30
31 BaseFile::BaseFile(const base::FilePath& full_path,
32 const GURL& source_url,
33 const GURL& referrer_url,
34 int64_t received_bytes,
35 bool calculate_hash,
36 const std::string& hash_state_bytes,
37 base::File file,
38 const net::BoundNetLog& bound_net_log)
39 : full_path_(full_path),
40 source_url_(source_url),
41 referrer_url_(referrer_url),
42 file_(std::move(file)),
43 bytes_so_far_(received_bytes),
44 start_tick_(base::TimeTicks::Now()),
45 calculate_hash_(calculate_hash),
46 detached_(false),
47 bound_net_log_(bound_net_log) {
48 memcpy(sha256_hash_, kEmptySha256Hash, crypto::kSHA256Length);
49 if (calculate_hash_) {
50 secure_hash_.reset(crypto::SecureHash::Create(crypto::SecureHash::SHA256));
51 if ((bytes_so_far_ > 0) && // Not starting at the beginning.
52 (!IsEmptyHash(hash_state_bytes))) {
53 base::Pickle hash_state(hash_state_bytes.c_str(),
54 hash_state_bytes.size());
55 base::PickleIterator data_iterator(hash_state);
56 secure_hash_->Deserialize(&data_iterator);
57 }
58 }
59 }
60 30
61 BaseFile::~BaseFile() { 31 BaseFile::~BaseFile() {
62 DCHECK_CURRENTLY_ON(BrowserThread::FILE); 32 DCHECK_CURRENTLY_ON(BrowserThread::FILE);
63 if (detached_) 33 if (detached_)
64 Close(); 34 Close();
65 else 35 else
66 Cancel(); // Will delete the file. 36 Cancel(); // Will delete the file.
67 } 37 }
68 38
69 DownloadInterruptReason BaseFile::Initialize( 39 DownloadInterruptReason BaseFile::Initialize(
70 const base::FilePath& default_directory) { 40 const base::FilePath& full_path,
41 const base::FilePath& default_directory,
42 base::File file,
43 int64_t bytes_so_far,
44 const std::string& hash_so_far,
45 scoped_ptr<crypto::SecureHash> hash_state) {
71 DCHECK_CURRENTLY_ON(BrowserThread::FILE); 46 DCHECK_CURRENTLY_ON(BrowserThread::FILE);
72 DCHECK(!detached_); 47 DCHECK(!detached_);
73 48
74 if (full_path_.empty()) { 49 if (full_path.empty()) {
75 base::FilePath initial_directory(default_directory); 50 base::FilePath initial_directory(default_directory);
76 base::FilePath temp_file; 51 base::FilePath temp_file;
77 if (initial_directory.empty()) { 52 if (initial_directory.empty()) {
78 initial_directory = 53 initial_directory =
79 GetContentClient()->browser()->GetDefaultDownloadDirectory(); 54 GetContentClient()->browser()->GetDefaultDownloadDirectory();
80 } 55 }
81 // |initial_directory| can still be empty if ContentBrowserClient returned 56 // |initial_directory| can still be empty if ContentBrowserClient returned
82 // an empty path for the downloads directory. 57 // an empty path for the downloads directory.
83 if ((initial_directory.empty() || 58 if ((initial_directory.empty() ||
84 !base::CreateTemporaryFileInDir(initial_directory, &temp_file)) && 59 !base::CreateTemporaryFileInDir(initial_directory, &temp_file)) &&
85 !base::CreateTemporaryFile(&temp_file)) { 60 !base::CreateTemporaryFile(&temp_file)) {
86 return LogInterruptReason("Unable to create", 0, 61 return LogInterruptReason("Unable to create", 0,
87 DOWNLOAD_INTERRUPT_REASON_FILE_FAILED); 62 DOWNLOAD_INTERRUPT_REASON_FILE_FAILED);
88 } 63 }
89 full_path_ = temp_file; 64 full_path_ = temp_file;
65 } else {
66 full_path_ = full_path;
90 } 67 }
91 68
92 return Open(); 69 bytes_so_far_ = bytes_so_far;
70 secure_hash_ = std::move(hash_state);
71 file_ = std::move(file);
72
73 return Open(hash_so_far);
93 } 74 }
94 75
95 DownloadInterruptReason BaseFile::AppendDataToFile(const char* data, 76 DownloadInterruptReason BaseFile::AppendDataToFile(const char* data,
96 size_t data_len) { 77 size_t data_len) {
97 DCHECK_CURRENTLY_ON(BrowserThread::FILE); 78 DCHECK_CURRENTLY_ON(BrowserThread::FILE);
98 DCHECK(!detached_); 79 DCHECK(!detached_);
99 80
100 // NOTE(benwells): The above DCHECK won't be present in release builds, 81 // NOTE(benwells): The above DCHECK won't be present in release builds,
101 // so we log any occurrences to see how common this error is in the wild. 82 // so we log any occurrences to see how common this error is in the wild.
102 if (detached_) 83 if (detached_)
(...skipping 24 matching lines...) Expand all
127 size_t write_size = static_cast<size_t>(write_result); 108 size_t write_size = static_cast<size_t>(write_result);
128 DCHECK_LE(write_size, len); 109 DCHECK_LE(write_size, len);
129 len -= write_size; 110 len -= write_size;
130 current_data += write_size; 111 current_data += write_size;
131 bytes_so_far_ += write_size; 112 bytes_so_far_ += write_size;
132 } 113 }
133 114
134 RecordDownloadWriteSize(data_len); 115 RecordDownloadWriteSize(data_len);
135 RecordDownloadWriteLoopCount(write_count); 116 RecordDownloadWriteLoopCount(write_count);
136 117
137 if (calculate_hash_) 118 if (secure_hash_)
138 secure_hash_->Update(data, data_len); 119 secure_hash_->Update(data, data_len);
139 120
140 return DOWNLOAD_INTERRUPT_REASON_NONE; 121 return DOWNLOAD_INTERRUPT_REASON_NONE;
141 } 122 }
142 123
143 DownloadInterruptReason BaseFile::Rename(const base::FilePath& new_path) { 124 DownloadInterruptReason BaseFile::Rename(const base::FilePath& new_path) {
144 DCHECK_CURRENTLY_ON(BrowserThread::FILE); 125 DCHECK_CURRENTLY_ON(BrowserThread::FILE);
145 DownloadInterruptReason rename_result = DOWNLOAD_INTERRUPT_REASON_NONE; 126 DownloadInterruptReason rename_result = DOWNLOAD_INTERRUPT_REASON_NONE;
146 127
147 // If the new path is same as the old one, there is no need to perform the 128 // If the new path is same as the old one, there is no need to perform the
(...skipping 15 matching lines...) Expand all
163 // permissions / security descriptors that makes sense in the new directory. 144 // permissions / security descriptors that makes sense in the new directory.
164 rename_result = MoveFileAndAdjustPermissions(new_path); 145 rename_result = MoveFileAndAdjustPermissions(new_path);
165 146
166 if (rename_result == DOWNLOAD_INTERRUPT_REASON_NONE) 147 if (rename_result == DOWNLOAD_INTERRUPT_REASON_NONE)
167 full_path_ = new_path; 148 full_path_ = new_path;
168 149
169 // Re-open the file if we were still using it regardless of the interrupt 150 // Re-open the file if we were still using it regardless of the interrupt
170 // reason. 151 // reason.
171 DownloadInterruptReason open_result = DOWNLOAD_INTERRUPT_REASON_NONE; 152 DownloadInterruptReason open_result = DOWNLOAD_INTERRUPT_REASON_NONE;
172 if (was_in_progress) 153 if (was_in_progress)
173 open_result = Open(); 154 open_result = Open(std::string());
174 155
175 bound_net_log_.EndEvent(net::NetLog::TYPE_DOWNLOAD_FILE_RENAMED); 156 bound_net_log_.EndEvent(net::NetLog::TYPE_DOWNLOAD_FILE_RENAMED);
176 return rename_result == DOWNLOAD_INTERRUPT_REASON_NONE ? open_result 157 return rename_result == DOWNLOAD_INTERRUPT_REASON_NONE ? open_result
177 : rename_result; 158 : rename_result;
178 } 159 }
179 160
180 void BaseFile::Detach() { 161 void BaseFile::Detach() {
181 detached_ = true; 162 detached_ = true;
182 bound_net_log_.AddEvent(net::NetLog::TYPE_DOWNLOAD_FILE_DETACHED); 163 bound_net_log_.AddEvent(net::NetLog::TYPE_DOWNLOAD_FILE_DETACHED);
183 } 164 }
184 165
185 void BaseFile::Cancel() { 166 void BaseFile::Cancel() {
186 DCHECK_CURRENTLY_ON(BrowserThread::FILE); 167 DCHECK_CURRENTLY_ON(BrowserThread::FILE);
187 DCHECK(!detached_); 168 DCHECK(!detached_);
188 169
189 bound_net_log_.AddEvent(net::NetLog::TYPE_CANCELLED); 170 bound_net_log_.AddEvent(net::NetLog::TYPE_CANCELLED);
190 171
191 Close(); 172 Close();
192 173
193 if (!full_path_.empty()) { 174 if (!full_path_.empty()) {
194 bound_net_log_.AddEvent(net::NetLog::TYPE_DOWNLOAD_FILE_DELETED); 175 bound_net_log_.AddEvent(net::NetLog::TYPE_DOWNLOAD_FILE_DELETED);
195 base::DeleteFile(full_path_, false); 176 base::DeleteFile(full_path_, false);
196 } 177 }
197 178
198 Detach(); 179 Detach();
199 } 180 }
200 181
201 void BaseFile::Finish() { 182 scoped_ptr<crypto::SecureHash> BaseFile::Finish() {
202 DCHECK_CURRENTLY_ON(BrowserThread::FILE);
203
204 if (calculate_hash_)
205 secure_hash_->Finish(sha256_hash_, crypto::kSHA256Length);
206 Close();
207 }
208
209 void BaseFile::FinishWithError() {
210 DCHECK_CURRENTLY_ON(BrowserThread::FILE); 183 DCHECK_CURRENTLY_ON(BrowserThread::FILE);
211 Close(); 184 Close();
212 } 185 return std::move(secure_hash_);
213
214 void BaseFile::SetClientGuid(const std::string& guid) {
215 client_guid_ = guid;
216 } 186 }
217 187
218 // OS_WIN, OS_MACOSX and OS_LINUX have specialized implementations. 188 // OS_WIN, OS_MACOSX and OS_LINUX have specialized implementations.
219 #if !defined(OS_WIN) && !defined(OS_MACOSX) && !defined(OS_LINUX) 189 #if !defined(OS_WIN) && !defined(OS_MACOSX) && !defined(OS_LINUX)
220 DownloadInterruptReason BaseFile::AnnotateWithSourceInformation() { 190 DownloadInterruptReason BaseFile::AnnotateWithSourceInformation(
191 const std::string& client_guid,
192 const GURL& source_url,
193 const GURL& referrer_url) {
221 return DOWNLOAD_INTERRUPT_REASON_NONE; 194 return DOWNLOAD_INTERRUPT_REASON_NONE;
222 } 195 }
223 #endif 196 #endif
224 197
225 bool BaseFile::GetHash(std::string* hash) { 198 std::string BaseFile::DebugString() const {
226 DCHECK(!detached_); 199 return base::StringPrintf(
227 hash->assign(reinterpret_cast<const char*>(sha256_hash_), 200 "{ "
228 sizeof(sha256_hash_)); 201 " full_path_ = \"%" PRFilePath
229 return (calculate_hash_ && !in_progress()); 202 "\""
203 " bytes_so_far_ = %" PRId64 " detached_ = %c }",
204 full_path_.value().c_str(),
205 bytes_so_far_,
206 detached_ ? 'T' : 'F');
230 } 207 }
231 208
232 std::string BaseFile::GetHashState() { 209 DownloadInterruptReason BaseFile::CalculatePartialHash(
233 if (!calculate_hash_) 210 const std::string& hash_to_expect) {
234 return std::string(); 211 secure_hash_.reset(crypto::SecureHash::Create(crypto::SecureHash::SHA256));
235 212
236 base::Pickle hash_state; 213 if (bytes_so_far_ == 0)
237 if (!secure_hash_->Serialize(&hash_state)) 214 return DOWNLOAD_INTERRUPT_REASON_NONE;
238 return std::string();
239 215
240 return std::string(reinterpret_cast<const char*>(hash_state.data()), 216 if (file_.Seek(base::File::FROM_BEGIN, 0) != 0)
241 hash_state.size()); 217 return LogSystemError("Seek partial file",
218 logging::GetLastSystemErrorCode());
219
220 const size_t kMinBufferSize = secure_hash_->GetHashLength();
221 const size_t kMaxBufferSize = 1024 * 512;
222
223 // The size of the buffer is:
224 // - at least kMinBufferSize so that we can use it to hold the hash as well.
225 // - at most kMaxBufferSize so that there's a reasonable bound.
226 // - not larger than |bytes_so_far_| unless bytes_so_far_ is less than the
227 // hash size.
228 std::vector<char> buffer(std::max(
229 kMinBufferSize, std::min<size_t>(kMaxBufferSize, bytes_so_far_)));
230
231 int64_t current_position = 0;
232 while (current_position < bytes_so_far_) {
233 int length = file_.ReadAtCurrentPos(&buffer.front(), buffer.size());
234 if (length == -1) {
235 return LogInterruptReason("Reading partial file",
236 logging::GetLastSystemErrorCode(),
237 DOWNLOAD_INTERRUPT_REASON_FILE_TOO_SHORT);
238 }
239
240 if (length == 0)
241 break;
242
243 secure_hash_->Update(&buffer.front(), length);
244 current_position += length;
245 }
246
247 if (current_position != bytes_so_far_) {
248 return LogInterruptReason(
249 "Verifying prefix hash", 0, DOWNLOAD_INTERRUPT_REASON_FILE_TOO_SHORT);
250 }
251
252 if (!hash_to_expect.empty()) {
253 DCHECK_EQ(secure_hash_->GetHashLength(), hash_to_expect.size());
254 DCHECK(buffer.size() >= secure_hash_->GetHashLength());
255 scoped_ptr<crypto::SecureHash> partial_hash(secure_hash_->Clone());
256 partial_hash->Finish(&buffer.front(), buffer.size());
257
258 if (memcmp(&buffer.front(),
259 hash_to_expect.c_str(),
260 partial_hash->GetHashLength())) {
261 return LogInterruptReason("Verifying prefix hash",
262 0,
263 DOWNLOAD_INTERRUPT_REASON_FILE_HASH_MISMATCH);
264 }
265 }
266
267 return DOWNLOAD_INTERRUPT_REASON_NONE;
242 } 268 }
243 269
244 // static 270 DownloadInterruptReason BaseFile::Open(const std::string& hash_so_far) {
245 bool BaseFile::IsEmptyHash(const std::string& hash) {
246 return (hash.size() == crypto::kSHA256Length &&
247 0 == memcmp(hash.data(), kEmptySha256Hash, crypto::kSHA256Length));
248 }
249
250 std::string BaseFile::DebugString() const {
251 return base::StringPrintf("{ source_url_ = \"%s\""
252 " full_path_ = \"%" PRFilePath "\""
253 " bytes_so_far_ = %" PRId64
254 " detached_ = %c }",
255 source_url_.spec().c_str(),
256 full_path_.value().c_str(),
257 bytes_so_far_,
258 detached_ ? 'T' : 'F');
259 }
260
261 DownloadInterruptReason BaseFile::Open() {
262 DCHECK_CURRENTLY_ON(BrowserThread::FILE); 271 DCHECK_CURRENTLY_ON(BrowserThread::FILE);
263 DCHECK(!detached_); 272 DCHECK(!detached_);
264 DCHECK(!full_path_.empty()); 273 DCHECK(!full_path_.empty());
265 274
275 // Create a new file if it is not provided.
276 if (!file_.IsValid()) {
277 file_.Initialize(full_path_,
278 base::File::FLAG_OPEN_ALWAYS | base::File::FLAG_WRITE |
279 base::File::FLAG_READ);
280 if (!file_.IsValid()) {
281 return LogNetError("Open/Initialize File",
282 net::FileErrorToNetError(file_.error_details()));
283 }
284 }
285
266 bound_net_log_.BeginEvent( 286 bound_net_log_.BeginEvent(
267 net::NetLog::TYPE_DOWNLOAD_FILE_OPENED, 287 net::NetLog::TYPE_DOWNLOAD_FILE_OPENED,
268 base::Bind(&FileOpenedNetLogCallback, &full_path_, bytes_so_far_)); 288 base::Bind(&FileOpenedNetLogCallback, &full_path_, bytes_so_far_));
269 289
270 // Create a new file if it is not provided. 290 if (!secure_hash_) {
271 if (!file_.IsValid()) { 291 DownloadInterruptReason reason = CalculatePartialHash(hash_so_far);
272 file_.Initialize( 292 if (reason != DOWNLOAD_INTERRUPT_REASON_NONE) {
273 full_path_, base::File::FLAG_OPEN_ALWAYS | base::File::FLAG_WRITE); 293 ClearFile();
274 if (!file_.IsValid()) { 294 return reason;
275 return LogNetError("Open",
276 net::FileErrorToNetError(file_.error_details()));
277 } 295 }
278 } 296 }
279 297
280 // We may be re-opening the file after rename. Always make sure we're
281 // writing at the end of the file.
282 int64_t file_size = file_.Seek(base::File::FROM_END, 0); 298 int64_t file_size = file_.Seek(base::File::FROM_END, 0);
283 if (file_size < 0) { 299 if (file_size < 0) {
284 logging::SystemErrorCode error = logging::GetLastSystemErrorCode(); 300 logging::SystemErrorCode error = logging::GetLastSystemErrorCode();
285 ClearFile(); 301 ClearFile();
286 return LogSystemError("Seek", error); 302 return LogSystemError("Seeking to end", error);
287 } else if (file_size > bytes_so_far_) { 303 } else if (file_size > bytes_so_far_) {
288 // The file is larger than we expected. 304 // The file is larger than we expected.
289 // This is OK, as long as we don't use the extra. 305 // This is OK, as long as we don't use the extra.
290 // Truncate the file. 306 // Truncate the file.
291 if (!file_.SetLength(bytes_so_far_) || 307 if (!file_.SetLength(bytes_so_far_) ||
292 file_.Seek(base::File::FROM_BEGIN, bytes_so_far_) != bytes_so_far_) { 308 file_.Seek(base::File::FROM_BEGIN, bytes_so_far_) != bytes_so_far_) {
293 logging::SystemErrorCode error = logging::GetLastSystemErrorCode(); 309 logging::SystemErrorCode error = logging::GetLastSystemErrorCode();
294 ClearFile(); 310 ClearFile();
295 return LogSystemError("Truncate", error); 311 return LogSystemError("Truncating to last known offset", error);
296 } 312 }
297 } else if (file_size < bytes_so_far_) { 313 } else if (file_size < bytes_so_far_) {
298 // The file is shorter than we expected. Our hashes won't be valid. 314 // The file is shorter than we expected. Our hashes won't be valid.
299 ClearFile(); 315 ClearFile();
300 return LogInterruptReason("Unable to seek to last written point", 0, 316 return LogInterruptReason("Unable to seek to last written point", 0,
301 DOWNLOAD_INTERRUPT_REASON_FILE_TOO_SHORT); 317 DOWNLOAD_INTERRUPT_REASON_FILE_TOO_SHORT);
302 } 318 }
303 319
304 return DOWNLOAD_INTERRUPT_REASON_NONE; 320 return DOWNLOAD_INTERRUPT_REASON_NONE;
305 } 321 }
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
340 base::File::Error file_error = base::File::OSErrorToFileError(os_error); 356 base::File::Error file_error = base::File::OSErrorToFileError(os_error);
341 return LogInterruptReason( 357 return LogInterruptReason(
342 operation, os_error, 358 operation, os_error,
343 ConvertFileErrorToInterruptReason(file_error)); 359 ConvertFileErrorToInterruptReason(file_error));
344 } 360 }
345 361
346 DownloadInterruptReason BaseFile::LogInterruptReason( 362 DownloadInterruptReason BaseFile::LogInterruptReason(
347 const char* operation, 363 const char* operation,
348 int os_error, 364 int os_error,
349 DownloadInterruptReason reason) { 365 DownloadInterruptReason reason) {
366 DVLOG(1) << __FUNCTION__ << "() operation:" << operation
367 << " os_error:" << os_error
368 << " reason:" << DownloadInterruptReasonToString(reason);
350 bound_net_log_.AddEvent( 369 bound_net_log_.AddEvent(
351 net::NetLog::TYPE_DOWNLOAD_FILE_ERROR, 370 net::NetLog::TYPE_DOWNLOAD_FILE_ERROR,
352 base::Bind(&FileInterruptedNetLogCallback, operation, os_error, reason)); 371 base::Bind(&FileInterruptedNetLogCallback, operation, os_error, reason));
353 return reason; 372 return reason;
354 } 373 }
355 374
356 } // namespace content 375 } // namespace content
OLDNEW
« no previous file with comments | « content/browser/download/base_file.h ('k') | content/browser/download/base_file_linux.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698