OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include "pdf/document_loader.h" |
| 6 |
| 7 #include "base/logging.h" |
| 8 #include "base/strings/string_util.h" |
| 9 #include "net/http/http_util.h" |
| 10 #include "ppapi/c/pp_errors.h" |
| 11 #include "ppapi/cpp/url_loader.h" |
| 12 #include "ppapi/cpp/url_request_info.h" |
| 13 #include "ppapi/cpp/url_response_info.h" |
| 14 |
| 15 namespace chrome_pdf { |
| 16 |
| 17 // Document below size will be downloaded in one chunk. |
| 18 const uint32 kMinFileSize = 64*1024; |
| 19 |
| 20 DocumentLoader::DocumentLoader(Client* client) |
| 21 : client_(client), partial_document_(false), request_pending_(false), |
| 22 current_pos_(0), current_chunk_size_(0), current_chunk_read_(0), |
| 23 document_size_(0), header_request_(true), is_multipart_(false) { |
| 24 loader_factory_.Initialize(this); |
| 25 } |
| 26 |
| 27 DocumentLoader::~DocumentLoader() { |
| 28 } |
| 29 |
| 30 bool DocumentLoader::Init(const pp::URLLoader& loader, |
| 31 const std::string& url, |
| 32 const std::string& headers) { |
| 33 DCHECK(url_.empty()); |
| 34 url_ = url; |
| 35 loader_ = loader; |
| 36 |
| 37 std::string response_headers; |
| 38 if (!headers.empty()) { |
| 39 response_headers = headers; |
| 40 } else { |
| 41 pp::URLResponseInfo response = loader_.GetResponseInfo(); |
| 42 pp::Var headers_var = response.GetHeaders(); |
| 43 |
| 44 if (headers_var.is_string()) { |
| 45 response_headers = headers_var.AsString(); |
| 46 } |
| 47 } |
| 48 |
| 49 bool accept_ranges_bytes = false; |
| 50 bool content_encoded = false; |
| 51 uint32 content_length = 0; |
| 52 std::string type; |
| 53 std::string disposition; |
| 54 if (!response_headers.empty()) { |
| 55 net::HttpUtil::HeadersIterator it(response_headers.begin(), |
| 56 response_headers.end(), "\n"); |
| 57 while (it.GetNext()) { |
| 58 if (LowerCaseEqualsASCII(it.name(), "content-length")) { |
| 59 content_length = atoi(it.values().c_str()); |
| 60 } else if (LowerCaseEqualsASCII(it.name(), "accept-ranges")) { |
| 61 accept_ranges_bytes = LowerCaseEqualsASCII(it.values(), "bytes"); |
| 62 } else if (LowerCaseEqualsASCII(it.name(), "content-encoding")) { |
| 63 content_encoded = true; |
| 64 } else if (LowerCaseEqualsASCII(it.name(), "content-type")) { |
| 65 type = it.values(); |
| 66 size_t semi_colon_pos = type.find(';'); |
| 67 if (semi_colon_pos != std::string::npos) { |
| 68 type = type.substr(0, semi_colon_pos); |
| 69 } |
| 70 TrimWhitespace(type, base::TRIM_ALL, &type); |
| 71 } else if (LowerCaseEqualsASCII(it.name(), "content-disposition")) { |
| 72 disposition = it.values(); |
| 73 } |
| 74 } |
| 75 } |
| 76 if (!type.empty() && |
| 77 !EndsWith(type, "/pdf", false) && |
| 78 !EndsWith(type, ".pdf", false) && |
| 79 !EndsWith(type, "/x-pdf", false) && |
| 80 !EndsWith(type, "/*", false) && |
| 81 !EndsWith(type, "/acrobat", false) && |
| 82 !EndsWith(type, "/unknown", false) && |
| 83 !StartsWithASCII(url, "blob:", false)) { |
| 84 return false; |
| 85 } |
| 86 if (StartsWithASCII(disposition, "attachment", false)) { |
| 87 return false; |
| 88 } |
| 89 |
| 90 if (content_length > 0) |
| 91 chunk_stream_.Preallocate(content_length); |
| 92 |
| 93 document_size_ = content_length; |
| 94 requests_count_ = 0; |
| 95 |
| 96 // Document loading strategy. |
| 97 // Following table shows the growth on the minimal request size depending |
| 98 // on the number requests that has been made already. |
| 99 chunk_size_table_[10] = 32*1024; |
| 100 chunk_size_table_[20] = 64*1024; |
| 101 chunk_size_table_[30] = 128*1024; |
| 102 chunk_size_table_[40] = 256*1024; |
| 103 chunk_size_table_[50] = 512*1024; |
| 104 chunk_size_table_[60] = 1024*1024; |
| 105 chunk_size_table_[70] = 2048*1024; |
| 106 |
| 107 // Enable partial loading only if file size is above the threshold. |
| 108 // It will allow avoiding latency for multiple requests. |
| 109 if (content_length > kMinFileSize && |
| 110 accept_ranges_bytes && |
| 111 !content_encoded) { |
| 112 LoadPartialDocument(); |
| 113 } else { |
| 114 LoadFullDocument(); |
| 115 } |
| 116 return true; |
| 117 } |
| 118 |
| 119 void DocumentLoader::LoadPartialDocument() { |
| 120 partial_document_ = true; |
| 121 // Force the main request to be cancelled, since if we're a full-frame plugin |
| 122 // there could be other references to the loader. |
| 123 loader_.Close(); |
| 124 loader_ = pp::URLLoader(); |
| 125 // Download file header. |
| 126 header_request_ = true; |
| 127 RequestData(0, std::min(GetRequestSize(), document_size_)); |
| 128 } |
| 129 |
| 130 void DocumentLoader::LoadFullDocument() { |
| 131 partial_document_ = false; |
| 132 chunk_buffer_.clear(); |
| 133 ReadMore(); |
| 134 } |
| 135 |
| 136 bool DocumentLoader::IsDocumentComplete() const { |
| 137 if (document_size_ == 0) // Document size unknown. |
| 138 return false; |
| 139 return IsDataAvailable(0, document_size_); |
| 140 } |
| 141 |
| 142 uint32 DocumentLoader::GetAvailableData() const { |
| 143 if (document_size_ == 0) { // If document size is unknown. |
| 144 return current_pos_; |
| 145 } |
| 146 |
| 147 std::vector<std::pair<size_t, size_t> > ranges; |
| 148 chunk_stream_.GetMissedRanges(0, document_size_, &ranges); |
| 149 uint32 available = document_size_; |
| 150 std::vector<std::pair<size_t, size_t> >::iterator it; |
| 151 for (it = ranges.begin(); it != ranges.end(); ++it) { |
| 152 available -= it->second; |
| 153 } |
| 154 return available; |
| 155 } |
| 156 |
| 157 void DocumentLoader::ClearPendingRequests() { |
| 158 // The first item in the queue is pending (need to keep it in the queue). |
| 159 if (pending_requests_.size() > 1) { |
| 160 // Remove all elements except the first one. |
| 161 pending_requests_.erase(++pending_requests_.begin(), |
| 162 pending_requests_.end()); |
| 163 } |
| 164 } |
| 165 |
| 166 bool DocumentLoader::GetBlock(uint32 position, uint32 size, void* buf) const { |
| 167 return chunk_stream_.ReadData(position, size, buf); |
| 168 } |
| 169 |
| 170 bool DocumentLoader::IsDataAvailable(uint32 position, uint32 size) const { |
| 171 return chunk_stream_.IsRangeAvailable(position, size); |
| 172 } |
| 173 |
| 174 void DocumentLoader::RequestData(uint32 position, uint32 size) { |
| 175 DCHECK(partial_document_); |
| 176 |
| 177 // We have some artefact request from |
| 178 // PDFiumEngine::OnDocumentComplete() -> FPDFAvail_IsPageAvail after |
| 179 // document is complete. |
| 180 // We need this fix in PDFIum. Adding this as a work around. |
| 181 // Bug: http://code.google.com/p/chromium/issues/detail?id=79996 |
| 182 // Test url: |
| 183 // http://www.icann.org/en/correspondence/holtzman-to-jeffrey-02mar11-en.pdf |
| 184 if (IsDocumentComplete()) |
| 185 return; |
| 186 |
| 187 pending_requests_.push_back(std::pair<size_t, size_t>(position, size)); |
| 188 DownloadPendingRequests(); |
| 189 } |
| 190 |
| 191 void DocumentLoader::DownloadPendingRequests() { |
| 192 if (request_pending_ || pending_requests_.empty()) |
| 193 return; |
| 194 |
| 195 // Remove already completed requests. |
| 196 // By design DownloadPendingRequests() should have at least 1 request in the |
| 197 // queue. ReadComplete() will remove the last pending comment from the queue. |
| 198 while (pending_requests_.size() > 1) { |
| 199 if (IsDataAvailable(pending_requests_.front().first, |
| 200 pending_requests_.front().second)) { |
| 201 pending_requests_.pop_front(); |
| 202 } else { |
| 203 break; |
| 204 } |
| 205 } |
| 206 |
| 207 uint32 pos = pending_requests_.front().first; |
| 208 uint32 size = pending_requests_.front().second; |
| 209 if (IsDataAvailable(pos, size)) { |
| 210 ReadComplete(); |
| 211 return; |
| 212 } |
| 213 |
| 214 // If current request has been partially downloaded already, split it into |
| 215 // a few smaller requests. |
| 216 std::vector<std::pair<size_t, size_t> > ranges; |
| 217 chunk_stream_.GetMissedRanges(pos, size, &ranges); |
| 218 if (ranges.size() > 0) { |
| 219 pending_requests_.pop_front(); |
| 220 pending_requests_.insert(pending_requests_.begin(), |
| 221 ranges.begin(), ranges.end()); |
| 222 pos = pending_requests_.front().first; |
| 223 size = pending_requests_.front().second; |
| 224 } |
| 225 |
| 226 uint32 cur_request_size = GetRequestSize(); |
| 227 // If size is less than default request, try to expand download range for |
| 228 // more optimal download. |
| 229 if (size < cur_request_size && partial_document_) { |
| 230 // First, try to expand block towards the end of the file. |
| 231 uint32 new_pos = pos; |
| 232 uint32 new_size = cur_request_size; |
| 233 if (pos + new_size > document_size_) |
| 234 new_size = document_size_ - pos; |
| 235 |
| 236 std::vector<std::pair<size_t, size_t> > ranges; |
| 237 if (chunk_stream_.GetMissedRanges(new_pos, new_size, &ranges)) { |
| 238 new_pos = ranges[0].first; |
| 239 new_size = ranges[0].second; |
| 240 } |
| 241 |
| 242 // Second, try to expand block towards the beginning of the file. |
| 243 if (new_size < cur_request_size) { |
| 244 uint32 block_end = new_pos + new_size; |
| 245 if (block_end > cur_request_size) { |
| 246 new_pos = block_end - cur_request_size; |
| 247 } else { |
| 248 new_pos = 0; |
| 249 } |
| 250 new_size = block_end - new_pos; |
| 251 |
| 252 if (chunk_stream_.GetMissedRanges(new_pos, new_size, &ranges)) { |
| 253 new_pos = ranges.back().first; |
| 254 new_size = ranges.back().second; |
| 255 } |
| 256 } |
| 257 pos = new_pos; |
| 258 size = new_size; |
| 259 } |
| 260 |
| 261 size_t last_byte_before = chunk_stream_.GetLastByteBefore(pos); |
| 262 size_t first_byte_after = chunk_stream_.GetFirstByteAfter(pos + size - 1); |
| 263 if (pos - last_byte_before < cur_request_size) { |
| 264 size = pos + size - last_byte_before; |
| 265 pos = last_byte_before; |
| 266 } |
| 267 |
| 268 if ((pos + size < first_byte_after) && |
| 269 (pos + size + cur_request_size >= first_byte_after)) |
| 270 size = first_byte_after - pos; |
| 271 |
| 272 request_pending_ = true; |
| 273 |
| 274 // Start downloading first pending request. |
| 275 loader_.Close(); |
| 276 loader_ = client_->CreateURLLoader(); |
| 277 pp::CompletionCallback callback = |
| 278 loader_factory_.NewCallback(&DocumentLoader::DidOpen); |
| 279 pp::URLRequestInfo request = GetRequest(pos, size); |
| 280 requests_count_++; |
| 281 int rv = loader_.Open(request, callback); |
| 282 if (rv != PP_OK_COMPLETIONPENDING) |
| 283 callback.Run(rv); |
| 284 } |
| 285 |
| 286 pp::URLRequestInfo DocumentLoader::GetRequest(uint32 position, |
| 287 uint32 size) const { |
| 288 pp::URLRequestInfo request(client_->GetPluginInstance()); |
| 289 request.SetURL(url_.c_str()); |
| 290 request.SetMethod("GET"); |
| 291 request.SetFollowRedirects(true); |
| 292 |
| 293 const size_t kBufSize = 100; |
| 294 char buf[kBufSize]; |
| 295 // According to rfc2616, byte range specifies position of the first and last |
| 296 // bytes in the requested range inclusively. Therefore we should subtract 1 |
| 297 // from the position + size, to get index of the last byte that needs to be |
| 298 // downloaded. |
| 299 base::snprintf(buf, kBufSize, "Range: bytes=%d-%d", position, |
| 300 position + size - 1); |
| 301 pp::Var header(buf); |
| 302 request.SetHeaders(header); |
| 303 |
| 304 return request; |
| 305 } |
| 306 |
| 307 void DocumentLoader::DidOpen(int32_t result) { |
| 308 if (result != PP_OK) { |
| 309 NOTREACHED(); |
| 310 return; |
| 311 } |
| 312 |
| 313 is_multipart_ = false; |
| 314 current_chunk_size_ = 0; |
| 315 current_chunk_read_ = 0; |
| 316 |
| 317 pp::Var headers_var = loader_.GetResponseInfo().GetHeaders(); |
| 318 std::string headers; |
| 319 if (headers_var.is_string()) |
| 320 headers = headers_var.AsString(); |
| 321 |
| 322 std::string boundary = GetMultiPartBoundary(headers); |
| 323 if (boundary.size()) { |
| 324 // Leave position untouched for now, when we read the data we'll get it. |
| 325 is_multipart_ = true; |
| 326 multipart_boundary_ = boundary; |
| 327 } else { |
| 328 // Need to make sure that the server returned a byte-range, since it's |
| 329 // possible for a server to just ignore our bye-range request and just |
| 330 // return the entire document even if it supports byte-range requests. |
| 331 // i.e. sniff response to |
| 332 // http://www.act.org/compass/sample/pdf/geometry.pdf |
| 333 current_pos_ = 0; |
| 334 uint32 start_pos, end_pos; |
| 335 if (GetByteRange(headers, &start_pos, &end_pos)) { |
| 336 current_pos_ = start_pos; |
| 337 if (end_pos && end_pos > start_pos) |
| 338 current_chunk_size_ = end_pos - start_pos + 1; |
| 339 } |
| 340 } |
| 341 |
| 342 ReadMore(); |
| 343 } |
| 344 |
| 345 bool DocumentLoader::GetByteRange(const std::string& headers, uint32* start, |
| 346 uint32* end) { |
| 347 net::HttpUtil::HeadersIterator it(headers.begin(), headers.end(), "\n"); |
| 348 while (it.GetNext()) { |
| 349 if (LowerCaseEqualsASCII(it.name(), "content-range")) { |
| 350 std::string range = it.values().c_str(); |
| 351 if (StartsWithASCII(range, "bytes", false)) { |
| 352 range = range.substr(strlen("bytes")); |
| 353 std::string::size_type pos = range.find('-'); |
| 354 std::string range_end; |
| 355 if (pos != std::string::npos) |
| 356 range_end = range.substr(pos + 1); |
| 357 TrimWhitespaceASCII(range, base::TRIM_LEADING, &range); |
| 358 TrimWhitespaceASCII(range_end, base::TRIM_LEADING, &range_end); |
| 359 *start = atoi(range.c_str()); |
| 360 *end = atoi(range_end.c_str()); |
| 361 return true; |
| 362 } |
| 363 } |
| 364 } |
| 365 return false; |
| 366 } |
| 367 |
| 368 std::string DocumentLoader::GetMultiPartBoundary(const std::string& headers) { |
| 369 net::HttpUtil::HeadersIterator it(headers.begin(), headers.end(), "\n"); |
| 370 while (it.GetNext()) { |
| 371 if (LowerCaseEqualsASCII(it.name(), "content-type")) { |
| 372 std::string type = StringToLowerASCII(it.values()); |
| 373 if (StartsWithASCII(type, "multipart/", true)) { |
| 374 const char* boundary = strstr(type.c_str(), "boundary="); |
| 375 if (!boundary) { |
| 376 NOTREACHED(); |
| 377 break; |
| 378 } |
| 379 |
| 380 return std::string(boundary + 9); |
| 381 } |
| 382 } |
| 383 } |
| 384 return std::string(); |
| 385 } |
| 386 |
| 387 void DocumentLoader::ReadMore() { |
| 388 pp::CompletionCallback callback = |
| 389 loader_factory_.NewCallback(&DocumentLoader::DidRead); |
| 390 int rv = loader_.ReadResponseBody(buffer_, sizeof(buffer_), callback); |
| 391 if (rv != PP_OK_COMPLETIONPENDING) |
| 392 callback.Run(rv); |
| 393 } |
| 394 |
| 395 void DocumentLoader::DidRead(int32_t result) { |
| 396 if (result > 0) { |
| 397 char* start = buffer_; |
| 398 size_t length = result; |
| 399 if (is_multipart_ && result > 2) { |
| 400 for (int i = 2; i < result; ++i) { |
| 401 if ((buffer_[i - 1] == '\n' && buffer_[i - 2] == '\n') || |
| 402 (i >= 4 && |
| 403 buffer_[i - 1] == '\n' && buffer_[i - 2] == '\r' && |
| 404 buffer_[i - 3] == '\n' && buffer_[i - 4] == '\r')) { |
| 405 uint32 start_pos, end_pos; |
| 406 if (GetByteRange(std::string(buffer_, i), &start_pos, &end_pos)) { |
| 407 current_pos_ = start_pos; |
| 408 start += i; |
| 409 length -= i; |
| 410 if (end_pos && end_pos > start_pos) |
| 411 current_chunk_size_ = end_pos - start_pos + 1; |
| 412 } |
| 413 break; |
| 414 } |
| 415 } |
| 416 |
| 417 // Reset this flag so we don't look inside the buffer in future calls of |
| 418 // DidRead for this response. Note that this code DOES NOT handle multi- |
| 419 // part responses with more than one part (we don't issue them at the |
| 420 // moment, so they shouldn't arrive). |
| 421 is_multipart_ = false; |
| 422 } |
| 423 |
| 424 if (current_chunk_size_ && |
| 425 current_chunk_read_ + length > current_chunk_size_) |
| 426 length = current_chunk_size_ - current_chunk_read_; |
| 427 |
| 428 if (length) { |
| 429 if (document_size_ > 0) { |
| 430 chunk_stream_.WriteData(current_pos_, start, length); |
| 431 } else { |
| 432 // If we did not get content-length in the response, we can't |
| 433 // preallocate buffer for the entire document. Resizing array causing |
| 434 // memory fragmentation issues on the large files and OOM exceptions. |
| 435 // To fix this, we collect all chunks of the file to the list and |
| 436 // concatenate them together after request is complete. |
| 437 chunk_buffer_.push_back(std::vector<unsigned char>()); |
| 438 chunk_buffer_.back().resize(length); |
| 439 memcpy(&(chunk_buffer_.back()[0]), start, length); |
| 440 } |
| 441 current_pos_ += length; |
| 442 current_chunk_read_ += length; |
| 443 client_->OnNewDataAvailable(); |
| 444 } |
| 445 ReadMore(); |
| 446 } else if (result == PP_OK) { |
| 447 ReadComplete(); |
| 448 } else { |
| 449 NOTREACHED(); |
| 450 } |
| 451 } |
| 452 |
| 453 void DocumentLoader::ReadComplete() { |
| 454 if (!partial_document_) { |
| 455 if (document_size_ == 0) { |
| 456 // For the document with no 'content-length" specified we've collected all |
| 457 // the chunks already. Let's allocate final document buffer and copy them |
| 458 // over. |
| 459 chunk_stream_.Preallocate(current_pos_); |
| 460 uint32 pos = 0; |
| 461 std::list<std::vector<unsigned char> >::iterator it; |
| 462 for (it = chunk_buffer_.begin(); it != chunk_buffer_.end(); ++it) { |
| 463 chunk_stream_.WriteData(pos, &((*it)[0]), it->size()); |
| 464 pos += it->size(); |
| 465 } |
| 466 chunk_buffer_.clear(); |
| 467 } |
| 468 document_size_ = current_pos_; |
| 469 client_->OnDocumentComplete(); |
| 470 return; |
| 471 } |
| 472 |
| 473 request_pending_ = false; |
| 474 pending_requests_.pop_front(); |
| 475 |
| 476 // If there are more pending request - continue downloading. |
| 477 if (!pending_requests_.empty()) { |
| 478 DownloadPendingRequests(); |
| 479 return; |
| 480 } |
| 481 |
| 482 if (IsDocumentComplete()) { |
| 483 client_->OnDocumentComplete(); |
| 484 return; |
| 485 } |
| 486 |
| 487 if (header_request_) |
| 488 client_->OnPartialDocumentLoaded(); |
| 489 else |
| 490 client_->OnPendingRequestComplete(); |
| 491 header_request_ = false; |
| 492 |
| 493 // The OnPendingRequestComplete could have added more requests. |
| 494 if (!pending_requests_.empty()) { |
| 495 DownloadPendingRequests(); |
| 496 } else { |
| 497 // Document is not complete and we have no outstanding requests. |
| 498 // Let's keep downloading PDF file in small chunks. |
| 499 uint32 pos = chunk_stream_.GetFirstMissingByte(); |
| 500 std::vector<std::pair<size_t, size_t> > ranges; |
| 501 chunk_stream_.GetMissedRanges(pos, GetRequestSize(), &ranges); |
| 502 DCHECK(ranges.size() > 0); |
| 503 RequestData(ranges[0].first, ranges[0].second); |
| 504 } |
| 505 } |
| 506 |
| 507 uint32 DocumentLoader::GetRequestSize() const { |
| 508 std::map<uint32, uint32>::const_iterator iter = |
| 509 chunk_size_table_.lower_bound(requests_count_); |
| 510 if (iter == chunk_size_table_.end()) |
| 511 iter--; |
| 512 return iter->second; |
| 513 } |
| 514 |
| 515 } // namespace chrome_pdf |
OLD | NEW |