| OLD | NEW |
| 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "pdf/document_loader.h" | 5 #include "pdf/document_loader.h" |
| 6 | 6 |
| 7 #include <stddef.h> | 7 #include <stddef.h> |
| 8 #include <stdint.h> | 8 #include <stdint.h> |
| 9 | 9 |
| 10 #include <algorithm> | |
| 11 | |
| 12 #include "base/logging.h" | 10 #include "base/logging.h" |
| 13 #include "base/memory/ptr_util.h" | |
| 14 #include "base/numerics/safe_math.h" | |
| 15 #include "base/strings/string_util.h" | 11 #include "base/strings/string_util.h" |
| 16 #include "pdf/url_loader_wrapper.h" | 12 #include "net/http/http_util.h" |
| 17 #include "ppapi/c/pp_errors.h" | 13 #include "ppapi/c/pp_errors.h" |
| 18 #include "ui/gfx/range/range.h" | 14 #include "ppapi/cpp/url_loader.h" |
| 15 #include "ppapi/cpp/url_request_info.h" |
| 16 #include "ppapi/cpp/url_response_info.h" |
| 19 | 17 |
| 20 namespace chrome_pdf { | 18 namespace chrome_pdf { |
| 21 | 19 |
| 22 namespace { | 20 namespace { |
| 23 | 21 |
| 24 // The distance from last received chunk, when we wait requesting data, using | 22 // If the headers have a byte-range response, writes the start and end |
| 25 // current connection (like playing a cassette tape) and do not send new range | 23 // positions and returns true if at least the start position was parsed. |
| 26 // request (like rewind a cassette tape, and continue playing after). | 24 // The end position will be set to 0 if it was not found or parsed from the |
| 27 // Experimentally chosen value. | 25 // response. |
| 28 const int kChunkCloseDistance = 10; | 26 // Returns false if not even a start position could be parsed. |
| 27 bool GetByteRange(const std::string& headers, uint32_t* start, uint32_t* end) { |
| 28 net::HttpUtil::HeadersIterator it(headers.begin(), headers.end(), "\n"); |
| 29 while (it.GetNext()) { |
| 30 if (base::LowerCaseEqualsASCII(it.name(), "content-range")) { |
| 31 std::string range = it.values().c_str(); |
| 32 if (base::StartsWith(range, "bytes", |
| 33 base::CompareCase::INSENSITIVE_ASCII)) { |
| 34 range = range.substr(strlen("bytes")); |
| 35 std::string::size_type pos = range.find('-'); |
| 36 std::string range_end; |
| 37 if (pos != std::string::npos) |
| 38 range_end = range.substr(pos + 1); |
| 39 base::TrimWhitespaceASCII(range, base::TRIM_LEADING, &range); |
| 40 base::TrimWhitespaceASCII(range_end, base::TRIM_LEADING, &range_end); |
| 41 *start = atoi(range.c_str()); |
| 42 *end = atoi(range_end.c_str()); |
| 43 return true; |
| 44 } |
| 45 } |
| 46 } |
| 47 return false; |
| 48 } |
| 49 |
| 50 // If the headers have a multi-part response, returns the boundary name. |
| 51 // Otherwise returns an empty string. |
| 52 std::string GetMultiPartBoundary(const std::string& headers) { |
| 53 net::HttpUtil::HeadersIterator it(headers.begin(), headers.end(), "\n"); |
| 54 while (it.GetNext()) { |
| 55 if (base::LowerCaseEqualsASCII(it.name(), "content-type")) { |
| 56 std::string type = base::ToLowerASCII(it.values()); |
| 57 if (base::StartsWith(type, "multipart/", base::CompareCase::SENSITIVE)) { |
| 58 const char* boundary = strstr(type.c_str(), "boundary="); |
| 59 if (!boundary) { |
| 60 NOTREACHED(); |
| 61 break; |
| 62 } |
| 63 |
| 64 return std::string(boundary + 9); |
| 65 } |
| 66 } |
| 67 } |
| 68 return std::string(); |
| 69 } |
| 29 | 70 |
| 30 bool IsValidContentType(const std::string& type) { | 71 bool IsValidContentType(const std::string& type) { |
| 31 return (base::EndsWith(type, "/pdf", base::CompareCase::INSENSITIVE_ASCII) || | 72 return (base::EndsWith(type, "/pdf", base::CompareCase::INSENSITIVE_ASCII) || |
| 32 base::EndsWith(type, ".pdf", base::CompareCase::INSENSITIVE_ASCII) || | 73 base::EndsWith(type, ".pdf", base::CompareCase::INSENSITIVE_ASCII) || |
| 33 base::EndsWith(type, "/x-pdf", | 74 base::EndsWith(type, "/x-pdf", |
| 34 base::CompareCase::INSENSITIVE_ASCII) || | 75 base::CompareCase::INSENSITIVE_ASCII) || |
| 35 base::EndsWith(type, "/*", base::CompareCase::INSENSITIVE_ASCII) || | 76 base::EndsWith(type, "/*", base::CompareCase::INSENSITIVE_ASCII) || |
| 36 base::EndsWith(type, "/acrobat", | 77 base::EndsWith(type, "/acrobat", |
| 37 base::CompareCase::INSENSITIVE_ASCII) || | 78 base::CompareCase::INSENSITIVE_ASCII) || |
| 38 base::EndsWith(type, "/unknown", | 79 base::EndsWith(type, "/unknown", |
| 39 base::CompareCase::INSENSITIVE_ASCII)); | 80 base::CompareCase::INSENSITIVE_ASCII)); |
| 40 } | 81 } |
| 41 | 82 |
| 42 } // namespace | 83 } // namespace |
| 43 | 84 |
| 44 DocumentLoader::Client::~Client() { | 85 DocumentLoader::Client::~Client() { |
| 45 } | 86 } |
| 46 | 87 |
| 47 DocumentLoader::Chunk::Chunk() {} | 88 DocumentLoader::DocumentLoader(Client* client) |
| 48 | 89 : client_(client), partial_document_(false), request_pending_(false), |
| 49 DocumentLoader::Chunk::~Chunk() {} | 90 current_pos_(0), current_chunk_size_(0), current_chunk_read_(0), |
| 50 | 91 document_size_(0), header_request_(true), is_multipart_(false) { |
| 51 void DocumentLoader::Chunk::Clear() { | 92 loader_factory_.Initialize(this); |
| 52 chunk_index = 0; | |
| 53 data_size = 0; | |
| 54 chunk_data.reset(); | |
| 55 } | 93 } |
| 56 | 94 |
| 57 DocumentLoader::DocumentLoader(Client* client) | |
| 58 : client_(client), loader_factory_(this) {} | |
| 59 | |
| 60 DocumentLoader::~DocumentLoader() { | 95 DocumentLoader::~DocumentLoader() { |
| 61 } | 96 } |
| 62 | 97 |
| 63 bool DocumentLoader::Init(std::unique_ptr<URLLoaderWrapper> loader, | 98 bool DocumentLoader::Init(const pp::URLLoader& loader, |
| 64 const std::string& url) { | 99 const std::string& url, |
| 100 const std::string& headers) { |
| 65 DCHECK(url_.empty()); | 101 DCHECK(url_.empty()); |
| 66 DCHECK(!loader_); | 102 url_ = url; |
| 103 loader_ = loader; |
| 67 | 104 |
| 68 std::string type = loader->GetContentType(); | 105 std::string response_headers; |
| 106 if (!headers.empty()) { |
| 107 response_headers = headers; |
| 108 } else { |
| 109 pp::URLResponseInfo response = loader_.GetResponseInfo(); |
| 110 pp::Var headers_var = response.GetHeaders(); |
| 111 |
| 112 if (headers_var.is_string()) { |
| 113 response_headers = headers_var.AsString(); |
| 114 } |
| 115 } |
| 116 |
| 117 bool accept_ranges_bytes = false; |
| 118 bool content_encoded = false; |
| 119 uint32_t content_length = 0; |
| 120 std::string type; |
| 121 std::string disposition; |
| 69 | 122 |
| 70 // This happens for PDFs not loaded from http(s) sources. | 123 // This happens for PDFs not loaded from http(s) sources. |
| 71 if (type == "text/plain") { | 124 if (response_headers == "Content-Type: text/plain") { |
| 72 if (!base::StartsWith(url, "http://", | 125 if (!base::StartsWith(url, "http://", |
| 73 base::CompareCase::INSENSITIVE_ASCII) && | 126 base::CompareCase::INSENSITIVE_ASCII) && |
| 74 !base::StartsWith(url, "https://", | 127 !base::StartsWith(url, "https://", |
| 75 base::CompareCase::INSENSITIVE_ASCII)) { | 128 base::CompareCase::INSENSITIVE_ASCII)) { |
| 76 type = "application/pdf"; | 129 type = "application/pdf"; |
| 77 } | 130 } |
| 78 } | 131 } |
| 132 if (type.empty() && !response_headers.empty()) { |
| 133 net::HttpUtil::HeadersIterator it(response_headers.begin(), |
| 134 response_headers.end(), "\n"); |
| 135 while (it.GetNext()) { |
| 136 if (base::LowerCaseEqualsASCII(it.name(), "content-length")) { |
| 137 content_length = atoi(it.values().c_str()); |
| 138 } else if (base::LowerCaseEqualsASCII(it.name(), "accept-ranges")) { |
| 139 accept_ranges_bytes = base::LowerCaseEqualsASCII(it.values(), "bytes"); |
| 140 } else if (base::LowerCaseEqualsASCII(it.name(), "content-encoding")) { |
| 141 content_encoded = true; |
| 142 } else if (base::LowerCaseEqualsASCII(it.name(), "content-type")) { |
| 143 type = it.values(); |
| 144 size_t semi_colon_pos = type.find(';'); |
| 145 if (semi_colon_pos != std::string::npos) { |
| 146 type = type.substr(0, semi_colon_pos); |
| 147 } |
| 148 TrimWhitespaceASCII(type, base::TRIM_ALL, &type); |
| 149 } else if (base::LowerCaseEqualsASCII(it.name(), "content-disposition")) { |
| 150 disposition = it.values(); |
| 151 } |
| 152 } |
| 153 } |
| 79 if (!type.empty() && !IsValidContentType(type)) | 154 if (!type.empty() && !IsValidContentType(type)) |
| 80 return false; | 155 return false; |
| 81 | 156 if (base::StartsWith(disposition, "attachment", |
| 82 if (base::StartsWith(loader->GetContentDisposition(), "attachment", | |
| 83 base::CompareCase::INSENSITIVE_ASCII)) | 157 base::CompareCase::INSENSITIVE_ASCII)) |
| 84 return false; | 158 return false; |
| 85 | 159 |
| 86 url_ = url; | 160 if (content_length > 0) |
| 87 loader_ = std::move(loader); | 161 chunk_stream_.Preallocate(content_length); |
| 88 | 162 |
| 89 if (!loader_->IsContentEncoded()) { | 163 document_size_ = content_length; |
| 90 chunk_stream_.set_eof_pos(std::max(0, loader_->GetContentLength())); | 164 requests_count_ = 0; |
| 165 |
| 166 // Enable partial loading only if file size is above the threshold. |
| 167 // It will allow avoiding latency for multiple requests. |
| 168 if (content_length > kMinFileSize && |
| 169 accept_ranges_bytes && |
| 170 !content_encoded) { |
| 171 LoadPartialDocument(); |
| 172 } else { |
| 173 LoadFullDocument(); |
| 91 } | 174 } |
| 92 int64_t bytes_received = 0; | |
| 93 int64_t total_bytes_to_be_received = 0; | |
| 94 if (!chunk_stream_.eof_pos() && | |
| 95 loader_->GetDownloadProgress(&bytes_received, | |
| 96 &total_bytes_to_be_received)) { | |
| 97 chunk_stream_.set_eof_pos( | |
| 98 std::max(0, static_cast<int>(total_bytes_to_be_received))); | |
| 99 } | |
| 100 | |
| 101 SetPartialLoadingEnabled( | |
| 102 partial_loading_enabled_ && | |
| 103 !base::StartsWith(url, "file://", base::CompareCase::INSENSITIVE_ASCII) && | |
| 104 loader_->IsAcceptRangesBytes() && !loader_->IsContentEncoded() && | |
| 105 GetDocumentSize()); | |
| 106 | |
| 107 ReadMore(); | |
| 108 return true; | 175 return true; |
| 109 } | 176 } |
| 110 | 177 |
| 111 bool DocumentLoader::IsDocumentComplete() const { | 178 void DocumentLoader::LoadPartialDocument() { |
| 112 return chunk_stream_.IsComplete(); | 179 // The current request is a full request (not a range request) so it starts at |
| 180 // 0 and ends at |document_size_|. |
| 181 current_chunk_size_ = document_size_; |
| 182 current_pos_ = 0; |
| 183 current_request_offset_ = 0; |
| 184 current_request_size_ = 0; |
| 185 current_request_extended_size_ = document_size_; |
| 186 request_pending_ = true; |
| 187 |
| 188 partial_document_ = true; |
| 189 header_request_ = true; |
| 190 ReadMore(); |
| 113 } | 191 } |
| 114 | 192 |
| 115 uint32_t DocumentLoader::GetDocumentSize() const { | 193 void DocumentLoader::LoadFullDocument() { |
| 116 return chunk_stream_.eof_pos(); | 194 partial_document_ = false; |
| 195 chunk_buffer_.clear(); |
| 196 ReadMore(); |
| 197 } |
| 198 |
| 199 bool DocumentLoader::IsDocumentComplete() const { |
| 200 if (document_size_ == 0) // Document size unknown. |
| 201 return false; |
| 202 return IsDataAvailable(0, document_size_); |
| 203 } |
| 204 |
| 205 uint32_t DocumentLoader::GetAvailableData() const { |
| 206 if (document_size_ == 0) { // If document size is unknown. |
| 207 return current_pos_; |
| 208 } |
| 209 |
| 210 std::vector<std::pair<size_t, size_t> > ranges; |
| 211 chunk_stream_.GetMissedRanges(0, document_size_, &ranges); |
| 212 uint32_t available = document_size_; |
| 213 for (const auto& range : ranges) |
| 214 available -= range.second; |
| 215 return available; |
| 117 } | 216 } |
| 118 | 217 |
| 119 void DocumentLoader::ClearPendingRequests() { | 218 void DocumentLoader::ClearPendingRequests() { |
| 120 pending_requests_.Clear(); | 219 pending_requests_.erase(pending_requests_.begin(), |
| 220 pending_requests_.end()); |
| 121 } | 221 } |
| 122 | 222 |
| 123 bool DocumentLoader::GetBlock(uint32_t position, | 223 bool DocumentLoader::GetBlock(uint32_t position, |
| 124 uint32_t size, | 224 uint32_t size, |
| 125 void* buf) const { | 225 void* buf) const { |
| 126 base::CheckedNumeric<uint32_t> addition_result = position; | 226 return chunk_stream_.ReadData(position, size, buf); |
| 127 addition_result += size; | |
| 128 if (!addition_result.IsValid()) | |
| 129 return false; | |
| 130 return chunk_stream_.ReadData( | |
| 131 gfx::Range(position, addition_result.ValueOrDie()), buf); | |
| 132 } | 227 } |
| 133 | 228 |
| 134 bool DocumentLoader::IsDataAvailable(uint32_t position, uint32_t size) const { | 229 bool DocumentLoader::IsDataAvailable(uint32_t position, uint32_t size) const { |
| 135 base::CheckedNumeric<uint32_t> addition_result = position; | 230 return chunk_stream_.IsRangeAvailable(position, size); |
| 136 addition_result += size; | |
| 137 if (!addition_result.IsValid()) | |
| 138 return false; | |
| 139 return chunk_stream_.IsRangeAvailable( | |
| 140 gfx::Range(position, addition_result.ValueOrDie())); | |
| 141 } | 231 } |
| 142 | 232 |
| 143 void DocumentLoader::RequestData(uint32_t position, uint32_t size) { | 233 void DocumentLoader::RequestData(uint32_t position, uint32_t size) { |
| 144 if (!size || IsDataAvailable(position, size)) { | 234 DCHECK(partial_document_); |
| 145 return; | |
| 146 } | |
| 147 { | |
| 148 // Check integer overflow. | |
| 149 base::CheckedNumeric<uint32_t> addition_result = position; | |
| 150 addition_result += size; | |
| 151 if (!addition_result.IsValid()) | |
| 152 return; | |
| 153 } | |
| 154 | |
| 155 if (GetDocumentSize() && (position + size > GetDocumentSize())) { | |
| 156 return; | |
| 157 } | |
| 158 | 235 |
| 159 // We have some artefact request from | 236 // We have some artefact request from |
| 160 // PDFiumEngine::OnDocumentComplete() -> FPDFAvail_IsPageAvail after | 237 // PDFiumEngine::OnDocumentComplete() -> FPDFAvail_IsPageAvail after |
| 161 // document is complete. | 238 // document is complete. |
| 162 // We need this fix in PDFIum. Adding this as a work around. | 239 // We need this fix in PDFIum. Adding this as a work around. |
| 163 // Bug: http://code.google.com/p/chromium/issues/detail?id=79996 | 240 // Bug: http://code.google.com/p/chromium/issues/detail?id=79996 |
| 164 // Test url: | 241 // Test url: |
| 165 // http://www.icann.org/en/correspondence/holtzman-to-jeffrey-02mar11-en.pdf | 242 // http://www.icann.org/en/correspondence/holtzman-to-jeffrey-02mar11-en.pdf |
| 166 if (!loader_) | 243 if (IsDocumentComplete()) |
| 167 return; | 244 return; |
| 168 | 245 |
| 169 RangeSet requested_chunks(chunk_stream_.GetChunksRange(position, size)); | 246 pending_requests_.push_back(std::pair<size_t, size_t>(position, size)); |
| 170 requested_chunks.Subtract(chunk_stream_.filled_chunks()); | 247 DownloadPendingRequests(); |
| 171 if (requested_chunks.IsEmpty()) { | 248 } |
| 249 |
| 250 void DocumentLoader::RemoveCompletedRanges() { |
| 251 // Split every request that has been partially downloaded already into smaller |
| 252 // requests. |
| 253 std::vector<std::pair<size_t, size_t> > ranges; |
| 254 auto it = pending_requests_.begin(); |
| 255 while (it != pending_requests_.end()) { |
| 256 chunk_stream_.GetMissedRanges(it->first, it->second, &ranges); |
| 257 pending_requests_.insert(it, ranges.begin(), ranges.end()); |
| 258 ranges.clear(); |
| 259 pending_requests_.erase(it++); |
| 260 } |
| 261 } |
| 262 |
| 263 void DocumentLoader::DownloadPendingRequests() { |
| 264 if (request_pending_) |
| 265 return; |
| 266 |
| 267 uint32_t pos; |
| 268 uint32_t size; |
| 269 if (pending_requests_.empty()) { |
| 270 // If the document is not complete and we have no outstanding requests, |
| 271 // download what's left for as long as no other request gets added to |
| 272 // |pending_requests_|. |
| 273 pos = chunk_stream_.GetFirstMissingByte(); |
| 274 if (pos >= document_size_) { |
| 275 // We're done downloading the document. |
| 276 return; |
| 277 } |
| 278 // Start with size 0, we'll set |current_request_extended_size_| to > 0. |
| 279 // This way this request will get cancelled as soon as the renderer wants |
| 280 // another portion of the document. |
| 281 size = 0; |
| 282 } else { |
| 283 RemoveCompletedRanges(); |
| 284 |
| 285 pos = pending_requests_.front().first; |
| 286 size = pending_requests_.front().second; |
| 287 if (IsDataAvailable(pos, size)) { |
| 288 ReadComplete(); |
| 289 return; |
| 290 } |
| 291 } |
| 292 |
| 293 size_t last_byte_before = chunk_stream_.GetFirstMissingByteInInterval(pos); |
| 294 if (size < kDefaultRequestSize) { |
| 295 // Try to extend before pos, up to size |kDefaultRequestSize|. |
| 296 if (pos + size - last_byte_before > kDefaultRequestSize) { |
| 297 pos += size - kDefaultRequestSize; |
| 298 size = kDefaultRequestSize; |
| 299 } else { |
| 300 size += pos - last_byte_before; |
| 301 pos = last_byte_before; |
| 302 } |
| 303 } |
| 304 if (pos - last_byte_before < kDefaultRequestSize) { |
| 305 // Don't leave a gap smaller than |kDefaultRequestSize|. |
| 306 size += pos - last_byte_before; |
| 307 pos = last_byte_before; |
| 308 } |
| 309 |
| 310 current_request_offset_ = pos; |
| 311 current_request_size_ = size; |
| 312 |
| 313 // Extend the request until the next downloaded byte or the end of the |
| 314 // document. |
| 315 size_t last_missing_byte = |
| 316 chunk_stream_.GetLastMissingByteInInterval(pos + size - 1); |
| 317 current_request_extended_size_ = last_missing_byte - pos + 1; |
| 318 |
| 319 request_pending_ = true; |
| 320 |
| 321 // Start downloading first pending request. |
| 322 loader_.Close(); |
| 323 loader_ = client_->CreateURLLoader(); |
| 324 pp::CompletionCallback callback = |
| 325 loader_factory_.NewCallback(&DocumentLoader::DidOpen); |
| 326 pp::URLRequestInfo request = GetRequest(pos, current_request_extended_size_); |
| 327 requests_count_++; |
| 328 int rv = loader_.Open(request, callback); |
| 329 if (rv != PP_OK_COMPLETIONPENDING) |
| 330 callback.Run(rv); |
| 331 } |
| 332 |
| 333 pp::URLRequestInfo DocumentLoader::GetRequest(uint32_t position, |
| 334 uint32_t size) const { |
| 335 pp::URLRequestInfo request(client_->GetPluginInstance()); |
| 336 request.SetURL(url_); |
| 337 request.SetMethod("GET"); |
| 338 request.SetFollowRedirects(false); |
| 339 request.SetCustomReferrerURL(url_); |
| 340 |
| 341 const size_t kBufSize = 100; |
| 342 char buf[kBufSize]; |
| 343 // According to rfc2616, byte range specifies position of the first and last |
| 344 // bytes in the requested range inclusively. Therefore we should subtract 1 |
| 345 // from the position + size, to get index of the last byte that needs to be |
| 346 // downloaded. |
| 347 base::snprintf(buf, kBufSize, "Range: bytes=%d-%d", position, |
| 348 position + size - 1); |
| 349 pp::Var header(buf); |
| 350 request.SetHeaders(header); |
| 351 |
| 352 return request; |
| 353 } |
| 354 |
| 355 void DocumentLoader::DidOpen(int32_t result) { |
| 356 if (result != PP_OK) { |
| 172 NOTREACHED(); | 357 NOTREACHED(); |
| 173 return; | 358 return; |
| 174 } | 359 } |
| 175 pending_requests_.Union(requested_chunks); | 360 |
| 176 } | 361 int32_t http_code = loader_.GetResponseInfo().GetStatusCode(); |
| 177 | |
| 178 void DocumentLoader::SetPartialLoadingEnabled(bool enabled) { | |
| 179 partial_loading_enabled_ = enabled; | |
| 180 if (!enabled) { | |
| 181 is_partial_loader_active_ = false; | |
| 182 } | |
| 183 } | |
| 184 | |
| 185 bool DocumentLoader::ShouldCancelLoading() const { | |
| 186 if (!loader_) | |
| 187 return true; | |
| 188 if (!partial_loading_enabled_ || pending_requests_.IsEmpty()) | |
| 189 return false; | |
| 190 const gfx::Range current_range(chunk_.chunk_index, | |
| 191 chunk_.chunk_index + kChunkCloseDistance); | |
| 192 return !pending_requests_.Intersects(current_range); | |
| 193 } | |
| 194 | |
| 195 void DocumentLoader::ContinueDownload() { | |
| 196 if (!ShouldCancelLoading()) | |
| 197 return ReadMore(); | |
| 198 DCHECK(partial_loading_enabled_); | |
| 199 DCHECK(!IsDocumentComplete()); | |
| 200 DCHECK(GetDocumentSize()); | |
| 201 | |
| 202 const uint32_t range_start = | |
| 203 pending_requests_.IsEmpty() ? 0 : pending_requests_.First().start(); | |
| 204 RangeSet candidates_for_request( | |
| 205 gfx::Range(range_start, chunk_stream_.total_chunks_count())); | |
| 206 candidates_for_request.Subtract(chunk_stream_.filled_chunks()); | |
| 207 DCHECK(!candidates_for_request.IsEmpty()); | |
| 208 gfx::Range next_request = candidates_for_request.First(); | |
| 209 if (candidates_for_request.Size() == 1 && | |
| 210 next_request.length() < kChunkCloseDistance) { | |
| 211 // We have only request at the end, try to enlarge it to improve back order | |
| 212 // reading. | |
| 213 const int additional_chunks_count = | |
| 214 kChunkCloseDistance - next_request.length(); | |
| 215 int new_start = std::max( | |
| 216 0, static_cast<int>(next_request.start()) - additional_chunks_count); | |
| 217 candidates_for_request = | |
| 218 RangeSet(gfx::Range(new_start, next_request.end())); | |
| 219 candidates_for_request.Subtract(chunk_stream_.filled_chunks()); | |
| 220 next_request = candidates_for_request.Last(); | |
| 221 } | |
| 222 | |
| 223 loader_.reset(); | |
| 224 chunk_.Clear(); | |
| 225 if (!is_partial_loader_active_) { | |
| 226 client_->CancelBrowserDownload(); | |
| 227 is_partial_loader_active_ = true; | |
| 228 } | |
| 229 | |
| 230 const uint32_t start = next_request.start() * DataStream::kChunkSize; | |
| 231 const uint32_t length = | |
| 232 std::min(chunk_stream_.eof_pos() - start, | |
| 233 next_request.length() * DataStream::kChunkSize); | |
| 234 | |
| 235 loader_ = client_->CreateURLLoader(); | |
| 236 | |
| 237 loader_->OpenRange( | |
| 238 url_, url_, start, length, | |
| 239 loader_factory_.NewCallback(&DocumentLoader::DidOpenPartial)); | |
| 240 } | |
| 241 | |
| 242 void DocumentLoader::DidOpenPartial(int32_t result) { | |
| 243 if (result != PP_OK) { | |
| 244 return ReadComplete(); | |
| 245 } | |
| 246 | |
| 247 int32_t http_code = loader_->GetStatusCode(); | |
| 248 if (http_code >= 400 && http_code < 500) { | 362 if (http_code >= 400 && http_code < 500) { |
| 249 // Error accessing resource. 4xx errors indicate subsequent requests | 363 // Error accessing resource. 4xx errors indicate subsequent requests |
| 250 // will fail too. | 364 // will fail too. |
| 251 // E.g. resource has been removed from the server while loading it. | 365 // E.g. resource has been removed from the server while loading it. |
| 252 return ReadComplete(); | 366 // https://code.google.com/p/chromium/issues/detail?id=414827 |
| 253 } | 367 return; |
| 254 | 368 } |
| 255 // Leave position untouched for multipart response for now, when we read the | 369 |
| 256 // data we'll get it. | 370 is_multipart_ = false; |
| 257 if (!loader_->IsMultipart()) { | 371 current_chunk_size_ = 0; |
| 372 current_chunk_read_ = 0; |
| 373 |
| 374 pp::Var headers_var = loader_.GetResponseInfo().GetHeaders(); |
| 375 std::string headers; |
| 376 if (headers_var.is_string()) |
| 377 headers = headers_var.AsString(); |
| 378 |
| 379 std::string boundary = GetMultiPartBoundary(headers); |
| 380 if (!boundary.empty()) { |
| 381 // Leave position untouched for now, when we read the data we'll get it. |
| 382 is_multipart_ = true; |
| 383 multipart_boundary_ = boundary; |
| 384 } else { |
| 258 // Need to make sure that the server returned a byte-range, since it's | 385 // Need to make sure that the server returned a byte-range, since it's |
| 259 // possible for a server to just ignore our byte-range request and just | 386 // possible for a server to just ignore our byte-range request and just |
| 260 // return the entire document even if it supports byte-range requests. | 387 // return the entire document even if it supports byte-range requests. |
| 261 // i.e. sniff response to | 388 // i.e. sniff response to |
| 262 // http://www.act.org/compass/sample/pdf/geometry.pdf | 389 // http://www.act.org/compass/sample/pdf/geometry.pdf |
| 263 int start_pos = 0; | 390 current_pos_ = 0; |
| 264 int end_pos = 0; | 391 uint32_t start_pos, end_pos; |
| 265 if (loader_->GetByteRange(&start_pos, &end_pos)) { | 392 if (GetByteRange(headers, &start_pos, &end_pos)) { |
| 266 if (start_pos % DataStream::kChunkSize != 0) { | 393 current_pos_ = start_pos; |
| 267 return ReadComplete(); | 394 if (end_pos && end_pos > start_pos) |
| 395 current_chunk_size_ = end_pos - start_pos + 1; |
| 396 } else { |
| 397 partial_document_ = false; |
| 398 } |
| 399 } |
| 400 |
| 401 ReadMore(); |
| 402 } |
| 403 |
| 404 void DocumentLoader::ReadMore() { |
| 405 pp::CompletionCallback callback = |
| 406 loader_factory_.NewCallback(&DocumentLoader::DidRead); |
| 407 int rv = loader_.ReadResponseBody(buffer_, sizeof(buffer_), callback); |
| 408 if (rv != PP_OK_COMPLETIONPENDING) |
| 409 callback.Run(rv); |
| 410 } |
| 411 |
| 412 void DocumentLoader::DidRead(int32_t result) { |
| 413 if (result <= 0) { |
| 414 // If |result| == PP_OK, the document was loaded, otherwise an error was |
| 415 // encountered. Either way we want to stop processing the response. In the |
| 416 // case where an error occurred, the renderer will detect that we're missing |
| 417 // data and will display a message. |
| 418 ReadComplete(); |
| 419 return; |
| 420 } |
| 421 |
| 422 char* start = buffer_; |
| 423 size_t length = result; |
| 424 if (is_multipart_ && result > 2) { |
| 425 for (int i = 2; i < result; ++i) { |
| 426 if ((buffer_[i - 1] == '\n' && buffer_[i - 2] == '\n') || |
| 427 (i >= 4 && buffer_[i - 1] == '\n' && buffer_[i - 2] == '\r' && |
| 428 buffer_[i - 3] == '\n' && buffer_[i - 4] == '\r')) { |
| 429 uint32_t start_pos, end_pos; |
| 430 if (GetByteRange(std::string(buffer_, i), &start_pos, &end_pos)) { |
| 431 current_pos_ = start_pos; |
| 432 start += i; |
| 433 length -= i; |
| 434 if (end_pos && end_pos > start_pos) |
| 435 current_chunk_size_ = end_pos - start_pos + 1; |
| 436 } |
| 437 break; |
| 268 } | 438 } |
| 269 DCHECK(!chunk_.chunk_data); | 439 } |
| 270 chunk_.chunk_index = chunk_stream_.GetChunkIndex(start_pos); | 440 |
| 441 // Reset this flag so we don't look inside the buffer in future calls of |
| 442 // DidRead for this response. Note that this code DOES NOT handle multi- |
| 443 // part responses with more than one part (we don't issue them at the |
| 444 // moment, so they shouldn't arrive). |
| 445 is_multipart_ = false; |
| 446 } |
| 447 |
| 448 if (current_chunk_size_ && current_chunk_read_ + length > current_chunk_size_) |
| 449 length = current_chunk_size_ - current_chunk_read_; |
| 450 |
| 451 if (length) { |
| 452 if (document_size_ > 0) { |
| 453 chunk_stream_.WriteData(current_pos_, start, length); |
| 271 } else { | 454 } else { |
| 272 SetPartialLoadingEnabled(false); | 455 // If we did not get content-length in the response, we can't |
| 273 } | 456 // preallocate buffer for the entire document. Resizing the array causes |
| 274 return ContinueDownload(); | 457 // memory fragmentation issues on the large files and OOM exceptions. |
| 275 } | 458 // To fix this, we collect all chunks of the file to the list and |
| 276 // Needs more data to calc chunk index. | 459 // concatenate them together after request is complete. |
| 277 return ReadMore(); | 460 std::vector<unsigned char> buf(length); |
| 278 } | 461 memcpy(buf.data(), start, length); |
| 279 | 462 chunk_buffer_.push_back(std::move(buf)); |
| 280 void DocumentLoader::ReadMore() { | 463 } |
| 281 loader_->ReadResponseBody( | 464 current_pos_ += length; |
| 282 buffer_, sizeof(buffer_), | 465 current_chunk_read_ += length; |
| 283 loader_factory_.NewCallback(&DocumentLoader::DidRead)); | 466 client_->OnNewDataAvailable(); |
| 284 } | 467 } |
| 285 | 468 |
| 286 void DocumentLoader::DidRead(int32_t result) { | 469 // Only call the renderer if we allow partial loading. |
| 287 if (result < 0) { | 470 if (!partial_document_) { |
| 288 // An error occurred. | 471 ReadMore(); |
| 289 // The renderer will detect that we're missing data and will display a | 472 return; |
| 290 // message. | 473 } |
| 291 return ReadComplete(); | 474 |
| 292 } | 475 UpdateRendering(); |
| 293 if (result == 0) { | 476 RemoveCompletedRanges(); |
| 294 loader_.reset(); | 477 |
| 295 if (!is_partial_loader_active_) | 478 if (!pending_requests_.empty()) { |
| 296 return ReadComplete(); | 479 // If there are pending requests and the current content we're downloading |
| 297 return ContinueDownload(); | 480 // doesn't satisfy any of these requests, cancel the current request to |
| 298 } | 481 // fulfill those more important requests. |
| 299 if (loader_->IsMultipart()) { | 482 bool satisfying_pending_request = |
| 300 int start_pos = 0; | 483 SatisfyingRequest(current_request_offset_, current_request_size_); |
| 301 int end_pos = 0; | 484 for (const auto& pending_request : pending_requests_) { |
| 302 if (!loader_->GetByteRange(&start_pos, &end_pos)) { | 485 if (SatisfyingRequest(pending_request.first, pending_request.second)) { |
| 303 return ReadComplete(); | 486 satisfying_pending_request = true; |
| 304 } | 487 break; |
| 305 DCHECK(!chunk_.chunk_data); | 488 } |
| 306 chunk_.chunk_index = chunk_stream_.GetChunkIndex(start_pos); | 489 } |
| 307 } | 490 // Cancel the request as it's not satisfying any request from the |
| 308 if (!SaveChunkData(buffer_, result)) { | 491 // renderer, unless the current request is finished in which case we let |
| 309 return ReadMore(); | 492 // it finish cleanly. |
| 310 } | 493 if (!satisfying_pending_request && |
| 311 if (IsDocumentComplete()) { | 494 current_pos_ < |
| 312 return ReadComplete(); | 495 current_request_offset_ + current_request_extended_size_) { |
| 313 } | 496 loader_.Close(); |
| 314 return ContinueDownload(); | 497 } |
| 315 } | 498 } |
| 316 | 499 |
| 317 bool DocumentLoader::SaveChunkData(char* input, uint32_t input_size) { | 500 ReadMore(); |
| 318 count_of_bytes_received_ += input_size; | 501 } |
| 319 bool chunk_saved = false; | 502 |
| 320 bool loading_pending_request = pending_requests_.Contains(chunk_.chunk_index); | 503 bool DocumentLoader::SatisfyingRequest(size_t offset, size_t size) const { |
| 321 while (input_size > 0) { | 504 return offset <= current_pos_ + kDefaultRequestSize && |
| 322 if (chunk_.data_size == 0) { | 505 current_pos_ < offset + size; |
| 323 chunk_.chunk_data = base::MakeUnique<DataStream::ChunkData>(); | |
| 324 } | |
| 325 const uint32_t new_chunk_data_len = | |
| 326 std::min(DataStream::kChunkSize - chunk_.data_size, input_size); | |
| 327 memcpy(chunk_.chunk_data->data() + chunk_.data_size, input, | |
| 328 new_chunk_data_len); | |
| 329 chunk_.data_size += new_chunk_data_len; | |
| 330 if (chunk_.data_size == DataStream::kChunkSize || | |
| 331 chunk_stream_.eof_pos() == | |
| 332 chunk_.chunk_index * DataStream::kChunkSize + chunk_.data_size) { | |
| 333 chunk_stream_.SetChunkData(chunk_.chunk_index, | |
| 334 std::move(chunk_.chunk_data)); | |
| 335 pending_requests_.Subtract( | |
| 336 gfx::Range(chunk_.chunk_index, chunk_.chunk_index + 1)); | |
| 337 chunk_.data_size = 0; | |
| 338 ++(chunk_.chunk_index); | |
| 339 chunk_saved = true; | |
| 340 } | |
| 341 | |
| 342 input += new_chunk_data_len; | |
| 343 input_size -= new_chunk_data_len; | |
| 344 } | |
| 345 | |
| 346 if (IsDocumentComplete()) | |
| 347 return true; | |
| 348 | |
| 349 if (!chunk_saved) | |
| 350 return false; | |
| 351 | |
| 352 if (loading_pending_request && | |
| 353 !pending_requests_.Contains(chunk_.chunk_index)) { | |
| 354 client_->OnPendingRequestComplete(); | |
| 355 } | |
| 356 client_->OnNewDataAvailable(); | |
| 357 return true; | |
| 358 } | 506 } |
| 359 | 507 |
| 360 void DocumentLoader::ReadComplete() { | 508 void DocumentLoader::ReadComplete() { |
| 361 if (!GetDocumentSize()) { | 509 if (!partial_document_) { |
| 362 uint32_t eof = | 510 if (document_size_ == 0) { |
| 363 chunk_.chunk_index * DataStream::kChunkSize + chunk_.data_size; | 511 // For the document with no 'content-length' specified we've collected all |
| 364 if (!chunk_stream_.filled_chunks().IsEmpty()) { | 512 // the chunks already. Let's allocate final document buffer and copy them |
| 365 eof = std::max( | 513 // over. |
| 366 chunk_stream_.filled_chunks().Last().end() * DataStream::kChunkSize, | 514 chunk_stream_.Preallocate(current_pos_); |
| 367 eof); | 515 uint32_t pos = 0; |
| 368 } | 516 for (auto& chunk : chunk_buffer_) { |
| 369 chunk_stream_.set_eof_pos(eof); | 517 chunk_stream_.WriteData(pos, chunk.data(), chunk.size()); |
| 370 if (eof == chunk_.chunk_index * DataStream::kChunkSize + chunk_.data_size) { | 518 pos += chunk.size(); |
| 371 chunk_stream_.SetChunkData(chunk_.chunk_index, | 519 } |
| 372 std::move(chunk_.chunk_data)); | 520 chunk_buffer_.clear(); |
| 373 } | 521 } |
| 374 } | 522 document_size_ = current_pos_; |
| 375 loader_.reset(); | 523 client_->OnDocumentComplete(); |
| 524 return; |
| 525 } |
| 526 |
| 527 request_pending_ = false; |
| 528 |
| 376 if (IsDocumentComplete()) { | 529 if (IsDocumentComplete()) { |
| 377 client_->OnDocumentComplete(); | 530 client_->OnDocumentComplete(); |
| 378 } else { | 531 return; |
| 379 client_->OnDocumentCanceled(); | 532 } |
| 380 } | 533 |
| 381 } | 534 UpdateRendering(); |
| 382 | 535 DownloadPendingRequests(); |
| 383 float DocumentLoader::GetProgress() const { | 536 } |
| 384 if (!GetDocumentSize()) | 537 |
| 385 return -1; | 538 void DocumentLoader::UpdateRendering() { |
| 386 if (IsDocumentComplete()) | 539 if (header_request_) |
| 387 return 1; | 540 client_->OnPartialDocumentLoaded(); |
| 388 return static_cast<float>(chunk_stream_.filled_chunks_count()) / | 541 else |
| 389 chunk_stream_.total_chunks_count(); | 542 client_->OnPendingRequestComplete(); |
| 543 header_request_ = false; |
| 390 } | 544 } |
| 391 | 545 |
| 392 } // namespace chrome_pdf | 546 } // namespace chrome_pdf |
| OLD | NEW |