OLD | NEW |
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "pdf/document_loader.h" | 5 #include "pdf/document_loader.h" |
6 | 6 |
7 #include <stddef.h> | 7 #include <stddef.h> |
8 #include <stdint.h> | 8 #include <stdint.h> |
9 | 9 |
10 #include <algorithm> | |
11 | |
12 #include "base/logging.h" | 10 #include "base/logging.h" |
13 #include "base/memory/ptr_util.h" | |
14 #include "base/numerics/safe_math.h" | |
15 #include "base/strings/string_util.h" | 11 #include "base/strings/string_util.h" |
16 #include "pdf/url_loader_wrapper.h" | 12 #include "net/http/http_util.h" |
17 #include "ppapi/c/pp_errors.h" | 13 #include "ppapi/c/pp_errors.h" |
18 #include "ui/gfx/range/range.h" | 14 #include "ppapi/cpp/url_loader.h" |
| 15 #include "ppapi/cpp/url_request_info.h" |
| 16 #include "ppapi/cpp/url_response_info.h" |
19 | 17 |
20 namespace chrome_pdf { | 18 namespace chrome_pdf { |
21 | 19 |
22 namespace { | 20 namespace { |
23 | 21 |
24 // The distance from last received chunk, when we wait requesting data, using | 22 // If the headers have a byte-range response, writes the start and end |
25 // current connection (like playing a cassette tape) and do not send new range | 23 // positions and returns true if at least the start position was parsed. |
26 // request (like rewind a cassette tape, and continue playing after). | 24 // The end position will be set to 0 if it was not found or parsed from the |
27 // Experimentally chosen value. | 25 // response. |
28 const int kChunkCloseDistance = 10; | 26 // Returns false if not even a start position could be parsed. |
| 27 bool GetByteRange(const std::string& headers, uint32_t* start, uint32_t* end) { |
| 28 net::HttpUtil::HeadersIterator it(headers.begin(), headers.end(), "\n"); |
| 29 while (it.GetNext()) { |
| 30 if (base::LowerCaseEqualsASCII(it.name(), "content-range")) { |
| 31 std::string range = it.values().c_str(); |
| 32 if (base::StartsWith(range, "bytes", |
| 33 base::CompareCase::INSENSITIVE_ASCII)) { |
| 34 range = range.substr(strlen("bytes")); |
| 35 std::string::size_type pos = range.find('-'); |
| 36 std::string range_end; |
| 37 if (pos != std::string::npos) |
| 38 range_end = range.substr(pos + 1); |
| 39 base::TrimWhitespaceASCII(range, base::TRIM_LEADING, &range); |
| 40 base::TrimWhitespaceASCII(range_end, base::TRIM_LEADING, &range_end); |
| 41 *start = atoi(range.c_str()); |
| 42 *end = atoi(range_end.c_str()); |
| 43 return true; |
| 44 } |
| 45 } |
| 46 } |
| 47 return false; |
| 48 } |
| 49 |
| 50 // If the headers have a multi-part response, returns the boundary name. |
| 51 // Otherwise returns an empty string. |
| 52 std::string GetMultiPartBoundary(const std::string& headers) { |
| 53 net::HttpUtil::HeadersIterator it(headers.begin(), headers.end(), "\n"); |
| 54 while (it.GetNext()) { |
| 55 if (base::LowerCaseEqualsASCII(it.name(), "content-type")) { |
| 56 std::string type = base::ToLowerASCII(it.values()); |
| 57 if (base::StartsWith(type, "multipart/", base::CompareCase::SENSITIVE)) { |
| 58 const char* boundary = strstr(type.c_str(), "boundary="); |
| 59 if (!boundary) { |
| 60 NOTREACHED(); |
| 61 break; |
| 62 } |
| 63 |
| 64 return std::string(boundary + 9); |
| 65 } |
| 66 } |
| 67 } |
| 68 return std::string(); |
| 69 } |
29 | 70 |
30 // Return true if the HTTP response of |loader| is a successful one and loading | 71 // Return true if the HTTP response of |loader| is a successful one and loading |
31 // should continue. 4xx errors indicate subsequent requests will fail too. | 72 // should continue. 4xx errors indicate subsequent requests will fail too. |
32 // e.g. resource has been removed from the server while loading it. 301 | 73 // e.g. resource has been removed from the server while loading it. 301 |
33 // indicates a redirect was returned which won't be successful because we | 74 // indicates a redirect was returned which won't be successful because we |
34 // disable following redirects for PDF loading (we assume they are already | 75 // disable following redirects for PDF loading (we assume they are already |
35 // resolved by the browser). | 76 // resolved by the browser). |
36 bool ResponseStatusSuccess(const URLLoaderWrapper* loader) { | 77 bool ResponseStatusSuccess(const pp::URLLoader& loader) { |
37 int32_t http_code = loader->GetStatusCode(); | 78 int32_t http_code = loader.GetResponseInfo().GetStatusCode(); |
38 return (http_code < 400 && http_code != 301) || http_code >= 500; | 79 return (http_code < 400 && http_code != 301) || http_code >= 500; |
39 } | 80 } |
40 | 81 |
41 bool IsValidContentType(const std::string& type) { | 82 bool IsValidContentType(const std::string& type) { |
42 return (base::EndsWith(type, "/pdf", base::CompareCase::INSENSITIVE_ASCII) || | 83 return (base::EndsWith(type, "/pdf", base::CompareCase::INSENSITIVE_ASCII) || |
43 base::EndsWith(type, ".pdf", base::CompareCase::INSENSITIVE_ASCII) || | 84 base::EndsWith(type, ".pdf", base::CompareCase::INSENSITIVE_ASCII) || |
44 base::EndsWith(type, "/x-pdf", | 85 base::EndsWith(type, "/x-pdf", |
45 base::CompareCase::INSENSITIVE_ASCII) || | 86 base::CompareCase::INSENSITIVE_ASCII) || |
46 base::EndsWith(type, "/*", base::CompareCase::INSENSITIVE_ASCII) || | 87 base::EndsWith(type, "/*", base::CompareCase::INSENSITIVE_ASCII) || |
47 base::EndsWith(type, "/acrobat", | 88 base::EndsWith(type, "/acrobat", |
48 base::CompareCase::INSENSITIVE_ASCII) || | 89 base::CompareCase::INSENSITIVE_ASCII) || |
49 base::EndsWith(type, "/unknown", | 90 base::EndsWith(type, "/unknown", |
50 base::CompareCase::INSENSITIVE_ASCII)); | 91 base::CompareCase::INSENSITIVE_ASCII)); |
51 } | 92 } |
52 | 93 |
53 } // namespace | 94 } // namespace |
54 | 95 |
55 DocumentLoader::Client::~Client() { | 96 DocumentLoader::Client::~Client() { |
56 } | 97 } |
57 | 98 |
58 DocumentLoader::Chunk::Chunk() {} | 99 DocumentLoader::DocumentLoader(Client* client) |
59 | 100 : client_(client), partial_document_(false), request_pending_(false), |
60 DocumentLoader::Chunk::~Chunk() {} | 101 current_pos_(0), current_chunk_size_(0), current_chunk_read_(0), |
61 | 102 document_size_(0), header_request_(true), is_multipart_(false) { |
62 void DocumentLoader::Chunk::Clear() { | 103 loader_factory_.Initialize(this); |
63 chunk_index = 0; | |
64 data_size = 0; | |
65 chunk_data.reset(); | |
66 } | 104 } |
67 | 105 |
68 DocumentLoader::DocumentLoader(Client* client) | |
69 : client_(client), loader_factory_(this) {} | |
70 | |
71 DocumentLoader::~DocumentLoader() { | 106 DocumentLoader::~DocumentLoader() { |
72 } | 107 } |
73 | 108 |
74 bool DocumentLoader::Init(std::unique_ptr<URLLoaderWrapper> loader, | 109 bool DocumentLoader::Init(const pp::URLLoader& loader, |
75 const std::string& url) { | 110 const std::string& url, |
| 111 const std::string& headers) { |
76 DCHECK(url_.empty()); | 112 DCHECK(url_.empty()); |
77 DCHECK(!loader_); | |
78 | 113 |
79 // Check that the initial response status is a valid one. | 114 // Check that the initial response status is a valid one. |
80 if (!ResponseStatusSuccess(loader.get())) | 115 if (!ResponseStatusSuccess(loader)) |
81 return false; | 116 return false; |
82 | 117 |
83 std::string type = loader->GetContentType(); | 118 url_ = url; |
| 119 loader_ = loader; |
| 120 |
| 121 std::string response_headers; |
| 122 if (!headers.empty()) { |
| 123 response_headers = headers; |
| 124 } else { |
| 125 pp::URLResponseInfo response = loader_.GetResponseInfo(); |
| 126 pp::Var headers_var = response.GetHeaders(); |
| 127 |
| 128 if (headers_var.is_string()) { |
| 129 response_headers = headers_var.AsString(); |
| 130 } |
| 131 } |
| 132 |
| 133 bool accept_ranges_bytes = false; |
| 134 bool content_encoded = false; |
| 135 uint32_t content_length = 0; |
| 136 std::string type; |
| 137 std::string disposition; |
84 | 138 |
85 // This happens for PDFs not loaded from http(s) sources. | 139 // This happens for PDFs not loaded from http(s) sources. |
86 if (type == "text/plain") { | 140 if (response_headers == "Content-Type: text/plain") { |
87 if (!base::StartsWith(url, "http://", | 141 if (!base::StartsWith(url, "http://", |
88 base::CompareCase::INSENSITIVE_ASCII) && | 142 base::CompareCase::INSENSITIVE_ASCII) && |
89 !base::StartsWith(url, "https://", | 143 !base::StartsWith(url, "https://", |
90 base::CompareCase::INSENSITIVE_ASCII)) { | 144 base::CompareCase::INSENSITIVE_ASCII)) { |
91 type = "application/pdf"; | 145 type = "application/pdf"; |
92 } | 146 } |
93 } | 147 } |
| 148 if (type.empty() && !response_headers.empty()) { |
| 149 net::HttpUtil::HeadersIterator it(response_headers.begin(), |
| 150 response_headers.end(), "\n"); |
| 151 while (it.GetNext()) { |
| 152 if (base::LowerCaseEqualsASCII(it.name(), "content-length")) { |
| 153 content_length = atoi(it.values().c_str()); |
| 154 } else if (base::LowerCaseEqualsASCII(it.name(), "accept-ranges")) { |
| 155 accept_ranges_bytes = base::LowerCaseEqualsASCII(it.values(), "bytes"); |
| 156 } else if (base::LowerCaseEqualsASCII(it.name(), "content-encoding")) { |
| 157 content_encoded = true; |
| 158 } else if (base::LowerCaseEqualsASCII(it.name(), "content-type")) { |
| 159 type = it.values(); |
| 160 size_t semi_colon_pos = type.find(';'); |
| 161 if (semi_colon_pos != std::string::npos) { |
| 162 type = type.substr(0, semi_colon_pos); |
| 163 } |
| 164 TrimWhitespaceASCII(type, base::TRIM_ALL, &type); |
| 165 } else if (base::LowerCaseEqualsASCII(it.name(), "content-disposition")) { |
| 166 disposition = it.values(); |
| 167 } |
| 168 } |
| 169 } |
94 if (!type.empty() && !IsValidContentType(type)) | 170 if (!type.empty() && !IsValidContentType(type)) |
95 return false; | 171 return false; |
96 | 172 if (base::StartsWith(disposition, "attachment", |
97 if (base::StartsWith(loader->GetContentDisposition(), "attachment", | |
98 base::CompareCase::INSENSITIVE_ASCII)) | 173 base::CompareCase::INSENSITIVE_ASCII)) |
99 return false; | 174 return false; |
100 | 175 |
101 url_ = url; | 176 if (content_length > 0) |
102 loader_ = std::move(loader); | 177 chunk_stream_.Preallocate(content_length); |
103 | 178 |
104 if (!loader_->IsContentEncoded()) { | 179 document_size_ = content_length; |
105 chunk_stream_.set_eof_pos(std::max(0, loader_->GetContentLength())); | 180 requests_count_ = 0; |
| 181 |
| 182 // Enable partial loading only if file size is above the threshold. |
| 183 // It will allow avoiding latency for multiple requests. |
| 184 if (content_length > kMinFileSize && |
| 185 accept_ranges_bytes && |
| 186 !content_encoded) { |
| 187 LoadPartialDocument(); |
| 188 } else { |
| 189 LoadFullDocument(); |
106 } | 190 } |
107 int64_t bytes_received = 0; | |
108 int64_t total_bytes_to_be_received = 0; | |
109 if (!chunk_stream_.eof_pos() && | |
110 loader_->GetDownloadProgress(&bytes_received, | |
111 &total_bytes_to_be_received)) { | |
112 chunk_stream_.set_eof_pos( | |
113 std::max(0, static_cast<int>(total_bytes_to_be_received))); | |
114 } | |
115 | |
116 SetPartialLoadingEnabled( | |
117 partial_loading_enabled_ && | |
118 !base::StartsWith(url, "file://", base::CompareCase::INSENSITIVE_ASCII) && | |
119 loader_->IsAcceptRangesBytes() && !loader_->IsContentEncoded() && | |
120 GetDocumentSize()); | |
121 | |
122 ReadMore(); | |
123 return true; | 191 return true; |
124 } | 192 } |
125 | 193 |
126 bool DocumentLoader::IsDocumentComplete() const { | 194 void DocumentLoader::LoadPartialDocument() { |
127 return chunk_stream_.IsComplete(); | 195 // The current request is a full request (not a range request) so it starts at |
| 196 // 0 and ends at |document_size_|. |
| 197 current_chunk_size_ = document_size_; |
| 198 current_pos_ = 0; |
| 199 current_request_offset_ = 0; |
| 200 current_request_size_ = 0; |
| 201 current_request_extended_size_ = document_size_; |
| 202 request_pending_ = true; |
| 203 |
| 204 partial_document_ = true; |
| 205 header_request_ = true; |
| 206 ReadMore(); |
128 } | 207 } |
129 | 208 |
130 uint32_t DocumentLoader::GetDocumentSize() const { | 209 void DocumentLoader::LoadFullDocument() { |
131 return chunk_stream_.eof_pos(); | 210 partial_document_ = false; |
| 211 chunk_buffer_.clear(); |
| 212 ReadMore(); |
| 213 } |
| 214 |
| 215 bool DocumentLoader::IsDocumentComplete() const { |
| 216 if (document_size_ == 0) // Document size unknown. |
| 217 return false; |
| 218 return IsDataAvailable(0, document_size_); |
| 219 } |
| 220 |
| 221 uint32_t DocumentLoader::GetAvailableData() const { |
| 222 if (document_size_ == 0) { // If document size is unknown. |
| 223 return current_pos_; |
| 224 } |
| 225 |
| 226 std::vector<std::pair<size_t, size_t> > ranges; |
| 227 chunk_stream_.GetMissedRanges(0, document_size_, &ranges); |
| 228 uint32_t available = document_size_; |
| 229 for (const auto& range : ranges) |
| 230 available -= range.second; |
| 231 return available; |
132 } | 232 } |
133 | 233 |
134 void DocumentLoader::ClearPendingRequests() { | 234 void DocumentLoader::ClearPendingRequests() { |
135 pending_requests_.Clear(); | 235 pending_requests_.erase(pending_requests_.begin(), |
| 236 pending_requests_.end()); |
136 } | 237 } |
137 | 238 |
138 bool DocumentLoader::GetBlock(uint32_t position, | 239 bool DocumentLoader::GetBlock(uint32_t position, |
139 uint32_t size, | 240 uint32_t size, |
140 void* buf) const { | 241 void* buf) const { |
141 base::CheckedNumeric<uint32_t> addition_result = position; | 242 return chunk_stream_.ReadData(position, size, buf); |
142 addition_result += size; | |
143 if (!addition_result.IsValid()) | |
144 return false; | |
145 return chunk_stream_.ReadData( | |
146 gfx::Range(position, addition_result.ValueOrDie()), buf); | |
147 } | 243 } |
148 | 244 |
149 bool DocumentLoader::IsDataAvailable(uint32_t position, uint32_t size) const { | 245 bool DocumentLoader::IsDataAvailable(uint32_t position, uint32_t size) const { |
150 base::CheckedNumeric<uint32_t> addition_result = position; | 246 return chunk_stream_.IsRangeAvailable(position, size); |
151 addition_result += size; | |
152 if (!addition_result.IsValid()) | |
153 return false; | |
154 return chunk_stream_.IsRangeAvailable( | |
155 gfx::Range(position, addition_result.ValueOrDie())); | |
156 } | 247 } |
157 | 248 |
158 void DocumentLoader::RequestData(uint32_t position, uint32_t size) { | 249 void DocumentLoader::RequestData(uint32_t position, uint32_t size) { |
159 if (!size || IsDataAvailable(position, size)) { | 250 DCHECK(partial_document_); |
160 return; | |
161 } | |
162 { | |
163 // Check integer overflow. | |
164 base::CheckedNumeric<uint32_t> addition_result = position; | |
165 addition_result += size; | |
166 if (!addition_result.IsValid()) | |
167 return; | |
168 } | |
169 | |
170 if (GetDocumentSize() && (position + size > GetDocumentSize())) { | |
171 return; | |
172 } | |
173 | 251 |
174 // We have some artefact request from | 252 // We have some artefact request from |
175 // PDFiumEngine::OnDocumentComplete() -> FPDFAvail_IsPageAvail after | 253 // PDFiumEngine::OnDocumentComplete() -> FPDFAvail_IsPageAvail after |
176 // document is complete. | 254 // document is complete. |
177 // We need this fix in PDFium. Adding this as a workaround. | 255 // We need this fix in PDFium. Adding this as a workaround. |
178 // Bug: http://code.google.com/p/chromium/issues/detail?id=79996 | 256 // Bug: http://code.google.com/p/chromium/issues/detail?id=79996 |
179 // Test url: | 257 // Test url: |
180 // http://www.icann.org/en/correspondence/holtzman-to-jeffrey-02mar11-en.pdf | 258 // http://www.icann.org/en/correspondence/holtzman-to-jeffrey-02mar11-en.pdf |
181 if (!loader_) | 259 if (IsDocumentComplete()) |
182 return; | 260 return; |
183 | 261 |
184 RangeSet requested_chunks(chunk_stream_.GetChunksRange(position, size)); | 262 pending_requests_.push_back(std::pair<size_t, size_t>(position, size)); |
185 requested_chunks.Subtract(chunk_stream_.filled_chunks()); | 263 DownloadPendingRequests(); |
186 if (requested_chunks.IsEmpty()) { | 264 } |
| 265 |
| 266 void DocumentLoader::RemoveCompletedRanges() { |
| 267 // Split every request that has been partially downloaded already into smaller |
| 268 // requests. |
| 269 std::vector<std::pair<size_t, size_t> > ranges; |
| 270 auto it = pending_requests_.begin(); |
| 271 while (it != pending_requests_.end()) { |
| 272 chunk_stream_.GetMissedRanges(it->first, it->second, &ranges); |
| 273 pending_requests_.insert(it, ranges.begin(), ranges.end()); |
| 274 ranges.clear(); |
| 275 pending_requests_.erase(it++); |
| 276 } |
| 277 } |
| 278 |
| 279 void DocumentLoader::DownloadPendingRequests() { |
| 280 if (request_pending_) |
| 281 return; |
| 282 |
| 283 uint32_t pos; |
| 284 uint32_t size; |
| 285 if (pending_requests_.empty()) { |
| 286 // If the document is not complete and we have no outstanding requests, |
| 287 // download what's left for as long as no other request gets added to |
| 288 // |pending_requests_|. |
| 289 pos = chunk_stream_.GetFirstMissingByte(); |
| 290 if (pos >= document_size_) { |
| 291 // We're done downloading the document. |
| 292 return; |
| 293 } |
| 294 // Start with size 0, we'll set |current_request_extended_size_| to > 0. |
| 295 // This way this request will get cancelled as soon as the renderer wants |
| 296 // another portion of the document. |
| 297 size = 0; |
| 298 } else { |
| 299 RemoveCompletedRanges(); |
| 300 |
| 301 pos = pending_requests_.front().first; |
| 302 size = pending_requests_.front().second; |
| 303 if (IsDataAvailable(pos, size)) { |
| 304 ReadComplete(); |
| 305 return; |
| 306 } |
| 307 } |
| 308 |
| 309 size_t last_byte_before = chunk_stream_.GetFirstMissingByteInInterval(pos); |
| 310 if (size < kDefaultRequestSize) { |
| 311 // Try to extend before pos, up to size |kDefaultRequestSize|. |
| 312 if (pos + size - last_byte_before > kDefaultRequestSize) { |
| 313 pos += size - kDefaultRequestSize; |
| 314 size = kDefaultRequestSize; |
| 315 } else { |
| 316 size += pos - last_byte_before; |
| 317 pos = last_byte_before; |
| 318 } |
| 319 } |
| 320 if (pos - last_byte_before < kDefaultRequestSize) { |
| 321 // Don't leave a gap smaller than |kDefaultRequestSize|. |
| 322 size += pos - last_byte_before; |
| 323 pos = last_byte_before; |
| 324 } |
| 325 |
| 326 current_request_offset_ = pos; |
| 327 current_request_size_ = size; |
| 328 |
| 329 // Extend the request until the next downloaded byte or the end of the |
| 330 // document. |
| 331 size_t last_missing_byte = |
| 332 chunk_stream_.GetLastMissingByteInInterval(pos + size - 1); |
| 333 current_request_extended_size_ = last_missing_byte - pos + 1; |
| 334 |
| 335 request_pending_ = true; |
| 336 |
| 337 // Start downloading first pending request. |
| 338 loader_.Close(); |
| 339 loader_ = client_->CreateURLLoader(); |
| 340 pp::CompletionCallback callback = |
| 341 loader_factory_.NewCallback(&DocumentLoader::DidOpen); |
| 342 pp::URLRequestInfo request = GetRequest(pos, current_request_extended_size_); |
| 343 requests_count_++; |
| 344 int rv = loader_.Open(request, callback); |
| 345 if (rv != PP_OK_COMPLETIONPENDING) |
| 346 callback.Run(rv); |
| 347 } |
| 348 |
| 349 pp::URLRequestInfo DocumentLoader::GetRequest(uint32_t position, |
| 350 uint32_t size) const { |
| 351 pp::URLRequestInfo request(client_->GetPluginInstance()); |
| 352 request.SetURL(url_); |
| 353 request.SetMethod("GET"); |
| 354 request.SetFollowRedirects(false); |
| 355 request.SetCustomReferrerURL(url_); |
| 356 |
| 357 const size_t kBufSize = 100; |
| 358 char buf[kBufSize]; |
| 359 // According to rfc2616, byte range specifies position of the first and last |
| 360 // bytes in the requested range inclusively. Therefore we should subtract 1 |
| 361 // from the position + size, to get index of the last byte that needs to be |
| 362 // downloaded. |
| 363 base::snprintf(buf, kBufSize, "Range: bytes=%d-%d", position, |
| 364 position + size - 1); |
| 365 pp::Var header(buf); |
| 366 request.SetHeaders(header); |
| 367 |
| 368 return request; |
| 369 } |
| 370 |
| 371 void DocumentLoader::DidOpen(int32_t result) { |
| 372 if (result != PP_OK) { |
187 NOTREACHED(); | 373 NOTREACHED(); |
188 return; | 374 client_->OnDocumentFailed(); |
189 } | 375 return; |
190 pending_requests_.Union(requested_chunks); | 376 } |
191 } | 377 |
192 | 378 if (!ResponseStatusSuccess(loader_)) { |
193 void DocumentLoader::SetPartialLoadingEnabled(bool enabled) { | 379 client_->OnDocumentFailed(); |
194 partial_loading_enabled_ = enabled; | 380 return; |
195 if (!enabled) { | 381 } |
196 is_partial_loader_active_ = false; | 382 |
197 } | 383 is_multipart_ = false; |
198 } | 384 current_chunk_size_ = 0; |
199 | 385 current_chunk_read_ = 0; |
200 bool DocumentLoader::ShouldCancelLoading() const { | 386 |
201 if (!loader_) | 387 pp::Var headers_var = loader_.GetResponseInfo().GetHeaders(); |
202 return true; | 388 std::string headers; |
203 if (!partial_loading_enabled_ || pending_requests_.IsEmpty()) | 389 if (headers_var.is_string()) |
204 return false; | 390 headers = headers_var.AsString(); |
205 const gfx::Range current_range(chunk_.chunk_index, | 391 |
206 chunk_.chunk_index + kChunkCloseDistance); | 392 std::string boundary = GetMultiPartBoundary(headers); |
207 return !pending_requests_.Intersects(current_range); | 393 if (!boundary.empty()) { |
208 } | 394 // Leave position untouched for now, when we read the data we'll get it. |
209 | 395 is_multipart_ = true; |
210 void DocumentLoader::ContinueDownload() { | 396 multipart_boundary_ = boundary; |
211 if (!ShouldCancelLoading()) | 397 } else { |
212 return ReadMore(); | |
213 DCHECK(partial_loading_enabled_); | |
214 DCHECK(!IsDocumentComplete()); | |
215 DCHECK(GetDocumentSize()); | |
216 | |
217 const uint32_t range_start = | |
218 pending_requests_.IsEmpty() ? 0 : pending_requests_.First().start(); | |
219 RangeSet candidates_for_request( | |
220 gfx::Range(range_start, chunk_stream_.total_chunks_count())); | |
221 candidates_for_request.Subtract(chunk_stream_.filled_chunks()); | |
222 DCHECK(!candidates_for_request.IsEmpty()); | |
223 gfx::Range next_request = candidates_for_request.First(); | |
224 if (candidates_for_request.Size() == 1 && | |
225 next_request.length() < kChunkCloseDistance) { | |
226 // We have only request at the end, try to enlarge it to improve back order | |
227 // reading. | |
228 const int additional_chunks_count = | |
229 kChunkCloseDistance - next_request.length(); | |
230 int new_start = std::max( | |
231 0, static_cast<int>(next_request.start()) - additional_chunks_count); | |
232 candidates_for_request = | |
233 RangeSet(gfx::Range(new_start, next_request.end())); | |
234 candidates_for_request.Subtract(chunk_stream_.filled_chunks()); | |
235 next_request = candidates_for_request.Last(); | |
236 } | |
237 | |
238 loader_.reset(); | |
239 chunk_.Clear(); | |
240 if (!is_partial_loader_active_) { | |
241 client_->CancelBrowserDownload(); | |
242 is_partial_loader_active_ = true; | |
243 } | |
244 | |
245 const uint32_t start = next_request.start() * DataStream::kChunkSize; | |
246 const uint32_t length = | |
247 std::min(chunk_stream_.eof_pos() - start, | |
248 next_request.length() * DataStream::kChunkSize); | |
249 | |
250 loader_ = client_->CreateURLLoader(); | |
251 | |
252 loader_->OpenRange( | |
253 url_, url_, start, length, | |
254 loader_factory_.NewCallback(&DocumentLoader::DidOpenPartial)); | |
255 } | |
256 | |
257 void DocumentLoader::DidOpenPartial(int32_t result) { | |
258 if (result != PP_OK) { | |
259 return ReadComplete(); | |
260 } | |
261 | |
262 if (!ResponseStatusSuccess(loader_.get())) | |
263 return ReadComplete(); | |
264 | |
265 // Leave position untouched for multipart response for now, when we read the | 503 // Cancel the request as it's not satisfying any request from the |
266 // data we'll get it. | |
267 if (!loader_->IsMultipart()) { | |
268 // Need to make sure that the server returned a byte-range, since it's | 398 // Need to make sure that the server returned a byte-range, since it's |
269 // possible for a server to just ignore our byte-range request and just | 399 // possible for a server to just ignore our byte-range request and just |
270 // return the entire document even if it supports byte-range requests. | 400 // return the entire document even if it supports byte-range requests. |
271 // i.e. sniff response to | 401 // i.e. sniff response to |
272 // http://www.act.org/compass/sample/pdf/geometry.pdf | 402 // http://www.act.org/compass/sample/pdf/geometry.pdf |
273 int start_pos = 0; | 403 current_pos_ = 0; |
274 int end_pos = 0; | 404 uint32_t start_pos, end_pos; |
275 if (loader_->GetByteRange(&start_pos, &end_pos)) { | 405 if (GetByteRange(headers, &start_pos, &end_pos)) { |
276 if (start_pos % DataStream::kChunkSize != 0) { | 406 current_pos_ = start_pos; |
277 return ReadComplete(); | 407 if (end_pos && end_pos > start_pos) |
| 408 current_chunk_size_ = end_pos - start_pos + 1; |
| 409 } else { |
| 410 partial_document_ = false; |
| 411 } |
| 412 } |
| 413 |
| 414 ReadMore(); |
| 415 } |
| 416 |
| 417 void DocumentLoader::ReadMore() { |
| 418 pp::CompletionCallback callback = |
| 419 loader_factory_.NewCallback(&DocumentLoader::DidRead); |
| 420 int rv = loader_.ReadResponseBody(buffer_, sizeof(buffer_), callback); |
| 421 if (rv != PP_OK_COMPLETIONPENDING) |
| 422 callback.Run(rv); |
| 423 } |
| 424 |
| 425 void DocumentLoader::DidRead(int32_t result) { |
| 426 if (result <= 0) { |
| 427 // If |result| == PP_OK, the document was loaded, otherwise an error was |
| 428 // encountered. Either way we want to stop processing the response. In the |
| 429 // case where an error occurred, the renderer will detect that we're missing |
| 430 // data and will display a message. |
| 431 ReadComplete(); |
| 432 return; |
| 433 } |
| 434 |
| 435 char* start = buffer_; |
| 436 size_t length = result; |
| 437 if (is_multipart_ && result > 2) { |
| 438 for (int i = 2; i < result; ++i) { |
| 439 if ((buffer_[i - 1] == '\n' && buffer_[i - 2] == '\n') || |
| 440 (i >= 4 && buffer_[i - 1] == '\n' && buffer_[i - 2] == '\r' && |
| 441 buffer_[i - 3] == '\n' && buffer_[i - 4] == '\r')) { |
| 442 uint32_t start_pos, end_pos; |
| 443 if (GetByteRange(std::string(buffer_, i), &start_pos, &end_pos)) { |
| 444 current_pos_ = start_pos; |
| 445 start += i; |
| 446 length -= i; |
| 447 if (end_pos && end_pos > start_pos) |
| 448 current_chunk_size_ = end_pos - start_pos + 1; |
| 449 } |
| 450 break; |
278 } | 451 } |
279 DCHECK(!chunk_.chunk_data); | 452 } |
280 chunk_.chunk_index = chunk_stream_.GetChunkIndex(start_pos); | 453 |
| 454 // Reset this flag so we don't look inside the buffer in future calls of |
| 455 // DidRead for this response. Note that this code DOES NOT handle multi- |
| 456 // part responses with more than one part (we don't issue them at the |
| 457 // moment, so they shouldn't arrive). |
| 458 is_multipart_ = false; |
| 459 } |
| 460 |
| 461 if (current_chunk_size_ && current_chunk_read_ + length > current_chunk_size_) |
| 462 length = current_chunk_size_ - current_chunk_read_; |
| 463 |
| 464 if (length) { |
| 465 if (document_size_ > 0) { |
| 466 chunk_stream_.WriteData(current_pos_, start, length); |
281 } else { | 467 } else { |
282 SetPartialLoadingEnabled(false); | 468 // If we did not get content-length in the response, we can't |
283 } | 469 // preallocate buffer for the entire document. Resizing array causing |
284 return ContinueDownload(); | 470 // memory fragmentation issues on the large files and OOM exceptions. |
285 } | 471 // To fix this, we collect all chunks of the file to the list and |
286 // Needs more data to calc chunk index. | 472 // concatenate them together after request is complete. |
287 return ReadMore(); | 473 std::vector<unsigned char> buf(length); |
288 } | 474 memcpy(buf.data(), start, length); |
289 | 475 chunk_buffer_.push_back(std::move(buf)); |
290 void DocumentLoader::ReadMore() { | 476 } |
291 loader_->ReadResponseBody( | 477 current_pos_ += length; |
292 buffer_, sizeof(buffer_), | 478 current_chunk_read_ += length; |
293 loader_factory_.NewCallback(&DocumentLoader::DidRead)); | 479 client_->OnNewDataAvailable(); |
294 } | 480 } |
295 | 481 |
296 void DocumentLoader::DidRead(int32_t result) { | 482 // Only call the renderer if we allow partial loading. |
297 if (result < 0) { | 483 if (!partial_document_) { |
298 // An error occurred. | 484 ReadMore(); |
299 // The renderer will detect that we're missing data and will display a | 485 return; |
300 // message. | 486 } |
301 return ReadComplete(); | 487 |
302 } | 488 UpdateRendering(); |
303 if (result == 0) { | 489 RemoveCompletedRanges(); |
304 loader_.reset(); | 490 |
305 if (!is_partial_loader_active_) | 491 if (!pending_requests_.empty()) { |
306 return ReadComplete(); | 492 // If there are pending requests and the current content we're downloading |
307 return ContinueDownload(); | 493 // doesn't satisfy any of these requests, cancel the current request to |
308 } | 494 // fullfill those more important requests. |
309 if (loader_->IsMultipart()) { | 495 bool satisfying_pending_request = |
310 int start_pos = 0; | 496 SatisfyingRequest(current_request_offset_, current_request_size_); |
311 int end_pos = 0; | 497 for (const auto& pending_request : pending_requests_) { |
312 if (!loader_->GetByteRange(&start_pos, &end_pos)) { | 498 if (SatisfyingRequest(pending_request.first, pending_request.second)) { |
313 return ReadComplete(); | 499 satisfying_pending_request = true; |
314 } | 500 break; |
315 DCHECK(!chunk_.chunk_data); | 501 } |
316 chunk_.chunk_index = chunk_stream_.GetChunkIndex(start_pos); | 502 } |
317 } | 503 // Cancel the request as it's not satisfying any request from the |
318 if (!SaveChunkData(buffer_, result)) { | 504 // renderer, unless the current request is finished in which case we let |
319 return ReadMore(); | 505 // it finish cleanly. |
320 } | 506 if (!satisfying_pending_request && |
321 if (IsDocumentComplete()) { | 507 current_pos_ < |
322 return ReadComplete(); | 508 current_request_offset_ + current_request_extended_size_) { |
323 } | 509 loader_.Close(); |
324 return ContinueDownload(); | 510 } |
325 } | 511 } |
326 | 512 |
327 bool DocumentLoader::SaveChunkData(char* input, uint32_t input_size) { | 513 ReadMore(); |
328 count_of_bytes_received_ += input_size; | 514 } |
329 bool chunk_saved = false; | 515 |
330 bool loading_pending_request = pending_requests_.Contains(chunk_.chunk_index); | 516 bool DocumentLoader::SatisfyingRequest(size_t offset, size_t size) const { |
331 while (input_size > 0) { | 517 return offset <= current_pos_ + kDefaultRequestSize && |
332 if (chunk_.data_size == 0) { | 518 current_pos_ < offset + size; |
333 chunk_.chunk_data = base::MakeUnique<DataStream::ChunkData>(); | |
334 } | |
335 const uint32_t new_chunk_data_len = | |
336 std::min(DataStream::kChunkSize - chunk_.data_size, input_size); | |
337 memcpy(chunk_.chunk_data->data() + chunk_.data_size, input, | |
338 new_chunk_data_len); | |
339 chunk_.data_size += new_chunk_data_len; | |
340 if (chunk_.data_size == DataStream::kChunkSize || | |
341 chunk_stream_.eof_pos() == | |
342 chunk_.chunk_index * DataStream::kChunkSize + chunk_.data_size) { | |
343 chunk_stream_.SetChunkData(chunk_.chunk_index, | |
344 std::move(chunk_.chunk_data)); | |
345 pending_requests_.Subtract( | |
346 gfx::Range(chunk_.chunk_index, chunk_.chunk_index + 1)); | |
347 chunk_.data_size = 0; | |
348 ++(chunk_.chunk_index); | |
349 chunk_saved = true; | |
350 } | |
351 | |
352 input += new_chunk_data_len; | |
353 input_size -= new_chunk_data_len; | |
354 } | |
355 | |
356 if (IsDocumentComplete()) | |
357 return true; | |
358 | |
359 if (!chunk_saved) | |
360 return false; | |
361 | |
362 if (loading_pending_request && | |
363 !pending_requests_.Contains(chunk_.chunk_index)) { | |
364 client_->OnPendingRequestComplete(); | |
365 } | |
366 client_->OnNewDataAvailable(); | |
367 return true; | |
368 } | 519 } |
369 | 520 |
370 void DocumentLoader::ReadComplete() { | 521 void DocumentLoader::ReadComplete() { |
371 if (!GetDocumentSize()) { | 522 if (!partial_document_) { |
372 uint32_t eof = | 523 if (document_size_ == 0) { |
373 chunk_.chunk_index * DataStream::kChunkSize + chunk_.data_size; | 524 // For the document with no 'content-length" specified we've collected all |
374 if (!chunk_stream_.filled_chunks().IsEmpty()) { | 525 // the chunks already. Let's allocate final document buffer and copy them |
375 eof = std::max( | 526 // over. |
376 chunk_stream_.filled_chunks().Last().end() * DataStream::kChunkSize, | 527 chunk_stream_.Preallocate(current_pos_); |
377 eof); | 528 uint32_t pos = 0; |
378 } | 529 for (auto& chunk : chunk_buffer_) { |
379 chunk_stream_.set_eof_pos(eof); | 530 chunk_stream_.WriteData(pos, chunk.data(), chunk.size()); |
380 if (eof == chunk_.chunk_index * DataStream::kChunkSize + chunk_.data_size) { | 531 pos += chunk.size(); |
381 chunk_stream_.SetChunkData(chunk_.chunk_index, | 532 } |
382 std::move(chunk_.chunk_data)); | 533 chunk_buffer_.clear(); |
383 } | 534 } |
384 } | 535 document_size_ = current_pos_; |
385 loader_.reset(); | 536 client_->OnDocumentComplete(); |
| 537 return; |
| 538 } |
| 539 |
| 540 request_pending_ = false; |
| 541 |
386 if (IsDocumentComplete()) { | 542 if (IsDocumentComplete()) { |
387 client_->OnDocumentComplete(); | 543 client_->OnDocumentComplete(); |
388 } else { | 544 return; |
389 client_->OnDocumentCanceled(); | 545 } |
390 } | 546 |
391 } | 547 UpdateRendering(); |
392 | 548 DownloadPendingRequests(); |
393 float DocumentLoader::GetProgress() const { | 549 } |
394 if (!GetDocumentSize()) | 550 |
395 return -1; | 551 void DocumentLoader::UpdateRendering() { |
396 if (IsDocumentComplete()) | 552 if (header_request_) |
397 return 1; | 553 client_->OnPartialDocumentLoaded(); |
398 return static_cast<float>(chunk_stream_.filled_chunks_count()) / | 554 else |
399 chunk_stream_.total_chunks_count(); | 555 client_->OnPendingRequestComplete(); |
| 556 header_request_ = false; |
400 } | 557 } |
401 | 558 |
402 } // namespace chrome_pdf | 559 } // namespace chrome_pdf |
OLD | NEW |