Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(360)

Side by Side Diff: pdf/document_loader.cc

Issue 2349753003: Improve linearized pdf load/show time. (Closed)
Patch Set: Fix review issues. Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "pdf/document_loader.h" 5 #include "pdf/document_loader.h"
6 6
7 #include <stddef.h> 7 #include <stddef.h>
8 #include <stdint.h> 8 #include <stdint.h>
9 9
10 #include <algorithm>
11
10 #include "base/logging.h" 12 #include "base/logging.h"
11 #include "base/strings/string_util.h" 13 #include "base/strings/string_util.h"
12 #include "net/http/http_util.h" 14 #include "pdf/url_loader_wrapper.h"
13 #include "ppapi/c/pp_errors.h" 15 #include "ppapi/c/pp_errors.h"
14 #include "ppapi/cpp/url_loader.h" 16 #include "ui/gfx/range/range.h"
15 #include "ppapi/cpp/url_request_info.h"
16 #include "ppapi/cpp/url_response_info.h"
17 17
18 namespace chrome_pdf { 18 namespace chrome_pdf {
19 19
20 namespace { 20 namespace {
21 21 const int kChunkCloseDistance = 10;
Lei Zhang 2016/10/21 09:33:09 Can you document what this is / why this value was
snake 2016/10/21 15:13:15 Done.
22 // If the headers have a byte-range response, writes the start and end
23 // positions and returns true if at least the start position was parsed.
24 // The end position will be set to 0 if it was not found or parsed from the
25 // response.
26 // Returns false if not even a start position could be parsed.
27 bool GetByteRange(const std::string& headers, uint32_t* start, uint32_t* end) {
28 net::HttpUtil::HeadersIterator it(headers.begin(), headers.end(), "\n");
29 while (it.GetNext()) {
30 if (base::LowerCaseEqualsASCII(it.name(), "content-range")) {
31 std::string range = it.values().c_str();
32 if (base::StartsWith(range, "bytes",
33 base::CompareCase::INSENSITIVE_ASCII)) {
34 range = range.substr(strlen("bytes"));
35 std::string::size_type pos = range.find('-');
36 std::string range_end;
37 if (pos != std::string::npos)
38 range_end = range.substr(pos + 1);
39 base::TrimWhitespaceASCII(range, base::TRIM_LEADING, &range);
40 base::TrimWhitespaceASCII(range_end, base::TRIM_LEADING, &range_end);
41 *start = atoi(range.c_str());
42 *end = atoi(range_end.c_str());
43 return true;
44 }
45 }
46 }
47 return false;
48 }
49
50 // If the headers have a multi-part response, returns the boundary name.
51 // Otherwise returns an empty string.
52 std::string GetMultiPartBoundary(const std::string& headers) {
53 net::HttpUtil::HeadersIterator it(headers.begin(), headers.end(), "\n");
54 while (it.GetNext()) {
55 if (base::LowerCaseEqualsASCII(it.name(), "content-type")) {
56 std::string type = base::ToLowerASCII(it.values());
57 if (base::StartsWith(type, "multipart/", base::CompareCase::SENSITIVE)) {
58 const char* boundary = strstr(type.c_str(), "boundary=");
59 if (!boundary) {
60 NOTREACHED();
61 break;
62 }
63
64 return std::string(boundary + 9);
65 }
66 }
67 }
68 return std::string();
69 }
70 22
71 bool IsValidContentType(const std::string& type) { 23 bool IsValidContentType(const std::string& type) {
72 return (base::EndsWith(type, "/pdf", base::CompareCase::INSENSITIVE_ASCII) || 24 return (base::EndsWith(type, "/pdf", base::CompareCase::INSENSITIVE_ASCII) ||
73 base::EndsWith(type, ".pdf", base::CompareCase::INSENSITIVE_ASCII) || 25 base::EndsWith(type, ".pdf", base::CompareCase::INSENSITIVE_ASCII) ||
74 base::EndsWith(type, "/x-pdf", 26 base::EndsWith(type, "/x-pdf",
75 base::CompareCase::INSENSITIVE_ASCII) || 27 base::CompareCase::INSENSITIVE_ASCII) ||
76 base::EndsWith(type, "/*", base::CompareCase::INSENSITIVE_ASCII) || 28 base::EndsWith(type, "/*", base::CompareCase::INSENSITIVE_ASCII) ||
77 base::EndsWith(type, "/acrobat", 29 base::EndsWith(type, "/acrobat",
78 base::CompareCase::INSENSITIVE_ASCII) || 30 base::CompareCase::INSENSITIVE_ASCII) ||
79 base::EndsWith(type, "/unknown", 31 base::EndsWith(type, "/unknown",
80 base::CompareCase::INSENSITIVE_ASCII)); 32 base::CompareCase::INSENSITIVE_ASCII));
81 } 33 }
82 34
83 } // namespace 35 } // namespace
84 36
85 DocumentLoader::Client::~Client() { 37 DocumentLoader::Client::~Client() {
86 } 38 }
87 39
40 DocumentLoader::Chunk::Chunk() {}
41
42 DocumentLoader::Chunk::~Chunk() {}
43
44 void DocumentLoader::Chunk::Clear() {
45 chunk_index = 0;
46 data_size = 0;
47 chunk_data.reset();
48 }
49
88 DocumentLoader::DocumentLoader(Client* client) 50 DocumentLoader::DocumentLoader(Client* client)
89 : client_(client), partial_document_(false), request_pending_(false), 51 : client_(client), loader_factory_(this) {}
90 current_pos_(0), current_chunk_size_(0), current_chunk_read_(0),
91 document_size_(0), header_request_(true), is_multipart_(false) {
92 loader_factory_.Initialize(this);
93 }
94 52
95 DocumentLoader::~DocumentLoader() { 53 DocumentLoader::~DocumentLoader() {
96 } 54 }
97 55
98 bool DocumentLoader::Init(const pp::URLLoader& loader, 56 bool DocumentLoader::Init(std::unique_ptr<URLLoaderWrapper> loader,
99 const std::string& url, 57 const std::string& url) {
100 const std::string& headers) {
101 DCHECK(url_.empty()); 58 DCHECK(url_.empty());
59 DCHECK(!loader_);
102 url_ = url; 60 url_ = url;
103 loader_ = loader;
104 61
105 std::string response_headers; 62 std::string type = loader->GetContentType();
106 if (!headers.empty()) {
107 response_headers = headers;
108 } else {
109 pp::URLResponseInfo response = loader_.GetResponseInfo();
110 pp::Var headers_var = response.GetHeaders();
111
112 if (headers_var.is_string()) {
113 response_headers = headers_var.AsString();
114 }
115 }
116
117 bool accept_ranges_bytes = false;
118 bool content_encoded = false;
119 uint32_t content_length = 0;
120 std::string type;
121 std::string disposition;
122 63
123 // This happens for PDFs not loaded from http(s) sources. 64 // This happens for PDFs not loaded from http(s) sources.
124 if (response_headers == "Content-Type: text/plain") { 65 if (type == "text/plain") {
125 if (!base::StartsWith(url, "http://", 66 if (!base::StartsWith(url, "http://",
126 base::CompareCase::INSENSITIVE_ASCII) && 67 base::CompareCase::INSENSITIVE_ASCII) &&
127 !base::StartsWith(url, "https://", 68 !base::StartsWith(url, "https://",
128 base::CompareCase::INSENSITIVE_ASCII)) { 69 base::CompareCase::INSENSITIVE_ASCII)) {
129 type = "application/pdf"; 70 type = "application/pdf";
130 } 71 }
131 } 72 }
132 if (type.empty() && !response_headers.empty()) {
133 net::HttpUtil::HeadersIterator it(response_headers.begin(),
134 response_headers.end(), "\n");
135 while (it.GetNext()) {
136 if (base::LowerCaseEqualsASCII(it.name(), "content-length")) {
137 content_length = atoi(it.values().c_str());
138 } else if (base::LowerCaseEqualsASCII(it.name(), "accept-ranges")) {
139 accept_ranges_bytes = base::LowerCaseEqualsASCII(it.values(), "bytes");
140 } else if (base::LowerCaseEqualsASCII(it.name(), "content-encoding")) {
141 content_encoded = true;
142 } else if (base::LowerCaseEqualsASCII(it.name(), "content-type")) {
143 type = it.values();
144 size_t semi_colon_pos = type.find(';');
145 if (semi_colon_pos != std::string::npos) {
146 type = type.substr(0, semi_colon_pos);
147 }
148 TrimWhitespaceASCII(type, base::TRIM_ALL, &type);
149 } else if (base::LowerCaseEqualsASCII(it.name(), "content-disposition")) {
150 disposition = it.values();
151 }
152 }
153 }
154 if (!type.empty() && !IsValidContentType(type)) 73 if (!type.empty() && !IsValidContentType(type))
155 return false; 74 return false;
156 if (base::StartsWith(disposition, "attachment", 75
76 if (base::StartsWith(loader->GetContentDisposition(), "attachment",
157 base::CompareCase::INSENSITIVE_ASCII)) 77 base::CompareCase::INSENSITIVE_ASCII))
158 return false; 78 return false;
159 79
160 if (content_length > 0) 80 loader_ = std::move(loader);
Lei Zhang 2016/10/21 09:33:09 Should |url_| be set here instead of above?
snake 2016/10/21 15:13:14 Done.
161 chunk_stream_.Preallocate(content_length);
162 81
163 document_size_ = content_length; 82 if (!loader_->IsContentEncoded()) {
164 requests_count_ = 0; 83 chunk_stream_.set_eof_pos(std::max(0, loader_->GetContentLength()));
84 }
85 int64_t bytes_received = 0;
86 int64_t total_bytes_to_be_received = 0;
87 if (!chunk_stream_.eof_pos() &&
88 loader_->GetDownloadProgress(&bytes_received,
89 &total_bytes_to_be_received)) {
90 chunk_stream_.set_eof_pos(
91 std::max(0, static_cast<int>(total_bytes_to_be_received)));
92 }
165 93
166 // Enable partial loading only if file size is above the threshold. 94 SetPartialLoadingEnabled(
167 // It will allow avoiding latency for multiple requests. 95 partial_loading_enabled_ &&
168 if (content_length > kMinFileSize && 96 !base::StartsWith(url, "file://", base::CompareCase::INSENSITIVE_ASCII) &&
169 accept_ranges_bytes && 97 loader_->IsAcceptRangesBytes() && !loader_->IsContentEncoded() &&
170 !content_encoded) { 98 GetDocumentSize());
171 LoadPartialDocument(); 99
172 } else { 100 ReadMore();
173 LoadFullDocument();
174 }
175 return true; 101 return true;
176 } 102 }
177 103
178 void DocumentLoader::LoadPartialDocument() { 104 bool DocumentLoader::IsDocumentComplete() const {
179 // The current request is a full request (not a range request) so it starts at 105 return chunk_stream_.IsComplete();
180 // 0 and ends at |document_size_|.
181 current_chunk_size_ = document_size_;
182 current_pos_ = 0;
183 current_request_offset_ = 0;
184 current_request_size_ = 0;
185 current_request_extended_size_ = document_size_;
186 request_pending_ = true;
187
188 partial_document_ = true;
189 header_request_ = true;
190 ReadMore();
191 } 106 }
192 107
193 void DocumentLoader::LoadFullDocument() { 108 uint32_t DocumentLoader::GetDocumentSize() const {
194 partial_document_ = false; 109 return chunk_stream_.eof_pos();
195 chunk_buffer_.clear();
196 ReadMore();
197 }
198
199 bool DocumentLoader::IsDocumentComplete() const {
200 if (document_size_ == 0) // Document size unknown.
201 return false;
202 return IsDataAvailable(0, document_size_);
203 }
204
205 uint32_t DocumentLoader::GetAvailableData() const {
206 if (document_size_ == 0) { // If document size is unknown.
207 return current_pos_;
208 }
209
210 std::vector<std::pair<size_t, size_t> > ranges;
211 chunk_stream_.GetMissedRanges(0, document_size_, &ranges);
212 uint32_t available = document_size_;
213 for (const auto& range : ranges)
214 available -= range.second;
215 return available;
216 } 110 }
217 111
218 void DocumentLoader::ClearPendingRequests() { 112 void DocumentLoader::ClearPendingRequests() {
219 pending_requests_.erase(pending_requests_.begin(), 113 pending_requests_.Clear();
220 pending_requests_.end());
221 } 114 }
222 115
223 bool DocumentLoader::GetBlock(uint32_t position, 116 bool DocumentLoader::GetBlock(uint32_t position,
224 uint32_t size, 117 uint32_t size,
225 void* buf) const { 118 void* buf) const {
226 return chunk_stream_.ReadData(position, size, buf); 119 return chunk_stream_.ReadData(gfx::Range(position, position + size), buf);
Lei Zhang 2016/10/21 09:33:09 Can "position + size" overflow?
snake 2016/10/21 15:13:15 yes (because this method is called from outside),
Lei Zhang 2016/10/25 19:26:00 Can we stop integer overflows here, i.e. earlier r
snake 2016/10/25 20:07:17 Done.
227 } 120 }
228 121
229 bool DocumentLoader::IsDataAvailable(uint32_t position, uint32_t size) const { 122 bool DocumentLoader::IsDataAvailable(uint32_t position, uint32_t size) const {
230 return chunk_stream_.IsRangeAvailable(position, size); 123 return chunk_stream_.IsRangeAvailable(gfx::Range(position, position + size));
231 } 124 }
232 125
233 void DocumentLoader::RequestData(uint32_t position, uint32_t size) { 126 void DocumentLoader::RequestData(uint32_t position, uint32_t size) {
234 DCHECK(partial_document_); 127 if (!size || IsDataAvailable(position, size)) {
128 return;
129 }
130
131 if (GetDocumentSize() && (position + size > GetDocumentSize())) {
132 return;
133 }
235 134
236 // We have some artefact request from 135 // We have some artefact request from
237 // PDFiumEngine::OnDocumentComplete() -> FPDFAvail_IsPageAvail after 136 // PDFiumEngine::OnDocumentComplete() -> FPDFAvail_IsPageAvail after
238 // document is complete. 137 // document is complete.
239 // We need this fix in PDFIum. Adding this as a work around. 138 // We need this fix in PDFIum. Adding this as a work around.
240 // Bug: http://code.google.com/p/chromium/issues/detail?id=79996 139 // Bug: http://code.google.com/p/chromium/issues/detail?id=79996
241 // Test url: 140 // Test url:
242 // http://www.icann.org/en/correspondence/holtzman-to-jeffrey-02mar11-en.pdf 141 // http://www.icann.org/en/correspondence/holtzman-to-jeffrey-02mar11-en.pdf
243 if (IsDocumentComplete()) 142 if (!loader_)
244 return; 143 return;
245 144
246 pending_requests_.push_back(std::pair<size_t, size_t>(position, size)); 145 RangeSet requested_chunks(chunk_stream_.GetChunksRange(position, size));
247 DownloadPendingRequests(); 146 requested_chunks.Subtract(chunk_stream_.filled_chunks());
248 } 147 if (requested_chunks.IsEmpty()) {
249
250 void DocumentLoader::RemoveCompletedRanges() {
251 // Split every request that has been partially downloaded already into smaller
252 // requests.
253 std::vector<std::pair<size_t, size_t> > ranges;
254 auto it = pending_requests_.begin();
255 while (it != pending_requests_.end()) {
256 chunk_stream_.GetMissedRanges(it->first, it->second, &ranges);
257 pending_requests_.insert(it, ranges.begin(), ranges.end());
258 ranges.clear();
259 pending_requests_.erase(it++);
260 }
261 }
262
263 void DocumentLoader::DownloadPendingRequests() {
264 if (request_pending_)
265 return;
266
267 uint32_t pos;
268 uint32_t size;
269 if (pending_requests_.empty()) {
270 // If the document is not complete and we have no outstanding requests,
271 // download what's left for as long as no other request gets added to
272 // |pending_requests_|.
273 pos = chunk_stream_.GetFirstMissingByte();
274 if (pos >= document_size_) {
275 // We're done downloading the document.
276 return;
277 }
278 // Start with size 0, we'll set |current_request_extended_size_| to > 0.
279 // This way this request will get cancelled as soon as the renderer wants
280 // another portion of the document.
281 size = 0;
282 } else {
283 RemoveCompletedRanges();
284
285 pos = pending_requests_.front().first;
286 size = pending_requests_.front().second;
287 if (IsDataAvailable(pos, size)) {
288 ReadComplete();
289 return;
290 }
291 }
292
293 size_t last_byte_before = chunk_stream_.GetFirstMissingByteInInterval(pos);
294 if (size < kDefaultRequestSize) {
295 // Try to extend before pos, up to size |kDefaultRequestSize|.
296 if (pos + size - last_byte_before > kDefaultRequestSize) {
297 pos += size - kDefaultRequestSize;
298 size = kDefaultRequestSize;
299 } else {
300 size += pos - last_byte_before;
301 pos = last_byte_before;
302 }
303 }
304 if (pos - last_byte_before < kDefaultRequestSize) {
305 // Don't leave a gap smaller than |kDefaultRequestSize|.
306 size += pos - last_byte_before;
307 pos = last_byte_before;
308 }
309
310 current_request_offset_ = pos;
311 current_request_size_ = size;
312
313 // Extend the request until the next downloaded byte or the end of the
314 // document.
315 size_t last_missing_byte =
316 chunk_stream_.GetLastMissingByteInInterval(pos + size - 1);
317 current_request_extended_size_ = last_missing_byte - pos + 1;
318
319 request_pending_ = true;
320
321 // Start downloading first pending request.
322 loader_.Close();
323 loader_ = client_->CreateURLLoader();
324 pp::CompletionCallback callback =
325 loader_factory_.NewCallback(&DocumentLoader::DidOpen);
326 pp::URLRequestInfo request = GetRequest(pos, current_request_extended_size_);
327 requests_count_++;
328 int rv = loader_.Open(request, callback);
329 if (rv != PP_OK_COMPLETIONPENDING)
330 callback.Run(rv);
331 }
332
333 pp::URLRequestInfo DocumentLoader::GetRequest(uint32_t position,
334 uint32_t size) const {
335 pp::URLRequestInfo request(client_->GetPluginInstance());
336 request.SetURL(url_);
337 request.SetMethod("GET");
338 request.SetFollowRedirects(true);
339 request.SetCustomReferrerURL(url_);
340
341 const size_t kBufSize = 100;
342 char buf[kBufSize];
343 // According to rfc2616, byte range specifies position of the first and last
344 // bytes in the requested range inclusively. Therefore we should subtract 1
345 // from the position + size, to get index of the last byte that needs to be
346 // downloaded.
347 base::snprintf(buf, kBufSize, "Range: bytes=%d-%d", position,
348 position + size - 1);
349 pp::Var header(buf);
350 request.SetHeaders(header);
351
352 return request;
353 }
354
355 void DocumentLoader::DidOpen(int32_t result) {
356 if (result != PP_OK) {
357 NOTREACHED(); 148 NOTREACHED();
358 return; 149 return;
359 } 150 }
360 151 pending_requests_.Union(requested_chunks);
361 int32_t http_code = loader_.GetResponseInfo().GetStatusCode(); 152 }
153
154 void DocumentLoader::SetPartialLoadingEnabled(bool enabled) {
155 partial_loading_enabled_ = enabled;
156 if (!enabled) {
157 is_partial_loader_active_ = false;
158 }
159 }
160
161 bool DocumentLoader::ShouldCancelLoading() const {
162 if (!loader_)
163 return true;
164 if (!partial_loading_enabled_ || pending_requests_.IsEmpty())
165 return false;
166 const gfx::Range current_range(chunk_.chunk_index,
167 chunk_.chunk_index + kChunkCloseDistance);
168 return !pending_requests_.Intersects(current_range);
169 }
170
171 void DocumentLoader::ContinueDownload() {
172 if (!ShouldCancelLoading())
173 return ReadMore();
174 DCHECK(partial_loading_enabled_);
175 DCHECK(!IsDocumentComplete());
176 DCHECK(GetDocumentSize());
177
178 const uint32_t range_start =
179 pending_requests_.IsEmpty() ? 0 : pending_requests_.First().start();
180 RangeSet candidates_for_request(
181 gfx::Range(range_start, chunk_stream_.total_chunks_count()));
182 candidates_for_request.Subtract(chunk_stream_.filled_chunks());
183 DCHECK(!candidates_for_request.IsEmpty());
184 gfx::Range next_request = candidates_for_request.First();
185 if (candidates_for_request.Size() == 1 &&
186 next_request.length() < kChunkCloseDistance) {
187 // We have only one request at the end, try to enlarge it to improve back order
188 // reading.
189 const int additional_chunks_count =
190 kChunkCloseDistance - next_request.length();
191 int new_start = std::max(
192 0, static_cast<int>(next_request.start()) - additional_chunks_count);
193 candidates_for_request =
194 RangeSet(gfx::Range(new_start, next_request.end()));
195 candidates_for_request.Subtract(chunk_stream_.filled_chunks());
196 next_request = candidates_for_request.Last();
197 }
198
199 loader_.reset();
200 chunk_.Clear();
201 if (!is_partial_loader_active_) {
202 client_->CancelBrowserDownload();
203 is_partial_loader_active_ = true;
204 }
205
206 const uint32_t start = next_request.start() * DataStream::kChunkSize;
207 const uint32_t length =
208 std::min(chunk_stream_.eof_pos() - start,
209 next_request.length() * DataStream::kChunkSize);
210
211 loader_ = client_->CreateURLLoader();
212
213 loader_->OpenRange(
214 url_, url_, start, length,
215 loader_factory_.NewCallback(&DocumentLoader::DidOpenPartial));
216 }
217
218 void DocumentLoader::DidOpenPartial(int32_t result) {
219 if (result != PP_OK) {
220 return ReadComplete();
221 }
222
223 int32_t http_code = loader_->GetStatusCode();
362 if (http_code >= 400 && http_code < 500) { 224 if (http_code >= 400 && http_code < 500) {
363 // Error accessing resource. 4xx error indicate subsequent requests 225 // Error accessing resource. 4xx error indicate subsequent requests
364 // will fail too. 226 // will fail too.
365 // E.g. resource has been removed from the server while loading it. 227 // E.g. resource has been removed from the server while loading it.
366 // https://code.google.com/p/chromium/issues/detail?id=414827 228 return ReadComplete();
367 return; 229 }
368 } 230
369 231 // Leave position untouched for multipart response for now, when we read the
370 is_multipart_ = false; 232 // data we'll get it.
371 current_chunk_size_ = 0; 233 if (!loader_->IsMultipart()) {
372 current_chunk_read_ = 0;
373
374 pp::Var headers_var = loader_.GetResponseInfo().GetHeaders();
375 std::string headers;
376 if (headers_var.is_string())
377 headers = headers_var.AsString();
378
379 std::string boundary = GetMultiPartBoundary(headers);
380 if (!boundary.empty()) {
381 // Leave position untouched for now, when we read the data we'll get it.
382 is_multipart_ = true;
383 multipart_boundary_ = boundary;
384 } else {
385 // Need to make sure that the server returned a byte-range, since it's 234 // Need to make sure that the server returned a byte-range, since it's
386 // possible for a server to just ignore our byte-range request and just 235 // possible for a server to just ignore our byte-range request and just
387 // return the entire document even if it supports byte-range requests. 236 // return the entire document even if it supports byte-range requests.
388 // i.e. sniff response to 237 // i.e. sniff response to
389 // http://www.act.org/compass/sample/pdf/geometry.pdf 238 // http://www.act.org/compass/sample/pdf/geometry.pdf
390 current_pos_ = 0; 239 int start_pos, end_pos;
Lei Zhang 2016/10/21 09:33:09 nit: 1 declaration per line please.
snake 2016/10/21 15:13:14 Done.
391 uint32_t start_pos, end_pos; 240 if (loader_->GetByteRange(&start_pos, &end_pos)) {
392 if (GetByteRange(headers, &start_pos, &end_pos)) { 241 if (start_pos % DataStream::kChunkSize != 0) {
393 current_pos_ = start_pos; 242 return ReadComplete();
394 if (end_pos && end_pos > start_pos) 243 }
395 current_chunk_size_ = end_pos - start_pos + 1; 244 DCHECK(!chunk_.chunk_data);
245 chunk_.chunk_index = chunk_stream_.GetChunkIndex(start_pos);
396 } else { 246 } else {
397 partial_document_ = false; 247 SetPartialLoadingEnabled(false);
398 } 248 }
399 } 249 return ContinueDownload();
400 250 }
401 ReadMore(); 251 // Needs more data to calc chunk index.
252 return ReadMore();
402 } 253 }
403 254
404 void DocumentLoader::ReadMore() { 255 void DocumentLoader::ReadMore() {
405 pp::CompletionCallback callback = 256 loader_->ReadResponseBody(
406 loader_factory_.NewCallback(&DocumentLoader::DidRead); 257 buffer_, sizeof(buffer_),
407 int rv = loader_.ReadResponseBody(buffer_, sizeof(buffer_), callback); 258 loader_factory_.NewCallback(&DocumentLoader::DidRead));
408 if (rv != PP_OK_COMPLETIONPENDING)
409 callback.Run(rv);
410 } 259 }
411 260
412 void DocumentLoader::DidRead(int32_t result) { 261 void DocumentLoader::DidRead(int32_t result) {
413 if (result <= 0) { 262 if (result < 0) {
414 // If |result| == PP_OK, the document was loaded, otherwise an error was 263 // An error occurred.
415 // encountered. Either way we want to stop processing the response. In the 264 // The renderer will detect that we're missing data and will display a
416 // case where an error occurred, the renderer will detect that we're missing 265 // message.
417 // data and will display a message. 266 return ReadComplete();
418 ReadComplete(); 267 } else if (result == 0) {
Lei Zhang 2016/10/21 09:33:09 nit: no else if after a return.
snake 2016/10/21 15:13:15 Done.
419 return; 268 loader_.reset();
420 } 269 if (!is_partial_loader_active_)
421 270 return ReadComplete();
422 char* start = buffer_; 271 return ContinueDownload();
423 size_t length = result; 272 }
424 if (is_multipart_ && result > 2) { 273 if (loader_->IsMultipart()) {
425 for (int i = 2; i < result; ++i) { 274 int start_pos = 0;
426 if ((buffer_[i - 1] == '\n' && buffer_[i - 2] == '\n') || 275 int end_pos = 0;
427 (i >= 4 && buffer_[i - 1] == '\n' && buffer_[i - 2] == '\r' && 276 if (!loader_->GetByteRange(&start_pos, &end_pos)) {
428 buffer_[i - 3] == '\n' && buffer_[i - 4] == '\r')) { 277 return ReadComplete();
429 uint32_t start_pos, end_pos; 278 }
430 if (GetByteRange(std::string(buffer_, i), &start_pos, &end_pos)) { 279 DCHECK(!chunk_.chunk_data);
431 current_pos_ = start_pos; 280 chunk_.chunk_index = chunk_stream_.GetChunkIndex(start_pos);
432 start += i; 281 }
433 length -= i; 282 if (!SaveChunkData(buffer_, result)) {
434 if (end_pos && end_pos > start_pos) 283 return ReadMore();
435 current_chunk_size_ = end_pos - start_pos + 1; 284 }
436 } 285 if (IsDocumentComplete()) {
437 break; 286 return ReadComplete();
438 } 287 }
439 } 288 return ContinueDownload();
440 289 }
441 // Reset this flag so we don't look inside the buffer in future calls of 290
442 // DidRead for this response. Note that this code DOES NOT handle multi- 291 bool DocumentLoader::SaveChunkData(char* input, uint32_t input_size) {
443 // part responses with more than one part (we don't issue them at the 292 count_of_bytes_received_ += input_size;
Lei Zhang 2016/10/21 09:33:09 Can this overflow?
snake 2016/10/21 15:13:15 No, count_of_bytes_received_ cannot be greater than
444 // moment, so they shouldn't arrive). 293 bool chunk_saved = false;
445 is_multipart_ = false; 294 bool loading_pending_request = pending_requests_.Contains(chunk_.chunk_index);
446 } 295 while (input_size > 0) {
447 296 if (chunk_.data_size == 0) {
448 if (current_chunk_size_ && current_chunk_read_ + length > current_chunk_size_) 297 chunk_.chunk_data.reset(new DataStream::ChunkData());
449 length = current_chunk_size_ - current_chunk_read_; 298 }
450 299 const uint32_t new_chunk_data_len =
451 if (length) { 300 std::min(DataStream::kChunkSize - chunk_.data_size, input_size);
452 if (document_size_ > 0) { 301 memcpy(chunk_.chunk_data->data() + chunk_.data_size, input,
453 chunk_stream_.WriteData(current_pos_, start, length); 302 new_chunk_data_len);
454 } else { 303 chunk_.data_size += new_chunk_data_len;
455 // If we did not get content-length in the response, we can't 304 if (chunk_.data_size == DataStream::kChunkSize ||
456 // preallocate buffer for the entire document. Resizing array causing 305 chunk_stream_.eof_pos() ==
457 // memory fragmentation issues on the large files and OOM exceptions. 306 chunk_.chunk_index * DataStream::kChunkSize + chunk_.data_size) {
458 // To fix this, we collect all chunks of the file to the list and 307 chunk_stream_.SetChunkData(chunk_.chunk_index,
459 // concatenate them together after request is complete. 308 std::move(chunk_.chunk_data));
460 std::vector<unsigned char> buf(length); 309 pending_requests_.Subtract(
461 memcpy(buf.data(), start, length); 310 gfx::Range(chunk_.chunk_index, chunk_.chunk_index + 1));
462 chunk_buffer_.push_back(std::move(buf)); 311 chunk_.data_size = 0;
463 } 312 ++(chunk_.chunk_index);
464 current_pos_ += length; 313 chunk_saved = true;
465 current_chunk_read_ += length; 314 }
315
316 input += new_chunk_data_len;
317 input_size -= new_chunk_data_len;
318 }
319 if (IsDocumentComplete())
320 return true;
321 if (chunk_saved) {
Lei Zhang 2016/10/21 09:33:09 if (!chunk_saved) return false; // do things re
snake 2016/10/21 15:13:14 Done.
322 if (loading_pending_request &&
323 !pending_requests_.Contains(chunk_.chunk_index)) {
324 client_->OnPendingRequestComplete();
325 }
466 client_->OnNewDataAvailable(); 326 client_->OnNewDataAvailable();
467 } 327 }
468 328 return chunk_saved;
469 // Only call the renderer if we allow partial loading.
470 if (!partial_document_) {
471 ReadMore();
472 return;
473 }
474
475 UpdateRendering();
476 RemoveCompletedRanges();
477
478 if (!pending_requests_.empty()) {
479 // If there are pending requests and the current content we're downloading
480 // doesn't satisfy any of these requests, cancel the current request to
481 // fullfill those more important requests.
482 bool satisfying_pending_request =
483 SatisfyingRequest(current_request_offset_, current_request_size_);
484 for (const auto& pending_request : pending_requests_) {
485 if (SatisfyingRequest(pending_request.first, pending_request.second)) {
486 satisfying_pending_request = true;
487 break;
488 }
489 }
490 // Cancel the request as it's not satisfying any request from the
491 // renderer, unless the current request is finished in which case we let
492 // it finish cleanly.
493 if (!satisfying_pending_request &&
494 current_pos_ <
495 current_request_offset_ + current_request_extended_size_) {
496 loader_.Close();
497 }
498 }
499
500 ReadMore();
501 }
502
503 bool DocumentLoader::SatisfyingRequest(size_t offset, size_t size) const {
504 return offset <= current_pos_ + kDefaultRequestSize &&
505 current_pos_ < offset + size;
506 } 329 }
507 330
508 void DocumentLoader::ReadComplete() { 331 void DocumentLoader::ReadComplete() {
509 if (!partial_document_) { 332 if (!GetDocumentSize()) {
510 if (document_size_ == 0) { 333 uint32_t eof =
511 // For the document with no 'content-length" specified we've collected all 334 chunk_.chunk_index * DataStream::kChunkSize + chunk_.data_size;
512 // the chunks already. Let's allocate final document buffer and copy them 335 if (!chunk_stream_.filled_chunks().IsEmpty()) {
513 // over. 336 eof = std::max(
514 chunk_stream_.Preallocate(current_pos_); 337 chunk_stream_.filled_chunks().Last().end() * DataStream::kChunkSize,
515 uint32_t pos = 0; 338 eof);
516 for (auto& chunk : chunk_buffer_) { 339 }
517 chunk_stream_.WriteData(pos, chunk.data(), chunk.size()); 340 chunk_stream_.set_eof_pos(eof);
518 pos += chunk.size(); 341 if (eof == chunk_.chunk_index * DataStream::kChunkSize + chunk_.data_size) {
519 } 342 chunk_stream_.SetChunkData(chunk_.chunk_index,
520 chunk_buffer_.clear(); 343 std::move(chunk_.chunk_data));
521 } 344 }
522 document_size_ = current_pos_; 345 }
523 client_->OnDocumentComplete(); 346 loader_.reset();
524 return;
525 }
526
527 request_pending_ = false;
528
529 if (IsDocumentComplete()) { 347 if (IsDocumentComplete()) {
530 client_->OnDocumentComplete(); 348 client_->OnDocumentComplete();
531 return; 349 } else {
532 } 350 client_->OnDocumentCanceled();
533 351 }
534 UpdateRendering(); 352 }
535 DownloadPendingRequests(); 353
536 } 354 float DocumentLoader::GetProgress() const {
537 355 if (!GetDocumentSize())
538 void DocumentLoader::UpdateRendering() { 356 return -1;
539 if (header_request_) 357 if (IsDocumentComplete())
540 client_->OnPartialDocumentLoaded(); 358 return 1;
541 else 359 return chunk_stream_.filled_chunks_count() * 1. /
Lei Zhang 2016/10/21 09:33:09 Can you static_cast to a float instead of "* 1." ?
snake 2016/10/21 15:13:14 Done.
542 client_->OnPendingRequestComplete(); 360 chunk_stream_.total_chunks_count();
543 header_request_ = false;
544 } 361 }
545 362
546 } // namespace chrome_pdf 363 } // namespace chrome_pdf
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698