Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(13)

Unified Diff: pdf/document_loader.cc

Issue 1506023002: Minimize the number of range requests made by PDFium (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Fix issue with chunk_stream Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « pdf/document_loader.h ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: pdf/document_loader.cc
diff --git a/pdf/document_loader.cc b/pdf/document_loader.cc
index 868a8c4ce4f1367f6a011101efe453a8813c6557..2ec514104857fe894312b84f83aba830cbf8d3d9 100644
--- a/pdf/document_loader.cc
+++ b/pdf/document_loader.cc
@@ -16,9 +16,6 @@ namespace chrome_pdf {
namespace {
-// Document below size will be downloaded in one chunk.
-const uint32_t kMinFileSize = 64 * 1024;
-
// If the headers have a byte-range response, writes the start and end
// positions and returns true if at least the start position was parsed.
// The end position will be set to 0 if it was not found or parsed from the
@@ -176,14 +173,18 @@ bool DocumentLoader::Init(const pp::URLLoader& loader,
}
void DocumentLoader::LoadPartialDocument() {
+ // The current request is a full request (not a range request) so it starts at
+ // 0 and ends at |document_size_|.
+ current_chunk_size_ = document_size_;
+ current_pos_ = 0;
+ current_request_offset_ = 0;
+ current_request_size_ = 0;
+ current_request_extended_size_ = document_size_;
+ request_pending_ = true;
+
partial_document_ = true;
- // Force the main request to be cancelled, since if we're a full-frame plugin
- // there could be other references to the loader.
- loader_.Close();
- loader_ = pp::URLLoader();
- // Download file header.
header_request_ = true;
- RequestData(0, std::min(GetRequestSize(), document_size_));
+ ReadMore();
}
void DocumentLoader::LoadFullDocument() {
@@ -212,12 +213,8 @@ uint32_t DocumentLoader::GetAvailableData() const {
}
void DocumentLoader::ClearPendingRequests() {
- // The first item in the queue is pending (need to keep it in the queue).
- if (pending_requests_.size() > 1) {
- // Remove all elements except the first one.
- pending_requests_.erase(++pending_requests_.begin(),
- pending_requests_.end());
- }
+ pending_requests_.erase(pending_requests_.begin(),
+ pending_requests_.end());
}
bool DocumentLoader::GetBlock(uint32_t position,
@@ -247,86 +244,74 @@ void DocumentLoader::RequestData(uint32_t position, uint32_t size) {
DownloadPendingRequests();
}
+void DocumentLoader::RemoveCompletedRanges() {
+ // Split every request that has been partially downloaded already into smaller
+ // requests.
+ std::vector<std::pair<size_t, size_t> > ranges;
+ auto it = pending_requests_.begin();
+ while (it != pending_requests_.end()) {
+ chunk_stream_.GetMissedRanges(it->first, it->second, &ranges);
+ pending_requests_.insert(it, ranges.begin(), ranges.end());
+ ranges.clear();
+ pending_requests_.erase(it++);
+ }
+}
+
void DocumentLoader::DownloadPendingRequests() {
- if (request_pending_ || pending_requests_.empty())
+ if (request_pending_)
return;
- // Remove already completed requests.
- // By design DownloadPendingRequests() should have at least 1 request in the
- // queue. ReadComplete() will remove the last pending comment from the queue.
- while (pending_requests_.size() > 1) {
- if (IsDataAvailable(pending_requests_.front().first,
- pending_requests_.front().second)) {
- pending_requests_.pop_front();
- } else {
- break;
+ uint32_t pos;
+ uint32_t size;
+ if (pending_requests_.empty()) {
+ // If the document is not complete and we have no outstanding requests,
+ // download what's left for as long as no other request gets added to
+ // |pending_requests_|.
+ pos = chunk_stream_.GetFirstMissingByte();
+ if (pos >= document_size_) {
+ // We're done downloading the document.
+ return;
}
- }
-
- uint32_t pos = pending_requests_.front().first;
- uint32_t size = pending_requests_.front().second;
- if (IsDataAvailable(pos, size)) {
- ReadComplete();
- return;
- }
+ // Start with size 0, we'll set |current_request_extended_size_| to > 0.
+ // This way this request will get cancelled as soon as the renderer wants
+ // another portion of the document.
+ size = 0;
+ } else {
+ RemoveCompletedRanges();
- // If current request has been partially downloaded already, split it into
- // a few smaller requests.
- std::vector<std::pair<size_t, size_t> > ranges;
- chunk_stream_.GetMissedRanges(pos, size, &ranges);
- if (!ranges.empty()) {
- pending_requests_.pop_front();
- pending_requests_.insert(pending_requests_.begin(),
- ranges.begin(), ranges.end());
pos = pending_requests_.front().first;
size = pending_requests_.front().second;
- }
-
- uint32_t cur_request_size = GetRequestSize();
- // If size is less than default request, try to expand download range for
- // more optimal download.
- if (size < cur_request_size && partial_document_) {
- // First, try to expand block towards the end of the file.
- uint32_t new_pos = pos;
- uint32_t new_size = cur_request_size;
- if (pos + new_size > document_size_)
- new_size = document_size_ - pos;
-
- std::vector<std::pair<size_t, size_t> > ranges;
- if (chunk_stream_.GetMissedRanges(new_pos, new_size, &ranges)) {
- new_pos = ranges[0].first;
- new_size = ranges[0].second;
+ if (IsDataAvailable(pos, size)) {
+ ReadComplete();
+ return;
}
+ }
- // Second, try to expand block towards the beginning of the file.
- if (new_size < cur_request_size) {
- uint32_t block_end = new_pos + new_size;
- if (block_end > cur_request_size) {
- new_pos = block_end - cur_request_size;
- } else {
- new_pos = 0;
- }
- new_size = block_end - new_pos;
-
- if (chunk_stream_.GetMissedRanges(new_pos, new_size, &ranges)) {
- new_pos = ranges.back().first;
- new_size = ranges.back().second;
- }
+ size_t last_byte_before = chunk_stream_.GetFirstMissingByteInInterval(pos);
+ if (size < kDefaultRequestSize) {
+ // Try to extend before pos, up to size |kDefaultRequestSize|.
+ if (pos + size - last_byte_before > kDefaultRequestSize) {
+ pos += size - kDefaultRequestSize;
+ size = kDefaultRequestSize;
+ } else {
+ size += pos - last_byte_before;
+ pos = last_byte_before;
}
- pos = new_pos;
- size = new_size;
}
-
- size_t last_byte_before = chunk_stream_.GetLastByteBefore(pos);
- size_t first_byte_after = chunk_stream_.GetFirstByteAfter(pos + size - 1);
- if (pos - last_byte_before < cur_request_size) {
- size = pos + size - last_byte_before;
+ if (pos - last_byte_before < kDefaultRequestSize) {
+ // Don't leave a gap smaller than |kDefaultRequestSize|.
+ size += pos - last_byte_before;
pos = last_byte_before;
}
- if ((pos + size < first_byte_after) &&
- (pos + size + cur_request_size >= first_byte_after))
- size = first_byte_after - pos;
+ current_request_offset_ = pos;
+ current_request_size_ = size;
+
+ // Extend the request until the next downloaded byte or the end of the
+ // document.
+ size_t last_missing_byte =
+ chunk_stream_.GetLastMissingByteInInterval(pos + size - 1);
+ current_request_extended_size_ = last_missing_byte - pos + 1;
request_pending_ = true;
@@ -335,7 +320,7 @@ void DocumentLoader::DownloadPendingRequests() {
loader_ = client_->CreateURLLoader();
pp::CompletionCallback callback =
loader_factory_.NewCallback(&DocumentLoader::DidOpen);
- pp::URLRequestInfo request = GetRequest(pos, size);
+ pp::URLRequestInfo request = GetRequest(pos, current_request_extended_size_);
requests_count_++;
int rv = loader_.Open(request, callback);
if (rv != PP_OK_COMPLETIONPENDING)
@@ -469,14 +454,51 @@ void DocumentLoader::DidRead(int32_t result) {
current_chunk_read_ += length;
client_->OnNewDataAvailable();
}
+
+ // Only call the renderer if we allow partial loading.
+ if (!partial_document_) {
+ ReadMore();
+ return;
+ }
+
+ UpdateRendering();
+ RemoveCompletedRanges();
+
+ if (!pending_requests_.empty()) {
+ // If there are pending requests and the current content we're downloading
+ // doesn't satisfy any of these requests, cancel the current request to
+ // fullfill those more important requests.
+ bool satisfying_pending_request =
+ SatisfyingRequest(current_request_offset_, current_request_size_);
+ for (const auto& pending_request : pending_requests_) {
+ if (SatisfyingRequest(pending_request.first, pending_request.second)) {
+ satisfying_pending_request = true;
+ break;
+ }
+ }
+ // Cancel the request as it's not satisfying any request from the
+ // renderer, unless the current request is finished in which case we let
+ // it finish cleanly.
+ if (!satisfying_pending_request &&
+ current_pos_ < current_request_offset_ +
+ current_request_extended_size_) {
+ loader_.Close();
+ }
+ }
+
ReadMore();
- } else if (result == PP_OK) {
+ } else if (result == PP_OK || result == PP_ERROR_ABORTED) {
ReadComplete();
} else {
NOTREACHED();
}
}
+bool DocumentLoader::SatisfyingRequest(size_t offset, size_t size) const {
+ return offset <= current_pos_ + kDefaultRequestSize &&
+ current_pos_ < offset + size;
+}
+
void DocumentLoader::ReadComplete() {
if (!partial_document_) {
if (document_size_ == 0) {
@@ -497,46 +519,22 @@ void DocumentLoader::ReadComplete() {
}
request_pending_ = false;
- pending_requests_.pop_front();
-
- // If there are more pending request - continue downloading.
- if (!pending_requests_.empty()) {
- DownloadPendingRequests();
- return;
- }
if (IsDocumentComplete()) {
client_->OnDocumentComplete();
return;
}
+ UpdateRendering();
+ DownloadPendingRequests();
+}
+
+void DocumentLoader::UpdateRendering() {
if (header_request_)
client_->OnPartialDocumentLoaded();
else
client_->OnPendingRequestComplete();
header_request_ = false;
-
- // The OnPendingRequestComplete could have added more requests.
- if (!pending_requests_.empty()) {
- DownloadPendingRequests();
- } else {
- // Document is not complete and we have no outstanding requests.
- // Let's keep downloading PDF file in small chunks.
- uint32_t pos = chunk_stream_.GetFirstMissingByte();
- std::vector<std::pair<size_t, size_t> > ranges;
- chunk_stream_.GetMissedRanges(pos, GetRequestSize(), &ranges);
- DCHECK(!ranges.empty());
- RequestData(ranges[0].first, ranges[0].second);
- }
-}
-
-uint32_t DocumentLoader::GetRequestSize() const {
- // Document loading strategy:
- // For first 10 requests, we use 32k chunk sizes, for the next 10 requests we
- // double the size (64k), and so on, until we cap max request size at 2M for
- // 71 or more requests.
- uint32_t limited_count = std::min(std::max(requests_count_, 10u), 70u);
- return 32 * 1024 * (1 << ((limited_count - 1) / 10u));
}
} // namespace chrome_pdf
« no previous file with comments | « pdf/document_loader.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698