Index: pdf/document_loader.cc |
=================================================================== |
--- pdf/document_loader.cc (revision 0) |
+++ pdf/document_loader.cc (revision 0) |
@@ -0,0 +1,515 @@ |
+// Copyright (c) 2010 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#include "pdf/document_loader.h" |
+ |
+#include "base/logging.h" |
+#include "base/strings/string_util.h" |
+#include "net/http/http_util.h" |
+#include "ppapi/c/pp_errors.h" |
+#include "ppapi/cpp/url_loader.h" |
+#include "ppapi/cpp/url_request_info.h" |
+#include "ppapi/cpp/url_response_info.h" |
+ |
+namespace chrome_pdf { |
+ |
+// Documents smaller than this size will be downloaded in a single chunk.
+const uint32 kMinFileSize = 64*1024; |
+ |
+DocumentLoader::DocumentLoader(Client* client) |
+ : client_(client), partial_document_(false), request_pending_(false), |
+ current_pos_(0), current_chunk_size_(0), current_chunk_read_(0), |
+ document_size_(0), header_request_(true), is_multipart_(false) { |
+ loader_factory_.Initialize(this); |
+} |
+ |
+DocumentLoader::~DocumentLoader() { |
+} |
+ |
+bool DocumentLoader::Init(const pp::URLLoader& loader, |
+ const std::string& url, |
+ const std::string& headers) { |
+ DCHECK(url_.empty()); |
+ url_ = url; |
+ loader_ = loader; |
+ |
+ std::string response_headers; |
+ if (!headers.empty()) { |
+ response_headers = headers; |
+ } else { |
+ pp::URLResponseInfo response = loader_.GetResponseInfo(); |
+ pp::Var headers_var = response.GetHeaders(); |
+ |
+ if (headers_var.is_string()) { |
+ response_headers = headers_var.AsString(); |
+ } |
+ } |
+ |
+ bool accept_ranges_bytes = false; |
+ bool content_encoded = false; |
+ uint32 content_length = 0; |
+ std::string type; |
+ std::string disposition; |
+ if (!response_headers.empty()) { |
+ net::HttpUtil::HeadersIterator it(response_headers.begin(), |
+ response_headers.end(), "\n"); |
+ while (it.GetNext()) { |
+ if (LowerCaseEqualsASCII(it.name(), "content-length")) { |
+ content_length = atoi(it.values().c_str()); |
+ } else if (LowerCaseEqualsASCII(it.name(), "accept-ranges")) { |
+ accept_ranges_bytes = LowerCaseEqualsASCII(it.values(), "bytes"); |
+ } else if (LowerCaseEqualsASCII(it.name(), "content-encoding")) { |
+ content_encoded = true; |
+ } else if (LowerCaseEqualsASCII(it.name(), "content-type")) { |
+ type = it.values(); |
+ size_t semi_colon_pos = type.find(';'); |
+ if (semi_colon_pos != std::string::npos) { |
+ type = type.substr(0, semi_colon_pos); |
+ } |
+ TrimWhitespace(type, base::TRIM_ALL, &type); |
+ } else if (LowerCaseEqualsASCII(it.name(), "content-disposition")) { |
+ disposition = it.values(); |
+ } |
+ } |
+ } |
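+  // Reject responses whose content type clearly is not PDF. Wildcard and
+  // unknown types, as well as blob: URLs, are still accepted.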
+ if (!type.empty() && |
+ !EndsWith(type, "/pdf", false) && |
+ !EndsWith(type, ".pdf", false) && |
+ !EndsWith(type, "/x-pdf", false) && |
+ !EndsWith(type, "/*", false) && |
+ !EndsWith(type, "/acrobat", false) && |
+ !EndsWith(type, "/unknown", false) && |
+ !StartsWithASCII(url, "blob:", false)) { |
+ return false; |
+ } |
+ if (StartsWithASCII(disposition, "attachment", false)) { |
+ return false; |
+ } |
+ |
+ if (content_length > 0) |
+ chunk_stream_.Preallocate(content_length); |
+ |
+ document_size_ = content_length; |
+ requests_count_ = 0; |
+ |
+  // Document loading strategy.
+  // The following table shows how the minimal request size grows with the
+  // number of requests that have already been made.
+ chunk_size_table_[10] = 32*1024; |
+ chunk_size_table_[20] = 64*1024; |
+ chunk_size_table_[30] = 128*1024; |
+ chunk_size_table_[40] = 256*1024; |
+ chunk_size_table_[50] = 512*1024; |
+ chunk_size_table_[60] = 1024*1024; |
+ chunk_size_table_[70] = 2048*1024; |
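+  // The minimal request size thus grows as more requests are issued, so a
+  // large document is fetched in progressively larger chunks.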
+ |
+  // Enable partial loading only if the file size is above the threshold;
+  // for small files a single request avoids the latency of multiple requests.
+ if (content_length > kMinFileSize && |
+ accept_ranges_bytes && |
+ !content_encoded) { |
+ LoadPartialDocument(); |
+ } else { |
+ LoadFullDocument(); |
+ } |
+ return true; |
+} |
+ |
+void DocumentLoader::LoadPartialDocument() { |
+ partial_document_ = true; |
+ // Force the main request to be cancelled, since if we're a full-frame plugin |
+ // there could be other references to the loader. |
+ loader_.Close(); |
+ loader_ = pp::URLLoader(); |
+ // Download file header. |
+ header_request_ = true; |
+ RequestData(0, std::min(GetRequestSize(), document_size_)); |
+} |
+ |
+void DocumentLoader::LoadFullDocument() { |
+ partial_document_ = false; |
+ chunk_buffer_.clear(); |
+ ReadMore(); |
+} |
+ |
+bool DocumentLoader::IsDocumentComplete() const { |
+ if (document_size_ == 0) // Document size unknown. |
+ return false; |
+ return IsDataAvailable(0, document_size_); |
+} |
+ |
+uint32 DocumentLoader::GetAvailableData() const { |
+ if (document_size_ == 0) { // If document size is unknown. |
+ return current_pos_; |
+ } |
+ |
+ std::vector<std::pair<size_t, size_t> > ranges; |
+ chunk_stream_.GetMissedRanges(0, document_size_, &ranges); |
+ uint32 available = document_size_; |
+ std::vector<std::pair<size_t, size_t> >::iterator it; |
+ for (it = ranges.begin(); it != ranges.end(); ++it) { |
+ available -= it->second; |
+ } |
+ return available; |
+} |
+ |
+void DocumentLoader::ClearPendingRequests() { |
+ // The first item in the queue is pending (need to keep it in the queue). |
+ if (pending_requests_.size() > 1) { |
+ // Remove all elements except the first one. |
+ pending_requests_.erase(++pending_requests_.begin(), |
+ pending_requests_.end()); |
+ } |
+} |
+ |
+bool DocumentLoader::GetBlock(uint32 position, uint32 size, void* buf) const { |
+ return chunk_stream_.ReadData(position, size, buf); |
+} |
+ |
+bool DocumentLoader::IsDataAvailable(uint32 position, uint32 size) const { |
+ return chunk_stream_.IsRangeAvailable(position, size); |
+} |
+ |
+void DocumentLoader::RequestData(uint32 position, uint32 size) { |
+ DCHECK(partial_document_); |
+ |
+  // PDFiumEngine::OnDocumentComplete() -> FPDFAvail_IsPageAvail can issue a
+  // spurious request after the document is already complete. This should be
+  // fixed in PDFium; ignore such requests here as a workaround.
+  // Bug: http://code.google.com/p/chromium/issues/detail?id=79996
+  // Test url:
+  // http://www.icann.org/en/correspondence/holtzman-to-jeffrey-02mar11-en.pdf
+ if (IsDocumentComplete()) |
+ return; |
+ |
+ pending_requests_.push_back(std::pair<size_t, size_t>(position, size)); |
+ DownloadPendingRequests(); |
+} |
+ |
+void DocumentLoader::DownloadPendingRequests() { |
+ if (request_pending_ || pending_requests_.empty()) |
+ return; |
+ |
+  // Remove already completed requests.
+  // By design DownloadPendingRequests() should have at least 1 request in
+  // the queue. ReadComplete() will remove the last pending request from the
+  // queue.
+ while (pending_requests_.size() > 1) { |
+ if (IsDataAvailable(pending_requests_.front().first, |
+ pending_requests_.front().second)) { |
+ pending_requests_.pop_front(); |
+ } else { |
+ break; |
+ } |
+ } |
+ |
+ uint32 pos = pending_requests_.front().first; |
+ uint32 size = pending_requests_.front().second; |
+ if (IsDataAvailable(pos, size)) { |
+ ReadComplete(); |
+ return; |
+ } |
+ |
+  // If the current request has already been partially downloaded, split it
+  // into a few smaller requests covering the ranges that are still missing.
+ std::vector<std::pair<size_t, size_t> > ranges; |
+ chunk_stream_.GetMissedRanges(pos, size, &ranges); |
+ if (ranges.size() > 0) { |
+ pending_requests_.pop_front(); |
+ pending_requests_.insert(pending_requests_.begin(), |
+ ranges.begin(), ranges.end()); |
+ pos = pending_requests_.front().first; |
+ size = pending_requests_.front().second; |
+ } |
+ |
+ uint32 cur_request_size = GetRequestSize(); |
+  // If the requested size is less than the default request size, try to
+  // expand the download range to make the download more efficient.
+ if (size < cur_request_size && partial_document_) { |
+    // First, try to expand the block towards the end of the file.
+ uint32 new_pos = pos; |
+ uint32 new_size = cur_request_size; |
+ if (pos + new_size > document_size_) |
+ new_size = document_size_ - pos; |
+ |
+ std::vector<std::pair<size_t, size_t> > ranges; |
+ if (chunk_stream_.GetMissedRanges(new_pos, new_size, &ranges)) { |
+ new_pos = ranges[0].first; |
+ new_size = ranges[0].second; |
+ } |
+ |
+    // Second, try to expand the block towards the beginning of the file.
+ if (new_size < cur_request_size) { |
+ uint32 block_end = new_pos + new_size; |
+ if (block_end > cur_request_size) { |
+ new_pos = block_end - cur_request_size; |
+ } else { |
+ new_pos = 0; |
+ } |
+ new_size = block_end - new_pos; |
+ |
+ if (chunk_stream_.GetMissedRanges(new_pos, new_size, &ranges)) { |
+ new_pos = ranges.back().first; |
+ new_size = ranges.back().second; |
+ } |
+ } |
+ pos = new_pos; |
+ size = new_size; |
+ } |
+ |
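+  // If the request starts or ends within one request size of data we already
+  // have, extend it to cover the gap so no small holes are left in the
+  // stream.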
+ size_t last_byte_before = chunk_stream_.GetLastByteBefore(pos); |
+ size_t first_byte_after = chunk_stream_.GetFirstByteAfter(pos + size - 1); |
+ if (pos - last_byte_before < cur_request_size) { |
+ size = pos + size - last_byte_before; |
+ pos = last_byte_before; |
+ } |
+ |
+ if ((pos + size < first_byte_after) && |
+ (pos + size + cur_request_size >= first_byte_after)) |
+ size = first_byte_after - pos; |
+ |
+ request_pending_ = true; |
+ |
+ // Start downloading first pending request. |
+ loader_.Close(); |
+ loader_ = client_->CreateURLLoader(); |
+ pp::CompletionCallback callback = |
+ loader_factory_.NewCallback(&DocumentLoader::DidOpen); |
+ pp::URLRequestInfo request = GetRequest(pos, size); |
+ requests_count_++; |
+ int rv = loader_.Open(request, callback); |
+ if (rv != PP_OK_COMPLETIONPENDING) |
+ callback.Run(rv); |
+} |
+ |
+pp::URLRequestInfo DocumentLoader::GetRequest(uint32 position, |
+ uint32 size) const { |
+ pp::URLRequestInfo request(client_->GetPluginInstance()); |
+ request.SetURL(url_.c_str()); |
+ request.SetMethod("GET"); |
+ request.SetFollowRedirects(true); |
+ |
+ const size_t kBufSize = 100; |
+ char buf[kBufSize]; |
+  // According to RFC 2616, a byte range specifies the positions of the first
+  // and last bytes of the requested range inclusively. Therefore we subtract
+  // 1 from position + size to get the index of the last byte that needs to
+  // be downloaded.
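+  // For example, position = 0 and size = 32768 yield "Range: bytes=0-32767".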
+ base::snprintf(buf, kBufSize, "Range: bytes=%d-%d", position, |
+ position + size - 1); |
+ pp::Var header(buf); |
+ request.SetHeaders(header); |
+ |
+ return request; |
+} |
+ |
+void DocumentLoader::DidOpen(int32_t result) { |
+ if (result != PP_OK) { |
+ NOTREACHED(); |
+ return; |
+ } |
+ |
+ is_multipart_ = false; |
+ current_chunk_size_ = 0; |
+ current_chunk_read_ = 0; |
+ |
+ pp::Var headers_var = loader_.GetResponseInfo().GetHeaders(); |
+ std::string headers; |
+ if (headers_var.is_string()) |
+ headers = headers_var.AsString(); |
+ |
+ std::string boundary = GetMultiPartBoundary(headers); |
+ if (boundary.size()) { |
+    // Leave the position untouched for now; we'll pick it up from the part
+    // headers when we read the data.
+ is_multipart_ = true; |
+ multipart_boundary_ = boundary; |
+ } else { |
+    // Make sure the server actually returned a byte range, since a server
+    // may simply ignore our byte-range request and return the entire
+    // document even if it supports byte-range requests.
+    // E.g. sniff the response to
+    // http://www.act.org/compass/sample/pdf/geometry.pdf
+ current_pos_ = 0; |
+ uint32 start_pos, end_pos; |
+ if (GetByteRange(headers, &start_pos, &end_pos)) { |
+ current_pos_ = start_pos; |
+ if (end_pos && end_pos > start_pos) |
+ current_chunk_size_ = end_pos - start_pos + 1; |
+ } |
+ } |
+ |
+ ReadMore(); |
+} |
+ |
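+// Parses a Content-Range header of the form
+// "Content-Range: bytes <start>-<end>/<total>", e.g.
+// "Content-Range: bytes 0-32767/4186830" gives start = 0 and end = 32767.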
+bool DocumentLoader::GetByteRange(const std::string& headers, uint32* start, |
+ uint32* end) { |
+ net::HttpUtil::HeadersIterator it(headers.begin(), headers.end(), "\n"); |
+ while (it.GetNext()) { |
+ if (LowerCaseEqualsASCII(it.name(), "content-range")) { |
+ std::string range = it.values().c_str(); |
+ if (StartsWithASCII(range, "bytes", false)) { |
+ range = range.substr(strlen("bytes")); |
+ std::string::size_type pos = range.find('-'); |
+ std::string range_end; |
+ if (pos != std::string::npos) |
+ range_end = range.substr(pos + 1); |
+ TrimWhitespaceASCII(range, base::TRIM_LEADING, &range); |
+ TrimWhitespaceASCII(range_end, base::TRIM_LEADING, &range_end); |
+ *start = atoi(range.c_str()); |
+ *end = atoi(range_end.c_str()); |
+ return true; |
+ } |
+ } |
+ } |
+ return false; |
+} |
+ |
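+// Extracts the boundary token from a multipart Content-Type header, e.g.
+// "Content-Type: multipart/byteranges; boundary=xyz" yields "xyz".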
+std::string DocumentLoader::GetMultiPartBoundary(const std::string& headers) { |
+ net::HttpUtil::HeadersIterator it(headers.begin(), headers.end(), "\n"); |
+ while (it.GetNext()) { |
+ if (LowerCaseEqualsASCII(it.name(), "content-type")) { |
+ std::string type = StringToLowerASCII(it.values()); |
+ if (StartsWithASCII(type, "multipart/", true)) { |
+ const char* boundary = strstr(type.c_str(), "boundary="); |
+ if (!boundary) { |
+ NOTREACHED(); |
+ break; |
+ } |
+ |
+ return std::string(boundary + 9); |
+ } |
+ } |
+ } |
+ return std::string(); |
+} |
+ |
+void DocumentLoader::ReadMore() { |
+ pp::CompletionCallback callback = |
+ loader_factory_.NewCallback(&DocumentLoader::DidRead); |
+ int rv = loader_.ReadResponseBody(buffer_, sizeof(buffer_), callback); |
+ if (rv != PP_OK_COMPLETIONPENDING) |
+ callback.Run(rv); |
+} |
+ |
+void DocumentLoader::DidRead(int32_t result) { |
+ if (result > 0) { |
+ char* start = buffer_; |
+ size_t length = result; |
+ if (is_multipart_ && result > 2) { |
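+      // Each part of a multipart response starts with its own headers,
+      // terminated by a blank line ("\n\n" or "\r\n\r\n"). Locate that blank
+      // line and parse the part's Content-Range from the headers before it.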
+ for (int i = 2; i < result; ++i) { |
+ if ((buffer_[i - 1] == '\n' && buffer_[i - 2] == '\n') || |
+ (i >= 4 && |
+ buffer_[i - 1] == '\n' && buffer_[i - 2] == '\r' && |
+ buffer_[i - 3] == '\n' && buffer_[i - 4] == '\r')) { |
+ uint32 start_pos, end_pos; |
+ if (GetByteRange(std::string(buffer_, i), &start_pos, &end_pos)) { |
+ current_pos_ = start_pos; |
+ start += i; |
+ length -= i; |
+ if (end_pos && end_pos > start_pos) |
+ current_chunk_size_ = end_pos - start_pos + 1; |
+ } |
+ break; |
+ } |
+ } |
+ |
+ // Reset this flag so we don't look inside the buffer in future calls of |
+ // DidRead for this response. Note that this code DOES NOT handle multi- |
+ // part responses with more than one part (we don't issue them at the |
+ // moment, so they shouldn't arrive). |
+ is_multipart_ = false; |
+ } |
+ |
+ if (current_chunk_size_ && |
+ current_chunk_read_ + length > current_chunk_size_) |
+ length = current_chunk_size_ - current_chunk_read_; |
+ |
+ if (length) { |
+ if (document_size_ > 0) { |
+ chunk_stream_.WriteData(current_pos_, start, length); |
+ } else { |
+        // If we did not get a Content-Length in the response, we can't
+        // preallocate a buffer for the entire document. Repeatedly resizing
+        // a single array causes memory fragmentation and OOM failures on
+        // large files, so instead we collect the chunks in a list and
+        // concatenate them once the request is complete.
+ chunk_buffer_.push_back(std::vector<unsigned char>()); |
+ chunk_buffer_.back().resize(length); |
+ memcpy(&(chunk_buffer_.back()[0]), start, length); |
+ } |
+ current_pos_ += length; |
+ current_chunk_read_ += length; |
+ client_->OnNewDataAvailable(); |
+ } |
+ ReadMore(); |
+ } else if (result == PP_OK) { |
+ ReadComplete(); |
+ } else { |
+ NOTREACHED(); |
+ } |
+} |
+ |
+void DocumentLoader::ReadComplete() { |
+ if (!partial_document_) { |
+ if (document_size_ == 0) { |
+      // For a document with no Content-Length specified we've already
+      // collected all the chunks. Allocate the final document buffer and
+      // copy them over.
+ chunk_stream_.Preallocate(current_pos_); |
+ uint32 pos = 0; |
+ std::list<std::vector<unsigned char> >::iterator it; |
+ for (it = chunk_buffer_.begin(); it != chunk_buffer_.end(); ++it) { |
+ chunk_stream_.WriteData(pos, &((*it)[0]), it->size()); |
+ pos += it->size(); |
+ } |
+ chunk_buffer_.clear(); |
+ } |
+ document_size_ = current_pos_; |
+ client_->OnDocumentComplete(); |
+ return; |
+ } |
+ |
+ request_pending_ = false; |
+ pending_requests_.pop_front(); |
+ |
+  // If there are more pending requests, continue downloading.
+ if (!pending_requests_.empty()) { |
+ DownloadPendingRequests(); |
+ return; |
+ } |
+ |
+ if (IsDocumentComplete()) { |
+ client_->OnDocumentComplete(); |
+ return; |
+ } |
+ |
+ if (header_request_) |
+ client_->OnPartialDocumentLoaded(); |
+ else |
+ client_->OnPendingRequestComplete(); |
+ header_request_ = false; |
+ |
+  // OnPendingRequestComplete() could have added more requests.
+ if (!pending_requests_.empty()) { |
+ DownloadPendingRequests(); |
+ } else { |
+    // The document is not complete and we have no outstanding requests.
+    // Keep downloading the PDF file in small chunks.
+ uint32 pos = chunk_stream_.GetFirstMissingByte(); |
+ std::vector<std::pair<size_t, size_t> > ranges; |
+ chunk_stream_.GetMissedRanges(pos, GetRequestSize(), &ranges); |
+ DCHECK(ranges.size() > 0); |
+ RequestData(ranges[0].first, ranges[0].second); |
+ } |
+} |
+ |
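+// Returns the minimal size for the next request: the first entry in
+// chunk_size_table_ whose key is >= requests_count_, or the largest entry
+// (2MB) once requests_count_ is past the last key.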
+uint32 DocumentLoader::GetRequestSize() const { |
+ std::map<uint32, uint32>::const_iterator iter = |
+ chunk_size_table_.lower_bound(requests_count_); |
+ if (iter == chunk_size_table_.end()) |
+ iter--; |
+ return iter->second; |
+} |
+ |
+} // namespace chrome_pdf |
Property changes on: pdf\document_loader.cc |
___________________________________________________________________ |
Added: svn:eol-style |
+ LF |