Index: pdf/document_loader.cc |
=================================================================== |
--- pdf/document_loader.cc (revision 0) |
+++ pdf/document_loader.cc (revision 0) |
@@ -0,0 +1,515 @@ |
+// Copyright (c) 2010 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#include "pdf/document_loader.h" |
+ |
+#include "base/logging.h" |
+#include "base/strings/string_util.h" |
+#include "net/http/http_util.h" |
+#include "ppapi/c/pp_errors.h" |
+#include "ppapi/cpp/url_loader.h" |
+#include "ppapi/cpp/url_request_info.h" |
+#include "ppapi/cpp/url_response_info.h" |
+ |
+namespace chrome_pdf { |
+ |
+// Documents smaller than this size will be downloaded in a single chunk.
+const uint32 kMinFileSize = 64*1024; |
+ |
+DocumentLoader::DocumentLoader(Client* client) |
+ : client_(client), partial_document_(false), request_pending_(false), |
+ current_pos_(0), current_chunk_size_(0), current_chunk_read_(0), |
+ document_size_(0), header_request_(true), is_multipart_(false) { |
+ loader_factory_.Initialize(this); |
+} |
+ |
+DocumentLoader::~DocumentLoader() { |
+} |
+ |
+bool DocumentLoader::Init(const pp::URLLoader& loader, |
+ const std::string& url, |
+ const std::string& headers) { |
+ DCHECK(url_.empty()); |
+ url_ = url; |
+ loader_ = loader; |
+ |
+ std::string response_headers; |
+ if (!headers.empty()) { |
+ response_headers = headers; |
+ } else { |
+ pp::URLResponseInfo response = loader_.GetResponseInfo(); |
+ pp::Var headers_var = response.GetHeaders(); |
+ |
+ if (headers_var.is_string()) { |
+ response_headers = headers_var.AsString(); |
+ } |
+ } |
+ |
+ bool accept_ranges_bytes = false; |
+ bool content_encoded = false; |
+ uint32 content_length = 0; |
+ std::string type; |
+ std::string disposition; |
+ if (!response_headers.empty()) { |
+ net::HttpUtil::HeadersIterator it(response_headers.begin(), |
+ response_headers.end(), "\n"); |
+ while (it.GetNext()) { |
+ if (LowerCaseEqualsASCII(it.name(), "content-length")) { |
+ content_length = atoi(it.values().c_str()); |
+ } else if (LowerCaseEqualsASCII(it.name(), "accept-ranges")) { |
+ accept_ranges_bytes = LowerCaseEqualsASCII(it.values(), "bytes"); |
+ } else if (LowerCaseEqualsASCII(it.name(), "content-encoding")) { |
+ content_encoded = true; |
+ } else if (LowerCaseEqualsASCII(it.name(), "content-type")) { |
+ type = it.values(); |
+ size_t semi_colon_pos = type.find(';'); |
+ if (semi_colon_pos != std::string::npos) { |
+ type = type.substr(0, semi_colon_pos); |
+ } |
+ TrimWhitespace(type, base::TRIM_ALL, &type); |
+ } else if (LowerCaseEqualsASCII(it.name(), "content-disposition")) { |
+ disposition = it.values(); |
+ } |
+ } |
+ } |
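+  // Reject responses whose content type clearly is not PDF. Wildcard and
+  // unknown types, as well as blob: URLs, are still accepted.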
+ if (!type.empty() && |
+ !EndsWith(type, "/pdf", false) && |
+ !EndsWith(type, ".pdf", false) && |
+ !EndsWith(type, "/x-pdf", false) && |
+ !EndsWith(type, "/*", false) && |
+ !EndsWith(type, "/acrobat", false) && |
+ !EndsWith(type, "/unknown", false) && |
+ !StartsWithASCII(url, "blob:", false)) { |
+ return false; |
+ } |
+ if (StartsWithASCII(disposition, "attachment", false)) { |
+ return false; |
+ } |
+ |
+ if (content_length > 0) |
+ chunk_stream_.Preallocate(content_length); |
+ |
+ document_size_ = content_length; |
+ requests_count_ = 0; |
+ |
+  // Document loading strategy.
+  // The following table shows how the minimal request size grows with the
+  // number of requests that have already been made.
+ chunk_size_table_[10] = 32*1024; |
+ chunk_size_table_[20] = 64*1024; |
+ chunk_size_table_[30] = 128*1024; |
+ chunk_size_table_[40] = 256*1024; |
+ chunk_size_table_[50] = 512*1024; |
+ chunk_size_table_[60] = 1024*1024; |
+ chunk_size_table_[70] = 2048*1024; |
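+  // The minimal request size thus grows as more requests are issued, so a
+  // large document is fetched in progressively larger chunks.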
+ |
+  // Enable partial loading only if the file size is above the threshold;
+  // for small files a single request avoids the latency of multiple requests.
+ if (content_length > kMinFileSize && |
+ accept_ranges_bytes && |
+ !content_encoded) { |
+ LoadPartialDocument(); |
+ } else { |
+ LoadFullDocument(); |
+ } |
+ return true; |
+} |
+ |
+void DocumentLoader::LoadPartialDocument() { |
+ partial_document_ = true; |
+ // Force the main request to be cancelled, since if we're a full-frame plugin |
+ // there could be other references to the loader. |
+ loader_.Close(); |
+ loader_ = pp::URLLoader(); |
+ // Download file header. |
+ header_request_ = true; |
+ RequestData(0, std::min(GetRequestSize(), document_size_)); |
+} |
+ |
+void DocumentLoader::LoadFullDocument() { |
+ partial_document_ = false; |
+ chunk_buffer_.clear(); |
+ ReadMore(); |
+} |
+ |
+bool DocumentLoader::IsDocumentComplete() const { |
+ if (document_size_ == 0) // Document size unknown. |
+ return false; |
+ return IsDataAvailable(0, document_size_); |
+} |
+ |
+uint32 DocumentLoader::GetAvailableData() const { |
+ if (document_size_ == 0) { // If document size is unknown. |
+ return current_pos_; |
+ } |
+ |
+ std::vector<std::pair<size_t, size_t> > ranges; |
+ chunk_stream_.GetMissedRanges(0, document_size_, &ranges); |
+ uint32 available = document_size_; |
+ std::vector<std::pair<size_t, size_t> >::iterator it; |
+ for (it = ranges.begin(); it != ranges.end(); ++it) { |
+ available -= it->second; |
+ } |
+ return available; |
+} |
+ |
+void DocumentLoader::ClearPendingRequests() { |
+ // The first item in the queue is pending (need to keep it in the queue). |
+ if (pending_requests_.size() > 1) { |
+ // Remove all elements except the first one. |
+ pending_requests_.erase(++pending_requests_.begin(), |
+ pending_requests_.end()); |
+ } |
+} |
+ |
+bool DocumentLoader::GetBlock(uint32 position, uint32 size, void* buf) const { |
+ return chunk_stream_.ReadData(position, size, buf); |
+} |
+ |
+bool DocumentLoader::IsDataAvailable(uint32 position, uint32 size) const { |
+ return chunk_stream_.IsRangeAvailable(position, size); |
+} |
+ |
+void DocumentLoader::RequestData(uint32 position, uint32 size) { |
+ DCHECK(partial_document_); |
+ |
+  // PDFiumEngine::OnDocumentComplete() -> FPDFAvail_IsPageAvail can issue a
+  // spurious request after the document is already complete. This should be
+  // fixed in PDFium; ignore such requests here as a workaround.
+  // Bug: http://code.google.com/p/chromium/issues/detail?id=79996
+  // Test url:
+  // http://www.icann.org/en/correspondence/holtzman-to-jeffrey-02mar11-en.pdf
+ if (IsDocumentComplete()) |
+ return; |
+ |
+ pending_requests_.push_back(std::pair<size_t, size_t>(position, size)); |
+ DownloadPendingRequests(); |
+} |
+ |
+void DocumentLoader::DownloadPendingRequests() { |
+ if (request_pending_ || pending_requests_.empty()) |
+ return; |
+ |
+  // Remove already completed requests.
+  // By design DownloadPendingRequests() should have at least 1 request in
+  // the queue. ReadComplete() will remove the last pending request from the
+  // queue.
+ while (pending_requests_.size() > 1) { |
+ if (IsDataAvailable(pending_requests_.front().first, |
+ pending_requests_.front().second)) { |
+ pending_requests_.pop_front(); |
+ } else { |
+ break; |
+ } |
+ } |
+ |
+ uint32 pos = pending_requests_.front().first; |
+ uint32 size = pending_requests_.front().second; |
+ if (IsDataAvailable(pos, size)) { |
+ ReadComplete(); |
+ return; |
+ } |
+ |
+  // If the current request has already been partially downloaded, split it
+  // into a few smaller requests covering the ranges that are still missing.
+ std::vector<std::pair<size_t, size_t> > ranges; |
+ chunk_stream_.GetMissedRanges(pos, size, &ranges); |
+ if (ranges.size() > 0) { |
+ pending_requests_.pop_front(); |
+ pending_requests_.insert(pending_requests_.begin(), |
+ ranges.begin(), ranges.end()); |
+ pos = pending_requests_.front().first; |
+ size = pending_requests_.front().second; |
+ } |
+ |
+ uint32 cur_request_size = GetRequestSize(); |
+  // If the requested size is less than the default request size, try to
+  // expand the download range to make the download more efficient.
+ if (size < cur_request_size && partial_document_) { |
+    // First, try to expand the block towards the end of the file.
+ uint32 new_pos = pos; |
+ uint32 new_size = cur_request_size; |
+ if (pos + new_size > document_size_) |
+ new_size = document_size_ - pos; |
+ |
+ std::vector<std::pair<size_t, size_t> > ranges; |
+ if (chunk_stream_.GetMissedRanges(new_pos, new_size, &ranges)) { |
+ new_pos = ranges[0].first; |
+ new_size = ranges[0].second; |
+ } |
+ |
+    // Second, try to expand the block towards the beginning of the file.
+ if (new_size < cur_request_size) { |
+ uint32 block_end = new_pos + new_size; |
+ if (block_end > cur_request_size) { |
+ new_pos = block_end - cur_request_size; |
+ } else { |
+ new_pos = 0; |
+ } |
+ new_size = block_end - new_pos; |
+ |
+ if (chunk_stream_.GetMissedRanges(new_pos, new_size, &ranges)) { |
+ new_pos = ranges.back().first; |
+ new_size = ranges.back().second; |
+ } |
+ } |
+ pos = new_pos; |
+ size = new_size; |
+ } |
+ |
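+  // If the request starts or ends within one request size of data we already
+  // have, extend it to cover the gap so no small holes are left in the
+  // stream.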
+ size_t last_byte_before = chunk_stream_.GetLastByteBefore(pos); |
+ size_t first_byte_after = chunk_stream_.GetFirstByteAfter(pos + size - 1); |
+ if (pos - last_byte_before < cur_request_size) { |
+ size = pos + size - last_byte_before; |
+ pos = last_byte_before; |
+ } |
+ |
+ if ((pos + size < first_byte_after) && |
+ (pos + size + cur_request_size >= first_byte_after)) |
+ size = first_byte_after - pos; |
+ |
+ request_pending_ = true; |
+ |
+ // Start downloading first pending request. |
+ loader_.Close(); |
+ loader_ = client_->CreateURLLoader(); |
+ pp::CompletionCallback callback = |
+ loader_factory_.NewCallback(&DocumentLoader::DidOpen); |
+ pp::URLRequestInfo request = GetRequest(pos, size); |
+ requests_count_++; |
+ int rv = loader_.Open(request, callback); |
+ if (rv != PP_OK_COMPLETIONPENDING) |
+ callback.Run(rv); |
+} |
+ |
+pp::URLRequestInfo DocumentLoader::GetRequest(uint32 position, |
+ uint32 size) const { |
+ pp::URLRequestInfo request(client_->GetPluginInstance()); |
+ request.SetURL(url_.c_str()); |
+ request.SetMethod("GET"); |
+ request.SetFollowRedirects(true); |
+ |
+ const size_t kBufSize = 100; |
+ char buf[kBufSize]; |
+  // According to RFC 2616, a byte range specifies the positions of the first
+  // and last bytes of the requested range inclusively. Therefore we subtract
+  // 1 from position + size to get the index of the last byte that needs to
+  // be downloaded.
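+  // For example, position = 0 and size = 32768 yield "Range: bytes=0-32767".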
+ base::snprintf(buf, kBufSize, "Range: bytes=%d-%d", position, |
+ position + size - 1); |
+ pp::Var header(buf); |
+ request.SetHeaders(header); |
+ |
+ return request; |
+} |
+ |
+void DocumentLoader::DidOpen(int32_t result) { |
+ if (result != PP_OK) { |
+ NOTREACHED(); |
+ return; |
+ } |
+ |
+ is_multipart_ = false; |
+ current_chunk_size_ = 0; |
+ current_chunk_read_ = 0; |
+ |
+ pp::Var headers_var = loader_.GetResponseInfo().GetHeaders(); |
+ std::string headers; |
+ if (headers_var.is_string()) |
+ headers = headers_var.AsString(); |
+ |
+ std::string boundary = GetMultiPartBoundary(headers); |
+ if (boundary.size()) { |
+    // Leave the position untouched for now; we'll pick it up from the part
+    // headers when we read the data.
+ is_multipart_ = true; |
+ multipart_boundary_ = boundary; |
+ } else { |
+    // Make sure the server actually returned a byte range, since a server
+    // may simply ignore our byte-range request and return the entire
+    // document even if it supports byte-range requests.
+    // E.g. sniff the response to
+    // http://www.act.org/compass/sample/pdf/geometry.pdf
+ current_pos_ = 0; |
+ uint32 start_pos, end_pos; |
+ if (GetByteRange(headers, &start_pos, &end_pos)) { |
+ current_pos_ = start_pos; |
+ if (end_pos && end_pos > start_pos) |
+ current_chunk_size_ = end_pos - start_pos + 1; |
+ } |
+ } |
+ |
+ ReadMore(); |
+} |
+ |
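+// Parses a Content-Range header of the form
+// "Content-Range: bytes <start>-<end>/<total>", e.g.
+// "Content-Range: bytes 0-32767/4186830" gives start = 0 and end = 32767.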
+bool DocumentLoader::GetByteRange(const std::string& headers, uint32* start, |
+ uint32* end) { |
+ net::HttpUtil::HeadersIterator it(headers.begin(), headers.end(), "\n"); |
+ while (it.GetNext()) { |
+ if (LowerCaseEqualsASCII(it.name(), "content-range")) { |
+ std::string range = it.values().c_str(); |
+ if (StartsWithASCII(range, "bytes", false)) { |
+ range = range.substr(strlen("bytes")); |
+ std::string::size_type pos = range.find('-'); |
+ std::string range_end; |
+ if (pos != std::string::npos) |
+ range_end = range.substr(pos + 1); |
+ TrimWhitespaceASCII(range, base::TRIM_LEADING, &range); |
+ TrimWhitespaceASCII(range_end, base::TRIM_LEADING, &range_end); |
+ *start = atoi(range.c_str()); |
+ *end = atoi(range_end.c_str()); |
+ return true; |
+ } |
+ } |
+ } |
+ return false; |
+} |
+ |
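+// Extracts the boundary token from a multipart Content-Type header, e.g.
+// "Content-Type: multipart/byteranges; boundary=xyz" yields "xyz".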
+std::string DocumentLoader::GetMultiPartBoundary(const std::string& headers) { |
+ net::HttpUtil::HeadersIterator it(headers.begin(), headers.end(), "\n"); |
+ while (it.GetNext()) { |
+ if (LowerCaseEqualsASCII(it.name(), "content-type")) { |
+ std::string type = StringToLowerASCII(it.values()); |
+ if (StartsWithASCII(type, "multipart/", true)) { |
+ const char* boundary = strstr(type.c_str(), "boundary="); |
+ if (!boundary) { |
+ NOTREACHED(); |
+ break; |
+ } |
+ |
+ return std::string(boundary + 9); |
+ } |
+ } |
+ } |
+ return std::string(); |
+} |
+ |
+void DocumentLoader::ReadMore() { |
+ pp::CompletionCallback callback = |
+ loader_factory_.NewCallback(&DocumentLoader::DidRead); |
+ int rv = loader_.ReadResponseBody(buffer_, sizeof(buffer_), callback); |
+ if (rv != PP_OK_COMPLETIONPENDING) |
+ callback.Run(rv); |
+} |
+ |
+void DocumentLoader::DidRead(int32_t result) { |
+ if (result > 0) { |
+ char* start = buffer_; |
+ size_t length = result; |
+ if (is_multipart_ && result > 2) { |
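+      // Each part of a multipart response starts with its own headers,
+      // terminated by a blank line ("\n\n" or "\r\n\r\n"). Locate that blank
+      // line and parse the part's Content-Range from the headers before it.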
+ for (int i = 2; i < result; ++i) { |
+ if ((buffer_[i - 1] == '\n' && buffer_[i - 2] == '\n') || |
+ (i >= 4 && |
+ buffer_[i - 1] == '\n' && buffer_[i - 2] == '\r' && |
+ buffer_[i - 3] == '\n' && buffer_[i - 4] == '\r')) { |
+ uint32 start_pos, end_pos; |
+ if (GetByteRange(std::string(buffer_, i), &start_pos, &end_pos)) { |
+ current_pos_ = start_pos; |
+ start += i; |
+ length -= i; |
+ if (end_pos && end_pos > start_pos) |
+ current_chunk_size_ = end_pos - start_pos + 1; |
+ } |
+ break; |
+ } |
+ } |
+ |
+ // Reset this flag so we don't look inside the buffer in future calls of |
+ // DidRead for this response. Note that this code DOES NOT handle multi- |
+ // part responses with more than one part (we don't issue them at the |
+ // moment, so they shouldn't arrive). |
+ is_multipart_ = false; |
+ } |
+ |
+ if (current_chunk_size_ && |
+ current_chunk_read_ + length > current_chunk_size_) |
+ length = current_chunk_size_ - current_chunk_read_; |
+ |
+ if (length) { |
+ if (document_size_ > 0) { |
+ chunk_stream_.WriteData(current_pos_, start, length); |
+ } else { |
+        // If we did not get a Content-Length in the response, we can't
+        // preallocate a buffer for the entire document. Repeatedly resizing
+        // a single array causes memory fragmentation and OOM failures on
+        // large files, so instead we collect the chunks in a list and
+        // concatenate them once the request is complete.
+ chunk_buffer_.push_back(std::vector<unsigned char>()); |
+ chunk_buffer_.back().resize(length); |
+ memcpy(&(chunk_buffer_.back()[0]), start, length); |
+ } |
+ current_pos_ += length; |
+ current_chunk_read_ += length; |
+ client_->OnNewDataAvailable(); |
+ } |
+ ReadMore(); |
+ } else if (result == PP_OK) { |
+ ReadComplete(); |
+ } else { |
+ NOTREACHED(); |
+ } |
+} |
+ |
+void DocumentLoader::ReadComplete() { |
+ if (!partial_document_) { |
+ if (document_size_ == 0) { |
+      // For a document with no Content-Length specified we've already
+      // collected all the chunks. Allocate the final document buffer and
+      // copy them over.
+ chunk_stream_.Preallocate(current_pos_); |
+ uint32 pos = 0; |
+ std::list<std::vector<unsigned char> >::iterator it; |
+ for (it = chunk_buffer_.begin(); it != chunk_buffer_.end(); ++it) { |
+ chunk_stream_.WriteData(pos, &((*it)[0]), it->size()); |
+ pos += it->size(); |
+ } |
+ chunk_buffer_.clear(); |
+ } |
+ document_size_ = current_pos_; |
+ client_->OnDocumentComplete(); |
+ return; |
+ } |
+ |
+ request_pending_ = false; |
+ pending_requests_.pop_front(); |
+ |
+  // If there are more pending requests, continue downloading.
+ if (!pending_requests_.empty()) { |
+ DownloadPendingRequests(); |
+ return; |
+ } |
+ |
+ if (IsDocumentComplete()) { |
+ client_->OnDocumentComplete(); |
+ return; |
+ } |
+ |
+ if (header_request_) |
+ client_->OnPartialDocumentLoaded(); |
+ else |
+ client_->OnPendingRequestComplete(); |
+ header_request_ = false; |
+ |
+  // OnPendingRequestComplete() could have added more requests.
+ if (!pending_requests_.empty()) { |
+ DownloadPendingRequests(); |
+ } else { |
+    // The document is not complete and we have no outstanding requests.
+    // Keep downloading the PDF file in small chunks.
+ uint32 pos = chunk_stream_.GetFirstMissingByte(); |
+ std::vector<std::pair<size_t, size_t> > ranges; |
+ chunk_stream_.GetMissedRanges(pos, GetRequestSize(), &ranges); |
+ DCHECK(ranges.size() > 0); |
+ RequestData(ranges[0].first, ranges[0].second); |
+ } |
+} |
+ |
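+// Returns the minimal size for the next request: the first entry in
+// chunk_size_table_ whose key is >= requests_count_, or the largest entry
+// (2MB) once requests_count_ is past the last key.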
+uint32 DocumentLoader::GetRequestSize() const { |
+ std::map<uint32, uint32>::const_iterator iter = |
+ chunk_size_table_.lower_bound(requests_count_); |
+ if (iter == chunk_size_table_.end()) |
+ iter--; |
+ return iter->second; |
+} |
+ |
+} // namespace chrome_pdf |
Property changes on: pdf\document_loader.cc |
___________________________________________________________________ |
Added: svn:eol-style |
+ LF |