pdf/document_loader.cc - Issue 294793003: Add the pdf plugin's source in src\pdf.

Side by Side Diff: pdf/document_loader.cc

Issue 294793003: Add the pdf plugin's source in src\pdf. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/

Patch Set: review comments and sync past DEPS roll to fix gyp Created 6 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 #include "pdf/document_loader.h"

	6

	7 #include "base/logging.h"

	8 #include "base/strings/string_util.h"

	9 #include "net/http/http_util.h"

	10 #include "ppapi/c/pp_errors.h"

	11 #include "ppapi/cpp/url_loader.h"

	12 #include "ppapi/cpp/url_request_info.h"

	13 #include "ppapi/cpp/url_response_info.h"

	14

	15 namespace chrome_pdf {

	16

	17 // Document below size will be downloaded in one chunk.

	18 const uint32 kMinFileSize = 64*1024;

	19

	20 DocumentLoader::DocumentLoader(Client* client)

	21 : client_(client), partial_document_(false), request_pending_(false),

	22 current_pos_(0), current_chunk_size_(0), current_chunk_read_(0),

	23 document_size_(0), header_request_(true), is_multipart_(false) {

	24 loader_factory_.Initialize(this);

	25 }

	26

	27 DocumentLoader::~DocumentLoader() {

	28 }

	29

	30 bool DocumentLoader::Init(const pp::URLLoader& loader,

	31 const std::string& url,

	32 const std::string& headers) {

	33 DCHECK(url_.empty());

	34 url_ = url;

	35 loader_ = loader;

	36

	37 std::string response_headers;

	38 if (!headers.empty()) {

	39 response_headers = headers;

	40 } else {

	41 pp::URLResponseInfo response = loader_.GetResponseInfo();

	42 pp::Var headers_var = response.GetHeaders();

	43

	44 if (headers_var.is_string()) {

	45 response_headers = headers_var.AsString();

	46 }

	47 }

	48

	49 bool accept_ranges_bytes = false;

	50 bool content_encoded = false;

	51 uint32 content_length = 0;

	52 std::string type;

	53 std::string disposition;

	54 if (!response_headers.empty()) {

	55 net::HttpUtil::HeadersIterator it(response_headers.begin(),

	56 response_headers.end(), "\n");

	57 while (it.GetNext()) {

	58 if (LowerCaseEqualsASCII(it.name(), "content-length")) {

	59 content_length = atoi(it.values().c_str());

	60 } else if (LowerCaseEqualsASCII(it.name(), "accept-ranges")) {

	61 accept_ranges_bytes = LowerCaseEqualsASCII(it.values(), "bytes");

	62 } else if (LowerCaseEqualsASCII(it.name(), "content-encoding")) {

	63 content_encoded = true;

	64 } else if (LowerCaseEqualsASCII(it.name(), "content-type")) {

	65 type = it.values();

	66 size_t semi_colon_pos = type.find(';');

	67 if (semi_colon_pos != std::string::npos) {

	68 type = type.substr(0, semi_colon_pos);

	69 }

	70 TrimWhitespace(type, base::TRIM_ALL, &type);

	71 } else if (LowerCaseEqualsASCII(it.name(), "content-disposition")) {

	72 disposition = it.values();

	73 }

	74 }

	75 }

	76 if (!type.empty() &&

	77 !EndsWith(type, "/pdf", false) &&

	78 !EndsWith(type, ".pdf", false) &&

	79 !EndsWith(type, "/x-pdf", false) &&

	80 !EndsWith(type, "/*", false) &&

	81 !EndsWith(type, "/acrobat", false) &&

	82 !EndsWith(type, "/unknown", false) &&

	83 !StartsWithASCII(url, "blob:", false)) {

	84 return false;

	85 }

	86 if (StartsWithASCII(disposition, "attachment", false)) {

	87 return false;

	88 }

	89

	90 if (content_length > 0)

	91 chunk_stream_.Preallocate(content_length);

	92

	93 document_size_ = content_length;

	94 requests_count_ = 0;

	95

	96 // Document loading strategy.

	97 // Following table shows the growth on the minimal request size depending

	98 // on the number requests that has been made already.

	99 chunk_size_table_[10] = 32*1024;

	100 chunk_size_table_[20] = 64*1024;

	101 chunk_size_table_[30] = 128*1024;

	102 chunk_size_table_[40] = 256*1024;

	103 chunk_size_table_[50] = 512*1024;

	104 chunk_size_table_[60] = 1024*1024;

	105 chunk_size_table_[70] = 2048*1024;

	106

	107 // Enable partial loading only if file size is above the threshold.

	108 // It will allow avoiding latency for multiple requests.

	109 if (content_length > kMinFileSize &&

	110 accept_ranges_bytes &&

	111 !content_encoded) {

	112 LoadPartialDocument();

	113 } else {

	114 LoadFullDocument();

	115 }

	116 return true;

	117 }

	118

	119 void DocumentLoader::LoadPartialDocument() {

	120 partial_document_ = true;

	121 // Force the main request to be cancelled, since if we're a full-frame plugin

	122 // there could be other references to the loader.

	123 loader_.Close();

	124 loader_ = pp::URLLoader();

	125 // Download file header.

	126 header_request_ = true;

	127 RequestData(0, std::min(GetRequestSize(), document_size_));

	128 }

	129

	130 void DocumentLoader::LoadFullDocument() {

	131 partial_document_ = false;

	132 chunk_buffer_.clear();

	133 ReadMore();

	134 }

	135

	136 bool DocumentLoader::IsDocumentComplete() const {

	137 if (document_size_ == 0) // Document size unknown.

	138 return false;

	139 return IsDataAvailable(0, document_size_);

	140 }

	141

	142 uint32 DocumentLoader::GetAvailableData() const {

	143 if (document_size_ == 0) { // If document size is unknown.

	144 return current_pos_;

	145 }

	146

	147 std::vector<std::pair<size_t, size_t> > ranges;

	148 chunk_stream_.GetMissedRanges(0, document_size_, &ranges);

	149 uint32 available = document_size_;

	150 std::vector<std::pair<size_t, size_t> >::iterator it;

	151 for (it = ranges.begin(); it != ranges.end(); ++it) {

	152 available -= it->second;

	153 }

	154 return available;

	155 }

	156

	157 void DocumentLoader::ClearPendingRequests() {

	158 // The first item in the queue is pending (need to keep it in the queue).

	159 if (pending_requests_.size() > 1) {

	160 // Remove all elements except the first one.

	161 pending_requests_.erase(++pending_requests_.begin(),

	162 pending_requests_.end());

	163 }

	164 }

	165

	166 bool DocumentLoader::GetBlock(uint32 position, uint32 size, void* buf) const {

	167 return chunk_stream_.ReadData(position, size, buf);

	168 }

	169

	170 bool DocumentLoader::IsDataAvailable(uint32 position, uint32 size) const {

	171 return chunk_stream_.IsRangeAvailable(position, size);

	172 }

	173

	174 void DocumentLoader::RequestData(uint32 position, uint32 size) {

	175 DCHECK(partial_document_);

	176

	177 // We have some artefact request from

	178 // PDFiumEngine::OnDocumentComplete() -> FPDFAvail_IsPageAvail after

	179 // document is complete.

	180 // We need this fix in PDFIum. Adding this as a work around.

	181 // Bug: http://code.google.com/p/chromium/issues/detail?id=79996

	182 // Test url:

	183 // http://www.icann.org/en/correspondence/holtzman-to-jeffrey-02mar11-en.pdf

	184 if (IsDocumentComplete())

	185 return;

	186

	187 pending_requests_.push_back(std::pair<size_t, size_t>(position, size));

	188 DownloadPendingRequests();

	189 }

	190

	191 void DocumentLoader::DownloadPendingRequests() {

	192 if (request_pending_ \|\| pending_requests_.empty())

	193 return;

	194

	195 // Remove already completed requests.

	196 // By design DownloadPendingRequests() should have at least 1 request in the

	197 // queue. ReadComplete() will remove the last pending comment from the queue.

	198 while (pending_requests_.size() > 1) {

	199 if (IsDataAvailable(pending_requests_.front().first,

	200 pending_requests_.front().second)) {

	201 pending_requests_.pop_front();

	202 } else {

	203 break;

	204 }

	205 }

	206

	207 uint32 pos = pending_requests_.front().first;

	208 uint32 size = pending_requests_.front().second;

	209 if (IsDataAvailable(pos, size)) {

	210 ReadComplete();

	211 return;

	212 }

	213

	214 // If current request has been partially downloaded already, split it into

	215 // a few smaller requests.

	216 std::vector<std::pair<size_t, size_t> > ranges;

	217 chunk_stream_.GetMissedRanges(pos, size, &ranges);

	218 if (ranges.size() > 0) {

	219 pending_requests_.pop_front();

	220 pending_requests_.insert(pending_requests_.begin(),

	221 ranges.begin(), ranges.end());

	222 pos = pending_requests_.front().first;

	223 size = pending_requests_.front().second;

	224 }

	225

	226 uint32 cur_request_size = GetRequestSize();

	227 // If size is less than default request, try to expand download range for

	228 // more optimal download.

	229 if (size < cur_request_size && partial_document_) {

	230 // First, try to expand block towards the end of the file.

	231 uint32 new_pos = pos;

	232 uint32 new_size = cur_request_size;

	233 if (pos + new_size > document_size_)

	234 new_size = document_size_ - pos;

	235

	236 std::vector<std::pair<size_t, size_t> > ranges;

	237 if (chunk_stream_.GetMissedRanges(new_pos, new_size, &ranges)) {

	238 new_pos = ranges[0].first;

	239 new_size = ranges[0].second;

	240 }

	241

	242 // Second, try to expand block towards the beginning of the file.

	243 if (new_size < cur_request_size) {

	244 uint32 block_end = new_pos + new_size;

	245 if (block_end > cur_request_size) {

	246 new_pos = block_end - cur_request_size;

	247 } else {

	248 new_pos = 0;

	249 }

	250 new_size = block_end - new_pos;

	251

	252 if (chunk_stream_.GetMissedRanges(new_pos, new_size, &ranges)) {

	253 new_pos = ranges.back().first;

	254 new_size = ranges.back().second;

	255 }

	256 }

	257 pos = new_pos;

	258 size = new_size;

	259 }

	260

	261 size_t last_byte_before = chunk_stream_.GetLastByteBefore(pos);

	262 size_t first_byte_after = chunk_stream_.GetFirstByteAfter(pos + size - 1);

	263 if (pos - last_byte_before < cur_request_size) {

	264 size = pos + size - last_byte_before;

	265 pos = last_byte_before;

	266 }

	267

	268 if ((pos + size < first_byte_after) &&

	269 (pos + size + cur_request_size >= first_byte_after))

	270 size = first_byte_after - pos;

	271

	272 request_pending_ = true;

	273

	274 // Start downloading first pending request.

	275 loader_.Close();

	276 loader_ = client_->CreateURLLoader();

	277 pp::CompletionCallback callback =

	278 loader_factory_.NewCallback(&DocumentLoader::DidOpen);

	279 pp::URLRequestInfo request = GetRequest(pos, size);

	280 requests_count_++;

	281 int rv = loader_.Open(request, callback);

	282 if (rv != PP_OK_COMPLETIONPENDING)

	283 callback.Run(rv);

	284 }

	285

	286 pp::URLRequestInfo DocumentLoader::GetRequest(uint32 position,

	287 uint32 size) const {

	288 pp::URLRequestInfo request(client_->GetPluginInstance());

	289 request.SetURL(url_.c_str());

	290 request.SetMethod("GET");

	291 request.SetFollowRedirects(true);

	292

	293 const size_t kBufSize = 100;

	294 char buf[kBufSize];

	295 // According to rfc2616, byte range specifies position of the first and last

	296 // bytes in the requested range inclusively. Therefore we should subtract 1

	297 // from the position + size, to get index of the last byte that needs to be

	298 // downloaded.

	299 base::snprintf(buf, kBufSize, "Range: bytes=%d-%d", position,

	300 position + size - 1);

	301 pp::Var header(buf);

	302 request.SetHeaders(header);

	303

	304 return request;

	305 }

	306

	307 void DocumentLoader::DidOpen(int32_t result) {

	308 if (result != PP_OK) {

	309 NOTREACHED();

	310 return;

	311 }

	312

	313 is_multipart_ = false;

	314 current_chunk_size_ = 0;

	315 current_chunk_read_ = 0;

	316

	317 pp::Var headers_var = loader_.GetResponseInfo().GetHeaders();

	318 std::string headers;

	319 if (headers_var.is_string())

	320 headers = headers_var.AsString();

	321

	322 std::string boundary = GetMultiPartBoundary(headers);

	323 if (boundary.size()) {

	324 // Leave position untouched for now, when we read the data we'll get it.

	325 is_multipart_ = true;

	326 multipart_boundary_ = boundary;

	327 } else {

	328 // Need to make sure that the server returned a byte-range, since it's

	329 // possible for a server to just ignore our bye-range request and just

	330 // return the entire document even if it supports byte-range requests.

	331 // i.e. sniff response to

	332 // http://www.act.org/compass/sample/pdf/geometry.pdf

	333 current_pos_ = 0;

	334 uint32 start_pos, end_pos;

	335 if (GetByteRange(headers, &start_pos, &end_pos)) {

	336 current_pos_ = start_pos;

	337 if (end_pos && end_pos > start_pos)

	338 current_chunk_size_ = end_pos - start_pos + 1;

	339 }

	340 }

	341

	342 ReadMore();

	343 }

	344

	345 bool DocumentLoader::GetByteRange(const std::string& headers, uint32* start,

	346 uint32* end) {

	347 net::HttpUtil::HeadersIterator it(headers.begin(), headers.end(), "\n");

	348 while (it.GetNext()) {

	349 if (LowerCaseEqualsASCII(it.name(), "content-range")) {

	350 std::string range = it.values().c_str();

	351 if (StartsWithASCII(range, "bytes", false)) {

	352 range = range.substr(strlen("bytes"));

	353 std::string::size_type pos = range.find('-');

	354 std::string range_end;

	355 if (pos != std::string::npos)

	356 range_end = range.substr(pos + 1);

	357 TrimWhitespaceASCII(range, base::TRIM_LEADING, &range);

	358 TrimWhitespaceASCII(range_end, base::TRIM_LEADING, &range_end);

	359 *start = atoi(range.c_str());

	360 *end = atoi(range_end.c_str());

	361 return true;

	362 }

	363 }

	364 }

	365 return false;

	366 }

	367

	368 std::string DocumentLoader::GetMultiPartBoundary(const std::string& headers) {

	369 net::HttpUtil::HeadersIterator it(headers.begin(), headers.end(), "\n");

	370 while (it.GetNext()) {

	371 if (LowerCaseEqualsASCII(it.name(), "content-type")) {

	372 std::string type = StringToLowerASCII(it.values());

	373 if (StartsWithASCII(type, "multipart/", true)) {

	374 const char* boundary = strstr(type.c_str(), "boundary=");

	375 if (!boundary) {

	376 NOTREACHED();

	377 break;

	378 }

	379

	380 return std::string(boundary + 9);

	381 }

	382 }

	383 }

	384 return std::string();

	385 }

	386

	387 void DocumentLoader::ReadMore() {

	388 pp::CompletionCallback callback =

	389 loader_factory_.NewCallback(&DocumentLoader::DidRead);

	390 int rv = loader_.ReadResponseBody(buffer_, sizeof(buffer_), callback);

	391 if (rv != PP_OK_COMPLETIONPENDING)

	392 callback.Run(rv);

	393 }

	394

	395 void DocumentLoader::DidRead(int32_t result) {

	396 if (result > 0) {

	397 char* start = buffer_;

	398 size_t length = result;

	399 if (is_multipart_ && result > 2) {

	400 for (int i = 2; i < result; ++i) {

	401 if ((buffer_[i - 1] == '\n' && buffer_[i - 2] == '\n') \|\|

	402 (i >= 4 &&

	403 buffer_[i - 1] == '\n' && buffer_[i - 2] == '\r' &&

	404 buffer_[i - 3] == '\n' && buffer_[i - 4] == '\r')) {

	405 uint32 start_pos, end_pos;

	406 if (GetByteRange(std::string(buffer_, i), &start_pos, &end_pos)) {

	407 current_pos_ = start_pos;

	408 start += i;

	409 length -= i;

	410 if (end_pos && end_pos > start_pos)

	411 current_chunk_size_ = end_pos - start_pos + 1;

	412 }

	413 break;

	414 }

	415 }

	416

	417 // Reset this flag so we don't look inside the buffer in future calls of

	418 // DidRead for this response. Note that this code DOES NOT handle multi-

	419 // part responses with more than one part (we don't issue them at the

	420 // moment, so they shouldn't arrive).

	421 is_multipart_ = false;

	422 }

	423

	424 if (current_chunk_size_ &&

	425 current_chunk_read_ + length > current_chunk_size_)

	426 length = current_chunk_size_ - current_chunk_read_;

	427

	428 if (length) {

	429 if (document_size_ > 0) {

	430 chunk_stream_.WriteData(current_pos_, start, length);

	431 } else {

	432 // If we did not get content-length in the response, we can't

	433 // preallocate buffer for the entire document. Resizing array causing

	434 // memory fragmentation issues on the large files and OOM exceptions.

	435 // To fix this, we collect all chunks of the file to the list and

	436 // concatenate them together after request is complete.

	437 chunk_buffer_.push_back(std::vector<unsigned char>());

	438 chunk_buffer_.back().resize(length);

	439 memcpy(&(chunk_buffer_.back()[0]), start, length);

	440 }

	441 current_pos_ += length;

	442 current_chunk_read_ += length;

	443 client_->OnNewDataAvailable();

	444 }

	445 ReadMore();

	446 } else if (result == PP_OK) {

	447 ReadComplete();

	448 } else {

	449 NOTREACHED();

	450 }

	451 }

	452

	453 void DocumentLoader::ReadComplete() {

	454 if (!partial_document_) {

	455 if (document_size_ == 0) {

	456 // For the document with no 'content-length" specified we've collected all

	457 // the chunks already. Let's allocate final document buffer and copy them

	458 // over.

	459 chunk_stream_.Preallocate(current_pos_);

	460 uint32 pos = 0;

	461 std::list<std::vector<unsigned char> >::iterator it;

	462 for (it = chunk_buffer_.begin(); it != chunk_buffer_.end(); ++it) {

	463 chunk_stream_.WriteData(pos, &((*it)[0]), it->size());

	464 pos += it->size();

	465 }

	466 chunk_buffer_.clear();

	467 }

	468 document_size_ = current_pos_;

	469 client_->OnDocumentComplete();

	470 return;

	471 }

	472

	473 request_pending_ = false;

	474 pending_requests_.pop_front();

	475

	476 // If there are more pending request - continue downloading.

	477 if (!pending_requests_.empty()) {

	478 DownloadPendingRequests();

	479 return;

	480 }

	481

	482 if (IsDocumentComplete()) {

	483 client_->OnDocumentComplete();

	484 return;

	485 }

	486

	487 if (header_request_)

	488 client_->OnPartialDocumentLoaded();

	489 else

	490 client_->OnPendingRequestComplete();

	491 header_request_ = false;

	492

	493 // The OnPendingRequestComplete could have added more requests.

	494 if (!pending_requests_.empty()) {

	495 DownloadPendingRequests();

	496 } else {

	497 // Document is not complete and we have no outstanding requests.

	498 // Let's keep downloading PDF file in small chunks.

	499 uint32 pos = chunk_stream_.GetFirstMissingByte();

	500 std::vector<std::pair<size_t, size_t> > ranges;

	501 chunk_stream_.GetMissedRanges(pos, GetRequestSize(), &ranges);

	502 DCHECK(ranges.size() > 0);

	503 RequestData(ranges[0].first, ranges[0].second);

	504 }

	505 }

	506

	507 uint32 DocumentLoader::GetRequestSize() const {

	508 std::map<uint32, uint32>::const_iterator iter =

	509 chunk_size_table_.lower_bound(requests_count_);

	510 if (iter == chunk_size_table_.end())

	511 iter--;

	512 return iter->second;

	513 }

	514

	515 } // namespace chrome_pdf

OLD	NEW

« no previous file with comments | « pdf/document_loader.h ('k') | pdf/draw_utils.h » ('j') | pdf/pdf.h » ('J')