pdf/document_loader.cc - Issue 2349753003: Improve linearized pdf load/show time.

Side by Side Diff: pdf/document_loader.cc

Issue 2349753003: Improve linearized pdf load/show time. (Closed)

Patch Set: fix review issues. Created 4 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "pdf/document_loader.h"	5 #include "pdf/document_loader.h"

6	6

7 #include <stddef.h>	7 #include <stddef.h>

8 #include <stdint.h>	8 #include <stdint.h>

9	9

	10 #include <algorithm>

	11

10 #include "base/logging.h"	12 #include "base/logging.h"

11 #include "base/strings/string_util.h"	13 #include "base/strings/string_util.h"

12 #include "net/http/http_util.h"	14 #include "pdf/url_loader_wrapper.h"

13 #include "ppapi/c/pp_errors.h"	15 #include "ppapi/c/pp_errors.h"

14 #include "ppapi/cpp/url_loader.h"	16 #include "ui/gfx/range/range.h"

15 #include "ppapi/cpp/url_request_info.h"

16 #include "ppapi/cpp/url_response_info.h"

17	17

18 namespace chrome_pdf {	18 namespace chrome_pdf {

19	19

20 namespace {	20 namespace {

21	21

22 // If the headers have a byte-range response, writes the start and end	22 // The distance from last received chunk, when we wait requesting data, using

23 // positions and returns true if at least the start position was parsed.	23 // current connection (like playing a cassette tape) and do not send new range

24 // The end position will be set to 0 if it was not found or parsed from the	24 // request (like rewind a cassette tape, and continue playing after).

25 // response.	25 // Experimentally chosen value.

26 // Returns false if not even a start position could be parsed.	26 const int kChunkCloseDistance = 10;

27 bool GetByteRange(const std::string& headers, uint32_t* start, uint32_t* end) {

28 net::HttpUtil::HeadersIterator it(headers.begin(), headers.end(), "\n");

29 while (it.GetNext()) {

30 if (base::LowerCaseEqualsASCII(it.name(), "content-range")) {

31 std::string range = it.values().c_str();

32 if (base::StartsWith(range, "bytes",

33 base::CompareCase::INSENSITIVE_ASCII)) {

34 range = range.substr(strlen("bytes"));

35 std::string::size_type pos = range.find('-');

36 std::string range_end;

37 if (pos != std::string::npos)

38 range_end = range.substr(pos + 1);

39 base::TrimWhitespaceASCII(range, base::TRIM_LEADING, &range);

40 base::TrimWhitespaceASCII(range_end, base::TRIM_LEADING, &range_end);

41 *start = atoi(range.c_str());

42 *end = atoi(range_end.c_str());

43 return true;

44 }

45 }

46 }

47 return false;

48 }

49

50 // If the headers have a multi-part response, returns the boundary name.

51 // Otherwise returns an empty string.

52 std::string GetMultiPartBoundary(const std::string& headers) {

53 net::HttpUtil::HeadersIterator it(headers.begin(), headers.end(), "\n");

54 while (it.GetNext()) {

55 if (base::LowerCaseEqualsASCII(it.name(), "content-type")) {

56 std::string type = base::ToLowerASCII(it.values());

57 if (base::StartsWith(type, "multipart/", base::CompareCase::SENSITIVE)) {

58 const char* boundary = strstr(type.c_str(), "boundary=");

59 if (!boundary) {

60 NOTREACHED();

61 break;

62 }

63

64 return std::string(boundary + 9);

65 }

66 }

67 }

68 return std::string();

69 }

70	27

71 bool IsValidContentType(const std::string& type) {	28 bool IsValidContentType(const std::string& type) {

72 return (base::EndsWith(type, "/pdf", base::CompareCase::INSENSITIVE_ASCII) ||	29 return (base::EndsWith(type, "/pdf", base::CompareCase::INSENSITIVE_ASCII) ||

73 base::EndsWith(type, ".pdf", base::CompareCase::INSENSITIVE_ASCII) ||	30 base::EndsWith(type, ".pdf", base::CompareCase::INSENSITIVE_ASCII) ||

74 base::EndsWith(type, "/x-pdf",	31 base::EndsWith(type, "/x-pdf",

75 base::CompareCase::INSENSITIVE_ASCII) ||	32 base::CompareCase::INSENSITIVE_ASCII) ||

76 base::EndsWith(type, "/*", base::CompareCase::INSENSITIVE_ASCII) ||	33 base::EndsWith(type, "/*", base::CompareCase::INSENSITIVE_ASCII) ||

77 base::EndsWith(type, "/acrobat",	34 base::EndsWith(type, "/acrobat",

78 base::CompareCase::INSENSITIVE_ASCII) ||	35 base::CompareCase::INSENSITIVE_ASCII) ||

79 base::EndsWith(type, "/unknown",	36 base::EndsWith(type, "/unknown",

80 base::CompareCase::INSENSITIVE_ASCII));	37 base::CompareCase::INSENSITIVE_ASCII));

81 }	38 }

82	39

83 } // namespace	40 } // namespace

84	41

85 DocumentLoader::Client::~Client() {	42 DocumentLoader::Client::~Client() {

86 }	43 }

87	44

	45 DocumentLoader::Chunk::Chunk() {}

	46

	47 DocumentLoader::Chunk::~Chunk() {}

	48

	49 void DocumentLoader::Chunk::Clear() {

	50 chunk_index = 0;

	51 data_size = 0;

	52 chunk_data.reset();

	53 }

	54

88 DocumentLoader::DocumentLoader(Client* client)	55 DocumentLoader::DocumentLoader(Client* client)

89 : client_(client), partial_document_(false), request_pending_(false),	56 : client_(client), loader_factory_(this) {}

90 current_pos_(0), current_chunk_size_(0), current_chunk_read_(0),

91 document_size_(0), header_request_(true), is_multipart_(false) {

92 loader_factory_.Initialize(this);

93 }

94	57

95 DocumentLoader::~DocumentLoader() {	58 DocumentLoader::~DocumentLoader() {

96 }	59 }

97	60

98 bool DocumentLoader::Init(const pp::URLLoader& loader,	61 bool DocumentLoader::Init(std::unique_ptr<URLLoaderWrapper> loader,

99 const std::string& url,	62 const std::string& url) {

100 const std::string& headers) {

101 DCHECK(url_.empty());	63 DCHECK(url_.empty());

102 url_ = url;	64 DCHECK(!loader_);

103 loader_ = loader;

104	65

105 std::string response_headers;	66 std::string type = loader->GetContentType();

106 if (!headers.empty()) {

107 response_headers = headers;

108 } else {

109 pp::URLResponseInfo response = loader_.GetResponseInfo();

110 pp::Var headers_var = response.GetHeaders();

111

112 if (headers_var.is_string()) {

113 response_headers = headers_var.AsString();

114 }

115 }

116

117 bool accept_ranges_bytes = false;

118 bool content_encoded = false;

119 uint32_t content_length = 0;

120 std::string type;

121 std::string disposition;

122	67

123 // This happens for PDFs not loaded from http(s) sources.	68 // This happens for PDFs not loaded from http(s) sources.

124 if (response_headers == "Content-Type: text/plain") {	69 if (type == "text/plain") {

125 if (!base::StartsWith(url, "http://",	70 if (!base::StartsWith(url, "http://",

126 base::CompareCase::INSENSITIVE_ASCII) &&	71 base::CompareCase::INSENSITIVE_ASCII) &&

127 !base::StartsWith(url, "https://",	72 !base::StartsWith(url, "https://",

128 base::CompareCase::INSENSITIVE_ASCII)) {	73 base::CompareCase::INSENSITIVE_ASCII)) {

129 type = "application/pdf";	74 type = "application/pdf";

130 }	75 }

131 }	76 }

132 if (type.empty() && !response_headers.empty()) {

133 net::HttpUtil::HeadersIterator it(response_headers.begin(),

134 response_headers.end(), "\n");

135 while (it.GetNext()) {

136 if (base::LowerCaseEqualsASCII(it.name(), "content-length")) {

137 content_length = atoi(it.values().c_str());

138 } else if (base::LowerCaseEqualsASCII(it.name(), "accept-ranges")) {

139 accept_ranges_bytes = base::LowerCaseEqualsASCII(it.values(), "bytes");

140 } else if (base::LowerCaseEqualsASCII(it.name(), "content-encoding")) {

141 content_encoded = true;

142 } else if (base::LowerCaseEqualsASCII(it.name(), "content-type")) {

143 type = it.values();

144 size_t semi_colon_pos = type.find(';');

145 if (semi_colon_pos != std::string::npos) {

146 type = type.substr(0, semi_colon_pos);

147 }

148 TrimWhitespaceASCII(type, base::TRIM_ALL, &type);

149 } else if (base::LowerCaseEqualsASCII(it.name(), "content-disposition")) {

150 disposition = it.values();

151 }

152 }

153 }

154 if (!type.empty() && !IsValidContentType(type))	77 if (!type.empty() && !IsValidContentType(type))

155 return false;	78 return false;

156 if (base::StartsWith(disposition, "attachment",	79

	80 if (base::StartsWith(loader->GetContentDisposition(), "attachment",

157 base::CompareCase::INSENSITIVE_ASCII))	81 base::CompareCase::INSENSITIVE_ASCII))

158 return false;	82 return false;

159	83

160 if (content_length > 0)	84 url_ = url;

161 chunk_stream_.Preallocate(content_length);	85 loader_ = std::move(loader);

162	86

163 document_size_ = content_length;	87 if (!loader_->IsContentEncoded()) {

164 requests_count_ = 0;	88 chunk_stream_.set_eof_pos(std::max(0, loader_->GetContentLength()));

	89 }

	90 int64_t bytes_received = 0;

	91 int64_t total_bytes_to_be_received = 0;

	92 if (!chunk_stream_.eof_pos() &&

	93 loader_->GetDownloadProgress(&bytes_received,

	94 &total_bytes_to_be_received)) {

	95 chunk_stream_.set_eof_pos(

	96 std::max(0, static_cast<int>(total_bytes_to_be_received)));

	97 }

165	98

166 // Enable partial loading only if file size is above the threshold.	99 SetPartialLoadingEnabled(

167 // It will allow avoiding latency for multiple requests.	100 partial_loading_enabled_ &&

168 if (content_length > kMinFileSize &&	101 !base::StartsWith(url, "file://", base::CompareCase::INSENSITIVE_ASCII) &&

169 accept_ranges_bytes &&	102 loader_->IsAcceptRangesBytes() && !loader_->IsContentEncoded() &&

170 !content_encoded) {	103 GetDocumentSize());

171 LoadPartialDocument();	104

172 } else {	105 ReadMore();

173 LoadFullDocument();

174 }

175 return true;	106 return true;

176 }	107 }

177	108

178 void DocumentLoader::LoadPartialDocument() {	109 bool DocumentLoader::IsDocumentComplete() const {

179 // The current request is a full request (not a range request) so it starts at	110 return chunk_stream_.IsComplete();

180 // 0 and ends at |document_size_|.

181 current_chunk_size_ = document_size_;

182 current_pos_ = 0;

183 current_request_offset_ = 0;

184 current_request_size_ = 0;

185 current_request_extended_size_ = document_size_;

186 request_pending_ = true;

187

188 partial_document_ = true;

189 header_request_ = true;

190 ReadMore();

191 }	111 }

192	112

193 void DocumentLoader::LoadFullDocument() {	113 uint32_t DocumentLoader::GetDocumentSize() const {

194 partial_document_ = false;	114 return chunk_stream_.eof_pos();

195 chunk_buffer_.clear();

196 ReadMore();

197 }

198

199 bool DocumentLoader::IsDocumentComplete() const {

200 if (document_size_ == 0) // Document size unknown.

201 return false;

202 return IsDataAvailable(0, document_size_);

203 }

204

205 uint32_t DocumentLoader::GetAvailableData() const {

206 if (document_size_ == 0) { // If document size is unknown.

207 return current_pos_;

208 }

209

210 std::vector<std::pair<size_t, size_t> > ranges;

211 chunk_stream_.GetMissedRanges(0, document_size_, &ranges);

212 uint32_t available = document_size_;

213 for (const auto& range : ranges)

214 available -= range.second;

215 return available;

216 }	115 }

217	116

218 void DocumentLoader::ClearPendingRequests() {	117 void DocumentLoader::ClearPendingRequests() {

219 pending_requests_.erase(pending_requests_.begin(),	118 pending_requests_.Clear();

220 pending_requests_.end());

221 }	119 }

222	120

223 bool DocumentLoader::GetBlock(uint32_t position,	121 bool DocumentLoader::GetBlock(uint32_t position,

224 uint32_t size,	122 uint32_t size,

225 void* buf) const {	123 void* buf) const {

226 return chunk_stream_.ReadData(position, size, buf);	124 return chunk_stream_.ReadData(gfx::Range(position, position + size), buf);

227 }	125 }

228	126

229 bool DocumentLoader::IsDataAvailable(uint32_t position, uint32_t size) const {	127 bool DocumentLoader::IsDataAvailable(uint32_t position, uint32_t size) const {

230 return chunk_stream_.IsRangeAvailable(position, size);	128 return chunk_stream_.IsRangeAvailable(gfx::Range(position, position + size));

231 }	129 }

232	130

233 void DocumentLoader::RequestData(uint32_t position, uint32_t size) {	131 void DocumentLoader::RequestData(uint32_t position, uint32_t size) {

234 DCHECK(partial_document_);	132 if (!size || IsDataAvailable(position, size)) {

	133 return;

	134 }

	135

	136 if (GetDocumentSize() && (position + size > GetDocumentSize())) {

	137 return;

	138 }

235	139

236 // We have some artefact request from	140 // We have some artefact request from

237 // PDFiumEngine::OnDocumentComplete() -> FPDFAvail_IsPageAvail after	141 // PDFiumEngine::OnDocumentComplete() -> FPDFAvail_IsPageAvail after

238 // document is complete.	142 // document is complete.

239 // We need this fix in PDFIum. Adding this as a work around.	143 // We need this fix in PDFIum. Adding this as a work around.

240 // Bug: http://code.google.com/p/chromium/issues/detail?id=79996	144 // Bug: http://code.google.com/p/chromium/issues/detail?id=79996

241 // Test url:	145 // Test url:

242 // http://www.icann.org/en/correspondence/holtzman-to-jeffrey-02mar11-en.pdf	146 // http://www.icann.org/en/correspondence/holtzman-to-jeffrey-02mar11-en.pdf

243 if (IsDocumentComplete())	147 if (!loader_)

244 return;	148 return;

245	149

246 pending_requests_.push_back(std::pair<size_t, size_t>(position, size));	150 RangeSet requested_chunks(chunk_stream_.GetChunksRange(position, size));

247 DownloadPendingRequests();	151 requested_chunks.Subtract(chunk_stream_.filled_chunks());

248 }	152 if (requested_chunks.IsEmpty()) {

249

250 void DocumentLoader::RemoveCompletedRanges() {

251 // Split every request that has been partially downloaded already into smaller

252 // requests.

253 std::vector<std::pair<size_t, size_t> > ranges;

254 auto it = pending_requests_.begin();

255 while (it != pending_requests_.end()) {

256 chunk_stream_.GetMissedRanges(it->first, it->second, &ranges);

257 pending_requests_.insert(it, ranges.begin(), ranges.end());

258 ranges.clear();

259 pending_requests_.erase(it++);

260 }

261 }

262

263 void DocumentLoader::DownloadPendingRequests() {

264 if (request_pending_)

265 return;

266

267 uint32_t pos;

268 uint32_t size;

269 if (pending_requests_.empty()) {

270 // If the document is not complete and we have no outstanding requests,

271 // download what's left for as long as no other request gets added to

272 // |pending_requests_|.

273 pos = chunk_stream_.GetFirstMissingByte();

274 if (pos >= document_size_) {

275 // We're done downloading the document.

276 return;

277 }

278 // Start with size 0, we'll set |current_request_extended_size_| to > 0.

279 // This way this request will get cancelled as soon as the renderer wants

280 // another portion of the document.

281 size = 0;

282 } else {

283 RemoveCompletedRanges();

284

285 pos = pending_requests_.front().first;

286 size = pending_requests_.front().second;

287 if (IsDataAvailable(pos, size)) {

288 ReadComplete();

289 return;

290 }

291 }

292

293 size_t last_byte_before = chunk_stream_.GetFirstMissingByteInInterval(pos);

294 if (size < kDefaultRequestSize) {

295 // Try to extend before pos, up to size |kDefaultRequestSize|.

296 if (pos + size - last_byte_before > kDefaultRequestSize) {

297 pos += size - kDefaultRequestSize;

298 size = kDefaultRequestSize;

299 } else {

300 size += pos - last_byte_before;

301 pos = last_byte_before;

302 }

303 }

304 if (pos - last_byte_before < kDefaultRequestSize) {

305 // Don't leave a gap smaller than |kDefaultRequestSize|.

306 size += pos - last_byte_before;

307 pos = last_byte_before;

308 }

309

310 current_request_offset_ = pos;

311 current_request_size_ = size;

312

313 // Extend the request until the next downloaded byte or the end of the

314 // document.

315 size_t last_missing_byte =

316 chunk_stream_.GetLastMissingByteInInterval(pos + size - 1);

317 current_request_extended_size_ = last_missing_byte - pos + 1;

318

319 request_pending_ = true;

320

321 // Start downloading first pending request.

322 loader_.Close();

323 loader_ = client_->CreateURLLoader();

324 pp::CompletionCallback callback =

325 loader_factory_.NewCallback(&DocumentLoader::DidOpen);

326 pp::URLRequestInfo request = GetRequest(pos, current_request_extended_size_);

327 requests_count_++;

328 int rv = loader_.Open(request, callback);

329 if (rv != PP_OK_COMPLETIONPENDING)

330 callback.Run(rv);

331 }

332

333 pp::URLRequestInfo DocumentLoader::GetRequest(uint32_t position,

334 uint32_t size) const {

335 pp::URLRequestInfo request(client_->GetPluginInstance());

336 request.SetURL(url_);

337 request.SetMethod("GET");

338 request.SetFollowRedirects(true);

339 request.SetCustomReferrerURL(url_);

340

341 const size_t kBufSize = 100;

342 char buf[kBufSize];

343 // According to rfc2616, byte range specifies position of the first and last

344 // bytes in the requested range inclusively. Therefore we should subtract 1

345 // from the position + size, to get index of the last byte that needs to be

346 // downloaded.

347 base::snprintf(buf, kBufSize, "Range: bytes=%d-%d", position,

348 position + size - 1);

349 pp::Var header(buf);

350 request.SetHeaders(header);

351

352 return request;

353 }

354

355 void DocumentLoader::DidOpen(int32_t result) {

356 if (result != PP_OK) {

357 NOTREACHED();	153 NOTREACHED();

358 return;	154 return;

359 }	155 }

360	156 pending_requests_.Union(requested_chunks);

361 int32_t http_code = loader_.GetResponseInfo().GetStatusCode();	157 }

	158

	159 void DocumentLoader::SetPartialLoadingEnabled(bool enabled) {

	160 partial_loading_enabled_ = enabled;

	161 if (!enabled) {

	162 is_partial_loader_active_ = false;

	163 }

	164 }

	165

	166 bool DocumentLoader::ShouldCancelLoading() const {

	167 if (!loader_)

	168 return true;

	169 if (!partial_loading_enabled_ || pending_requests_.IsEmpty())

	170 return false;

	171 const gfx::Range current_range(chunk_.chunk_index,

	172 chunk_.chunk_index + kChunkCloseDistance);

	173 return !pending_requests_.Intersects(current_range);

	174 }

	175

	176 void DocumentLoader::ContinueDownload() {

	177 if (!ShouldCancelLoading())

	178 return ReadMore();

	179 DCHECK(partial_loading_enabled_);

	180 DCHECK(!IsDocumentComplete());

	181 DCHECK(GetDocumentSize());

	182

	183 const uint32_t range_start =

	184 pending_requests_.IsEmpty() ? 0 : pending_requests_.First().start();

	185 RangeSet candidates_for_request(

	186 gfx::Range(range_start, chunk_stream_.total_chunks_count()));

	187 candidates_for_request.Subtract(chunk_stream_.filled_chunks());

	188 DCHECK(!candidates_for_request.IsEmpty());

	189 gfx::Range next_request = candidates_for_request.First();

	190 if (candidates_for_request.Size() == 1 &&

	191 next_request.length() < kChunkCloseDistance) {

	192 // We have only request at the end, try to enlarge it to improve back order

	193 // reading.

	194 const int additional_chunks_count =

	195 kChunkCloseDistance - next_request.length();

	196 int new_start = std::max(

	197 0, static_cast<int>(next_request.start()) - additional_chunks_count);

	198 candidates_for_request =

	199 RangeSet(gfx::Range(new_start, next_request.end()));

	200 candidates_for_request.Subtract(chunk_stream_.filled_chunks());

	201 next_request = candidates_for_request.Last();

	202 }

	203

	204 loader_.reset();

	205 chunk_.Clear();

	206 if (!is_partial_loader_active_) {

	207 client_->CancelBrowserDownload();

	208 is_partial_loader_active_ = true;

	209 }

	210

	211 const uint32_t start = next_request.start() * DataStream::kChunkSize;

	212 const uint32_t length =

	213 std::min(chunk_stream_.eof_pos() - start,

	214 next_request.length() * DataStream::kChunkSize);

	215

	216 loader_ = client_->CreateURLLoader();

	217

	218 loader_->OpenRange(

	219 url_, url_, start, length,

	220 loader_factory_.NewCallback(&DocumentLoader::DidOpenPartial));

	221 }

	222

	223 void DocumentLoader::DidOpenPartial(int32_t result) {

	224 if (result != PP_OK) {

	225 return ReadComplete();

	226 }

	227

	228 int32_t http_code = loader_->GetStatusCode();

362 if (http_code >= 400 && http_code < 500) {	229 if (http_code >= 400 && http_code < 500) {

363 // Error accessing resource. 4xx error indicate subsequent requests	230 // Error accessing resource. 4xx error indicate subsequent requests

364 // will fail too.	231 // will fail too.

365 // E.g. resource has been removed from the server while loading it.	232 // E.g. resource has been removed from the server while loading it.

366 // https://code.google.com/p/chromium/issues/detail?id=414827	233 return ReadComplete();

367 return;	234 }

368 }	235

369	236 // Leave position untouched for multipart response for now, when we read the

370 is_multipart_ = false;	237 // data we'll get it.

371 current_chunk_size_ = 0;	238 if (!loader_->IsMultipart()) {

372 current_chunk_read_ = 0;

373

374 pp::Var headers_var = loader_.GetResponseInfo().GetHeaders();

375 std::string headers;

376 if (headers_var.is_string())

377 headers = headers_var.AsString();

378

379 std::string boundary = GetMultiPartBoundary(headers);

380 if (!boundary.empty()) {

381 // Leave position untouched for now, when we read the data we'll get it.

382 is_multipart_ = true;

383 multipart_boundary_ = boundary;

384 } else {

385 // Need to make sure that the server returned a byte-range, since it's	239 // Need to make sure that the server returned a byte-range, since it's

386 // possible for a server to just ignore our byte-range request and just	240 // possible for a server to just ignore our byte-range request and just

387 // return the entire document even if it supports byte-range requests.	241 // return the entire document even if it supports byte-range requests.

388 // i.e. sniff response to	242 // i.e. sniff response to

389 // http://www.act.org/compass/sample/pdf/geometry.pdf	243 // http://www.act.org/compass/sample/pdf/geometry.pdf

390 current_pos_ = 0;	244 int start_pos = 0;

391 uint32_t start_pos, end_pos;	245 int end_pos = 0;

392 if (GetByteRange(headers, &start_pos, &end_pos)) {	246 if (loader_->GetByteRange(&start_pos, &end_pos)) {

393 current_pos_ = start_pos;	247 if (start_pos % DataStream::kChunkSize != 0) {

394 if (end_pos && end_pos > start_pos)	248 return ReadComplete();

395 current_chunk_size_ = end_pos - start_pos + 1;	249 }

	250 DCHECK(!chunk_.chunk_data);

	251 chunk_.chunk_index = chunk_stream_.GetChunkIndex(start_pos);

396 } else {	252 } else {

397 partial_document_ = false;	253 SetPartialLoadingEnabled(false);

398 }	254 }

399 }	255 return ContinueDownload();

400	256 }

401 ReadMore();	257 // Needs more data to calc chunk index.

	258 return ReadMore();

402 }	259 }

403	260

404 void DocumentLoader::ReadMore() {	261 void DocumentLoader::ReadMore() {

405 pp::CompletionCallback callback =	262 loader_->ReadResponseBody(

406 loader_factory_.NewCallback(&DocumentLoader::DidRead);	263 buffer_, sizeof(buffer_),

407 int rv = loader_.ReadResponseBody(buffer_, sizeof(buffer_), callback);	264 loader_factory_.NewCallback(&DocumentLoader::DidRead));

408 if (rv != PP_OK_COMPLETIONPENDING)

409 callback.Run(rv);

410 }	265 }

411	266

412 void DocumentLoader::DidRead(int32_t result) {	267 void DocumentLoader::DidRead(int32_t result) {

413 if (result <= 0) {	268 if (result < 0) {

414 // If |result| == PP_OK, the document was loaded, otherwise an error was	269 // An error occurred.

415 // encountered. Either way we want to stop processing the response. In the	270 // The renderer will detect that we're missing data and will display a

416 // case where an error occurred, the renderer will detect that we're missing	271 // message.

417 // data and will display a message.	272 return ReadComplete();

418 ReadComplete();	273 }

419 return;	274 if (result == 0) {

420 }	275 loader_.reset();

421	276 if (!is_partial_loader_active_)

422 char* start = buffer_;	277 return ReadComplete();

423 size_t length = result;	278 return ContinueDownload();

424 if (is_multipart_ && result > 2) {	279 }

425 for (int i = 2; i < result; ++i) {	280 if (loader_->IsMultipart()) {

426 if ((buffer_[i - 1] == '\n' && buffer_[i - 2] == '\n') ||	281 int start_pos = 0;

427 (i >= 4 && buffer_[i - 1] == '\n' && buffer_[i - 2] == '\r' &&	282 int end_pos = 0;

428 buffer_[i - 3] == '\n' && buffer_[i - 4] == '\r')) {	283 if (!loader_->GetByteRange(&start_pos, &end_pos)) {

429 uint32_t start_pos, end_pos;	284 return ReadComplete();

430 if (GetByteRange(std::string(buffer_, i), &start_pos, &end_pos)) {	285 }

431 current_pos_ = start_pos;	286 DCHECK(!chunk_.chunk_data);

432 start += i;	287 chunk_.chunk_index = chunk_stream_.GetChunkIndex(start_pos);

433 length -= i;	288 }

434 if (end_pos && end_pos > start_pos)	289 if (!SaveChunkData(buffer_, result)) {

435 current_chunk_size_ = end_pos - start_pos + 1;	290 return ReadMore();

436 }	291 }

437 break;	292 if (IsDocumentComplete()) {

438 }	293 return ReadComplete();

439 }	294 }

440	295 return ContinueDownload();

441 // Reset this flag so we don't look inside the buffer in future calls of	296 }

442 // DidRead for this response. Note that this code DOES NOT handle multi-	297

443 // part responses with more than one part (we don't issue them at the	298 bool DocumentLoader::SaveChunkData(char* input, uint32_t input_size) {

444 // moment, so they shouldn't arrive).	299 count_of_bytes_received_ += input_size;

445 is_multipart_ = false;	300 bool chunk_saved = false;

446 }	301 bool loading_pending_request = pending_requests_.Contains(chunk_.chunk_index);

447	302 while (input_size > 0) {

448 if (current_chunk_size_ && current_chunk_read_ + length > current_chunk_size_)	303 if (chunk_.data_size == 0) {

449 length = current_chunk_size_ - current_chunk_read_;	304 chunk_.chunk_data.reset(new DataStream::ChunkData());
	Lei Zhang 2016/10/25 19:26:01: MakeUnique. snake 2016/10/25 20:07:17: Done.
450	305 }

451 if (length) {	306 const uint32_t new_chunk_data_len =

452 if (document_size_ > 0) {	307 std::min(DataStream::kChunkSize - chunk_.data_size, input_size);

453 chunk_stream_.WriteData(current_pos_, start, length);	308 memcpy(chunk_.chunk_data->data() + chunk_.data_size, input,

454 } else {	309 new_chunk_data_len);

455 // If we did not get content-length in the response, we can't	310 chunk_.data_size += new_chunk_data_len;

456 // preallocate buffer for the entire document. Resizing array causing	311 if (chunk_.data_size == DataStream::kChunkSize \|\|

457 // memory fragmentation issues on the large files and OOM exceptions.	312 chunk_stream_.eof_pos() ==

458 // To fix this, we collect all chunks of the file to the list and	313 chunk_.chunk_index * DataStream::kChunkSize + chunk_.data_size) {

459 // concatenate them together after request is complete.	314 chunk_stream_.SetChunkData(chunk_.chunk_index,

460 std::vector<unsigned char> buf(length);	315 std::move(chunk_.chunk_data));

461 memcpy(buf.data(), start, length);	316 pending_requests_.Subtract(

462 chunk_buffer_.push_back(std::move(buf));	317 gfx::Range(chunk_.chunk_index, chunk_.chunk_index + 1));

463 }	318 chunk_.data_size = 0;

464 current_pos_ += length;	319 ++(chunk_.chunk_index);

465 current_chunk_read_ += length;	320 chunk_saved = true;

466 client_->OnNewDataAvailable();	321 }

467 }	322

468	323 input += new_chunk_data_len;

469 // Only call the renderer if we allow partial loading.	324 input_size -= new_chunk_data_len;

470 if (!partial_document_) {	325 }

471 ReadMore();	326

472 return;	327 if (IsDocumentComplete())

473 }	328 return true;

474	329

475 UpdateRendering();	330 if (!chunk_saved)

476 RemoveCompletedRanges();	331 return false;

477	332

478 if (!pending_requests_.empty()) {	333 if (loading_pending_request &&

479 // If there are pending requests and the current content we're downloading	334 !pending_requests_.Contains(chunk_.chunk_index)) {

480 // doesn't satisfy any of these requests, cancel the current request to	335 client_->OnPendingRequestComplete();

481 // fullfill those more important requests.	336 }

482 bool satisfying_pending_request =	337 client_->OnNewDataAvailable();

483 SatisfyingRequest(current_request_offset_, current_request_size_);	338 return true;

484 for (const auto& pending_request : pending_requests_) {

485 if (SatisfyingRequest(pending_request.first, pending_request.second)) {

486 satisfying_pending_request = true;

487 break;

488 }

489 }

490 // Cancel the request as it's not satisfying any request from the

491 // renderer, unless the current request is finished in which case we let

492 // it finish cleanly.

493 if (!satisfying_pending_request &&

494 current_pos_ <

495 current_request_offset_ + current_request_extended_size_) {

496 loader_.Close();

497 }

498 }

499

500 ReadMore();

501 }

502

503 bool DocumentLoader::SatisfyingRequest(size_t offset, size_t size) const {

504 return offset <= current_pos_ + kDefaultRequestSize &&

505 current_pos_ < offset + size;

506 }	339 }

507	340

508 void DocumentLoader::ReadComplete() {	341 void DocumentLoader::ReadComplete() {

509 if (!partial_document_) {	342 if (!GetDocumentSize()) {

510 if (document_size_ == 0) {	343 uint32_t eof =

511 // For the document with no 'content-length" specified we've collected all	344 chunk_.chunk_index * DataStream::kChunkSize + chunk_.data_size;

512 // the chunks already. Let's allocate final document buffer and copy them	345 if (!chunk_stream_.filled_chunks().IsEmpty()) {

513 // over.	346 eof = std::max(

514 chunk_stream_.Preallocate(current_pos_);	347 chunk_stream_.filled_chunks().Last().end() * DataStream::kChunkSize,

515 uint32_t pos = 0;	348 eof);

516 for (auto& chunk : chunk_buffer_) {	349 }

517 chunk_stream_.WriteData(pos, chunk.data(), chunk.size());	350 chunk_stream_.set_eof_pos(eof);

518 pos += chunk.size();	351 if (eof == chunk_.chunk_index * DataStream::kChunkSize + chunk_.data_size) {

519 }	352 chunk_stream_.SetChunkData(chunk_.chunk_index,

520 chunk_buffer_.clear();	353 std::move(chunk_.chunk_data));

521 }	354 }

522 document_size_ = current_pos_;	355 }

523 client_->OnDocumentComplete();	356 loader_.reset();

524 return;

525 }

526

527 request_pending_ = false;

528

529 if (IsDocumentComplete()) {	357 if (IsDocumentComplete()) {

530 client_->OnDocumentComplete();	358 client_->OnDocumentComplete();

531 return;	359 } else {

532 }	360 client_->OnDocumentCanceled();

533	361 }

534 UpdateRendering();	362 }

535 DownloadPendingRequests();	363

536 }	364 float DocumentLoader::GetProgress() const {

537	365 if (!GetDocumentSize())

538 void DocumentLoader::UpdateRendering() {	366 return -1;

539 if (header_request_)	367 if (IsDocumentComplete())

540 client_->OnPartialDocumentLoaded();	368 return 1;

541 else	369 return static_cast<float>(chunk_stream_.filled_chunks_count()) /

542 client_->OnPendingRequestComplete();	370 chunk_stream_.total_chunks_count();

543 header_request_ = false;

544 }	371 }

545	372

546 } // namespace chrome_pdf	373 } // namespace chrome_pdf

OLD	NEW

« pdf/chunk_stream.h ('K') | « pdf/document_loader.h ('k') | pdf/document_loader_unittest.cc » ('j') | pdf/document_loader_unittest.cc » ('J')