Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "pdf/document_loader.h" | 5 #include "pdf/document_loader.h" |
| 6 | 6 |
| 7 #include <stddef.h> | 7 #include <stddef.h> |
| 8 #include <stdint.h> | 8 #include <stdint.h> |
| 9 | 9 |
| 10 #include <algorithm> | |
| 11 | |
| 10 #include "base/logging.h" | 12 #include "base/logging.h" |
| 11 #include "base/strings/string_util.h" | 13 #include "base/strings/string_util.h" |
| 12 #include "net/http/http_util.h" | 14 #include "pdf/url_loader_wrapper.h" |
| 13 #include "ppapi/c/pp_errors.h" | 15 #include "ppapi/c/pp_errors.h" |
| 14 #include "ppapi/cpp/url_loader.h" | 16 #include "ui/gfx/range/range.h" |
| 15 #include "ppapi/cpp/url_request_info.h" | |
| 16 #include "ppapi/cpp/url_response_info.h" | |
| 17 | 17 |
| 18 namespace chrome_pdf { | 18 namespace chrome_pdf { |
| 19 | 19 |
| 20 namespace { | 20 namespace { |
| 21 | 21 const int kChunkCloseDistance = 10; |
> Lei Zhang 2016/10/21 09:33:09: Can you document what this is / why this value was
> snake 2016/10/21 15:13:15: Done.
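For readers of the thread above: judging from ShouldCancelLoading() and ContinueDownload() later in this patch, kChunkCloseDistance appears to be a distance measured in chunks; an in-flight range request is only kept alive while some pending request starts within that many chunks of the chunk currently being received. A minimal sketch of that check, assuming the RangeSet type introduced elsewhere in this CL:

```cpp
#include <stdint.h>

#include "ui/gfx/range/range.h"

// Illustration only; mirrors the ShouldCancelLoading() logic later in this
// file. RangeSet is the chunk-index set type used by this CL.
bool IsCurrentRequestStillUseful(const RangeSet& pending_requests,
                                 uint32_t current_chunk_index) {
  const gfx::Range nearby(current_chunk_index,
                          current_chunk_index + kChunkCloseDistance);
  return pending_requests.Intersects(nearby);
}
```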
| 22 // If the headers have a byte-range response, writes the start and end | |
| 23 // positions and returns true if at least the start position was parsed. | |
| 24 // The end position will be set to 0 if it was not found or parsed from the | |
| 25 // response. | |
| 26 // Returns false if not even a start position could be parsed. | |
| 27 bool GetByteRange(const std::string& headers, uint32_t* start, uint32_t* end) { | |
| 28 net::HttpUtil::HeadersIterator it(headers.begin(), headers.end(), "\n"); | |
| 29 while (it.GetNext()) { | |
| 30 if (base::LowerCaseEqualsASCII(it.name(), "content-range")) { | |
| 31 std::string range = it.values().c_str(); | |
| 32 if (base::StartsWith(range, "bytes", | |
| 33 base::CompareCase::INSENSITIVE_ASCII)) { | |
| 34 range = range.substr(strlen("bytes")); | |
| 35 std::string::size_type pos = range.find('-'); | |
| 36 std::string range_end; | |
| 37 if (pos != std::string::npos) | |
| 38 range_end = range.substr(pos + 1); | |
| 39 base::TrimWhitespaceASCII(range, base::TRIM_LEADING, &range); | |
| 40 base::TrimWhitespaceASCII(range_end, base::TRIM_LEADING, &range_end); | |
| 41 *start = atoi(range.c_str()); | |
| 42 *end = atoi(range_end.c_str()); | |
| 43 return true; | |
| 44 } | |
| 45 } | |
| 46 } | |
| 47 return false; | |
| 48 } | |
| 49 | |
| 50 // If the headers have a multi-part response, returns the boundary name. | |
| 51 // Otherwise returns an empty string. | |
| 52 std::string GetMultiPartBoundary(const std::string& headers) { | |
| 53 net::HttpUtil::HeadersIterator it(headers.begin(), headers.end(), "\n"); | |
| 54 while (it.GetNext()) { | |
| 55 if (base::LowerCaseEqualsASCII(it.name(), "content-type")) { | |
| 56 std::string type = base::ToLowerASCII(it.values()); | |
| 57 if (base::StartsWith(type, "multipart/", base::CompareCase::SENSITIVE)) { | |
| 58 const char* boundary = strstr(type.c_str(), "boundary="); | |
| 59 if (!boundary) { | |
| 60 NOTREACHED(); | |
| 61 break; | |
| 62 } | |
| 63 | |
| 64 return std::string(boundary + 9); | |
| 65 } | |
| 66 } | |
| 67 } | |
| 68 return std::string(); | |
| 69 } | |
| 70 | 22 |
| 71 bool IsValidContentType(const std::string& type) { | 23 bool IsValidContentType(const std::string& type) { |
| 72 return (base::EndsWith(type, "/pdf", base::CompareCase::INSENSITIVE_ASCII) || | 24 return (base::EndsWith(type, "/pdf", base::CompareCase::INSENSITIVE_ASCII) || |
| 73 base::EndsWith(type, ".pdf", base::CompareCase::INSENSITIVE_ASCII) || | 25 base::EndsWith(type, ".pdf", base::CompareCase::INSENSITIVE_ASCII) || |
| 74 base::EndsWith(type, "/x-pdf", | 26 base::EndsWith(type, "/x-pdf", |
| 75 base::CompareCase::INSENSITIVE_ASCII) || | 27 base::CompareCase::INSENSITIVE_ASCII) || |
| 76 base::EndsWith(type, "/*", base::CompareCase::INSENSITIVE_ASCII) || | 28 base::EndsWith(type, "/*", base::CompareCase::INSENSITIVE_ASCII) || |
| 77 base::EndsWith(type, "/acrobat", | 29 base::EndsWith(type, "/acrobat", |
| 78 base::CompareCase::INSENSITIVE_ASCII) || | 30 base::CompareCase::INSENSITIVE_ASCII) || |
| 79 base::EndsWith(type, "/unknown", | 31 base::EndsWith(type, "/unknown", |
| 80 base::CompareCase::INSENSITIVE_ASCII)); | 32 base::CompareCase::INSENSITIVE_ASCII)); |
| 81 } | 33 } |
| 82 | 34 |
| 83 } // namespace | 35 } // namespace |
| 84 | 36 |
| 85 DocumentLoader::Client::~Client() { | 37 DocumentLoader::Client::~Client() { |
| 86 } | 38 } |
| 87 | 39 |
| 40 DocumentLoader::Chunk::Chunk() {} | |
| 41 | |
| 42 DocumentLoader::Chunk::~Chunk() {} | |
| 43 | |
| 44 void DocumentLoader::Chunk::Clear() { | |
| 45 chunk_index = 0; | |
| 46 data_size = 0; | |
| 47 chunk_data.reset(); | |
| 48 } | |
| 49 | |
| 88 DocumentLoader::DocumentLoader(Client* client) | 50 DocumentLoader::DocumentLoader(Client* client) |
| 89 : client_(client), partial_document_(false), request_pending_(false), | 51 : client_(client), loader_factory_(this) {} |
| 90 current_pos_(0), current_chunk_size_(0), current_chunk_read_(0), | |
| 91 document_size_(0), header_request_(true), is_multipart_(false) { | |
| 92 loader_factory_.Initialize(this); | |
| 93 } | |
| 94 | 52 |
| 95 DocumentLoader::~DocumentLoader() { | 53 DocumentLoader::~DocumentLoader() { |
| 96 } | 54 } |
| 97 | 55 |
| 98 bool DocumentLoader::Init(const pp::URLLoader& loader, | 56 bool DocumentLoader::Init(std::unique_ptr<URLLoaderWrapper> loader, |
| 99 const std::string& url, | 57 const std::string& url) { |
| 100 const std::string& headers) { | |
| 101 DCHECK(url_.empty()); | 58 DCHECK(url_.empty()); |
| 59 DCHECK(!loader_); | |
| 102 url_ = url; | 60 url_ = url; |
| 103 loader_ = loader; | |
| 104 | 61 |
| 105 std::string response_headers; | 62 std::string type = loader->GetContentType(); |
| 106 if (!headers.empty()) { | |
| 107 response_headers = headers; | |
| 108 } else { | |
| 109 pp::URLResponseInfo response = loader_.GetResponseInfo(); | |
| 110 pp::Var headers_var = response.GetHeaders(); | |
| 111 | |
| 112 if (headers_var.is_string()) { | |
| 113 response_headers = headers_var.AsString(); | |
| 114 } | |
| 115 } | |
| 116 | |
| 117 bool accept_ranges_bytes = false; | |
| 118 bool content_encoded = false; | |
| 119 uint32_t content_length = 0; | |
| 120 std::string type; | |
| 121 std::string disposition; | |
| 122 | 63 |
| 123 // This happens for PDFs not loaded from http(s) sources. | 64 // This happens for PDFs not loaded from http(s) sources. |
| 124 if (response_headers == "Content-Type: text/plain") { | 65 if (type == "text/plain") { |
| 125 if (!base::StartsWith(url, "http://", | 66 if (!base::StartsWith(url, "http://", |
| 126 base::CompareCase::INSENSITIVE_ASCII) && | 67 base::CompareCase::INSENSITIVE_ASCII) && |
| 127 !base::StartsWith(url, "https://", | 68 !base::StartsWith(url, "https://", |
| 128 base::CompareCase::INSENSITIVE_ASCII)) { | 69 base::CompareCase::INSENSITIVE_ASCII)) { |
| 129 type = "application/pdf"; | 70 type = "application/pdf"; |
| 130 } | 71 } |
| 131 } | 72 } |
| 132 if (type.empty() && !response_headers.empty()) { | |
| 133 net::HttpUtil::HeadersIterator it(response_headers.begin(), | |
| 134 response_headers.end(), "\n"); | |
| 135 while (it.GetNext()) { | |
| 136 if (base::LowerCaseEqualsASCII(it.name(), "content-length")) { | |
| 137 content_length = atoi(it.values().c_str()); | |
| 138 } else if (base::LowerCaseEqualsASCII(it.name(), "accept-ranges")) { | |
| 139 accept_ranges_bytes = base::LowerCaseEqualsASCII(it.values(), "bytes"); | |
| 140 } else if (base::LowerCaseEqualsASCII(it.name(), "content-encoding")) { | |
| 141 content_encoded = true; | |
| 142 } else if (base::LowerCaseEqualsASCII(it.name(), "content-type")) { | |
| 143 type = it.values(); | |
| 144 size_t semi_colon_pos = type.find(';'); | |
| 145 if (semi_colon_pos != std::string::npos) { | |
| 146 type = type.substr(0, semi_colon_pos); | |
| 147 } | |
| 148 TrimWhitespaceASCII(type, base::TRIM_ALL, &type); | |
| 149 } else if (base::LowerCaseEqualsASCII(it.name(), "content-disposition")) { | |
| 150 disposition = it.values(); | |
| 151 } | |
| 152 } | |
| 153 } | |
| 154 if (!type.empty() && !IsValidContentType(type)) | 73 if (!type.empty() && !IsValidContentType(type)) |
| 155 return false; | 74 return false; |
| 156 if (base::StartsWith(disposition, "attachment", | 75 |
| 76 if (base::StartsWith(loader->GetContentDisposition(), "attachment", | |
| 157 base::CompareCase::INSENSITIVE_ASCII)) | 77 base::CompareCase::INSENSITIVE_ASCII)) |
| 158 return false; | 78 return false; |
| 159 | 79 |
| 160 if (content_length > 0) | 80 loader_ = std::move(loader); |
> Lei Zhang 2016/10/21 09:33:09: Should |url_| be set here instead of above?
> snake 2016/10/21 15:13:14: Done.
| 161 chunk_stream_.Preallocate(content_length); | |
| 162 | 81 |
| 163 document_size_ = content_length; | 82 if (!loader_->IsContentEncoded()) { |
| 164 requests_count_ = 0; | 83 chunk_stream_.set_eof_pos(std::max(0, loader_->GetContentLength())); |
| 84 } | |
| 85 int64_t bytes_received = 0; | |
| 86 int64_t total_bytes_to_be_received = 0; | |
| 87 if (!chunk_stream_.eof_pos() && | |
| 88 loader_->GetDownloadProgress(&bytes_received, | |
| 89 &total_bytes_to_be_received)) { | |
| 90 chunk_stream_.set_eof_pos( | |
| 91 std::max(0, static_cast<int>(total_bytes_to_be_received))); | |
| 92 } | |
| 165 | 93 |
| 166 // Enable partial loading only if file size is above the threshold. | 94 SetPartialLoadingEnabled( |
| 167 // It will allow avoiding latency for multiple requests. | 95 partial_loading_enabled_ && |
| 168 if (content_length > kMinFileSize && | 96 !base::StartsWith(url, "file://", base::CompareCase::INSENSITIVE_ASCII) && |
| 169 accept_ranges_bytes && | 97 loader_->IsAcceptRangesBytes() && !loader_->IsContentEncoded() && |
| 170 !content_encoded) { | 98 GetDocumentSize()); |
| 171 LoadPartialDocument(); | 99 |
| 172 } else { | 100 ReadMore(); |
| 173 LoadFullDocument(); | |
| 174 } | |
| 175 return true; | 101 return true; |
| 176 } | 102 } |
| 177 | 103 |
| 178 void DocumentLoader::LoadPartialDocument() { | 104 bool DocumentLoader::IsDocumentComplete() const { |
| 179 // The current request is a full request (not a range request) so it starts at | 105 return chunk_stream_.IsComplete(); |
| 180 // 0 and ends at |document_size_|. | |
| 181 current_chunk_size_ = document_size_; | |
| 182 current_pos_ = 0; | |
| 183 current_request_offset_ = 0; | |
| 184 current_request_size_ = 0; | |
| 185 current_request_extended_size_ = document_size_; | |
| 186 request_pending_ = true; | |
| 187 | |
| 188 partial_document_ = true; | |
| 189 header_request_ = true; | |
| 190 ReadMore(); | |
| 191 } | 106 } |
| 192 | 107 |
| 193 void DocumentLoader::LoadFullDocument() { | 108 uint32_t DocumentLoader::GetDocumentSize() const { |
| 194 partial_document_ = false; | 109 return chunk_stream_.eof_pos(); |
| 195 chunk_buffer_.clear(); | |
| 196 ReadMore(); | |
| 197 } | |
| 198 | |
| 199 bool DocumentLoader::IsDocumentComplete() const { | |
| 200 if (document_size_ == 0) // Document size unknown. | |
| 201 return false; | |
| 202 return IsDataAvailable(0, document_size_); | |
| 203 } | |
| 204 | |
| 205 uint32_t DocumentLoader::GetAvailableData() const { | |
| 206 if (document_size_ == 0) { // If document size is unknown. | |
| 207 return current_pos_; | |
| 208 } | |
| 209 | |
| 210 std::vector<std::pair<size_t, size_t> > ranges; | |
| 211 chunk_stream_.GetMissedRanges(0, document_size_, &ranges); | |
| 212 uint32_t available = document_size_; | |
| 213 for (const auto& range : ranges) | |
| 214 available -= range.second; | |
| 215 return available; | |
| 216 } | 110 } |
| 217 | 111 |
| 218 void DocumentLoader::ClearPendingRequests() { | 112 void DocumentLoader::ClearPendingRequests() { |
| 219 pending_requests_.erase(pending_requests_.begin(), | 113 pending_requests_.Clear(); |
| 220 pending_requests_.end()); | |
| 221 } | 114 } |
| 222 | 115 |
| 223 bool DocumentLoader::GetBlock(uint32_t position, | 116 bool DocumentLoader::GetBlock(uint32_t position, |
| 224 uint32_t size, | 117 uint32_t size, |
| 225 void* buf) const { | 118 void* buf) const { |
| 226 return chunk_stream_.ReadData(position, size, buf); | 119 return chunk_stream_.ReadData(gfx::Range(position, position + size), buf); |
> Lei Zhang 2016/10/21 09:33:09: Can "position + size" overflow?
> snake 2016/10/21 15:13:15: yes (because this method is called from outside),
> Lei Zhang 2016/10/25 19:26:00: Can we stop integer overflows here, i.e. earlier r
> snake 2016/10/25 20:07:17: Done.
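On the overflow question above: one way to stop the wrap-around earlier is to build the requested range with a checked addition. This is a sketch assuming base::CheckedNumeric is acceptable here; the actual fix from the later patch set is not shown in this diff:

```cpp
#include <stdint.h>

#include "base/numerics/safe_math.h"
#include "ui/gfx/range/range.h"

// Illustration only: construct the byte range for GetBlock()/IsDataAvailable()
// and fail instead of silently wrapping when position + size overflows.
bool MakeByteRange(uint32_t position, uint32_t size, gfx::Range* range) {
  base::CheckedNumeric<uint32_t> end = position;
  end += size;
  if (!end.IsValid())
    return false;  // Caller can treat this the same as "data not available".
  *range = gfx::Range(position, end.ValueOrDie());
  return true;
}
```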
| 227 } | 120 } |
| 228 | 121 |
| 229 bool DocumentLoader::IsDataAvailable(uint32_t position, uint32_t size) const { | 122 bool DocumentLoader::IsDataAvailable(uint32_t position, uint32_t size) const { |
| 230 return chunk_stream_.IsRangeAvailable(position, size); | 123 return chunk_stream_.IsRangeAvailable(gfx::Range(position, position + size)); |
| 231 } | 124 } |
| 232 | 125 |
| 233 void DocumentLoader::RequestData(uint32_t position, uint32_t size) { | 126 void DocumentLoader::RequestData(uint32_t position, uint32_t size) { |
| 234 DCHECK(partial_document_); | 127 if (!size || IsDataAvailable(position, size)) { |
| 128 return; | |
| 129 } | |
| 130 | |
| 131 if (GetDocumentSize() && (position + size > GetDocumentSize())) { | |
| 132 return; | |
| 133 } | |
| 235 | 134 |
| 236 // We have some artefact request from | 135 // We have some artefact request from |
| 237 // PDFiumEngine::OnDocumentComplete() -> FPDFAvail_IsPageAvail after | 136 // PDFiumEngine::OnDocumentComplete() -> FPDFAvail_IsPageAvail after |
| 238 // document is complete. | 137 // document is complete. |
| 239 // We need this fix in PDFium. Adding this as a workaround. | 138 // We need this fix in PDFium. Adding this as a workaround. |
| 240 // Bug: http://code.google.com/p/chromium/issues/detail?id=79996 | 139 // Bug: http://code.google.com/p/chromium/issues/detail?id=79996 |
| 241 // Test url: | 140 // Test url: |
| 242 // http://www.icann.org/en/correspondence/holtzman-to-jeffrey-02mar11-en.pdf | 141 // http://www.icann.org/en/correspondence/holtzman-to-jeffrey-02mar11-en.pdf |
| 243 if (IsDocumentComplete()) | 142 if (!loader_) |
| 244 return; | 143 return; |
| 245 | 144 |
| 246 pending_requests_.push_back(std::pair<size_t, size_t>(position, size)); | 145 RangeSet requested_chunks(chunk_stream_.GetChunksRange(position, size)); |
| 247 DownloadPendingRequests(); | 146 requested_chunks.Subtract(chunk_stream_.filled_chunks()); |
| 248 } | 147 if (requested_chunks.IsEmpty()) { |
| 249 | |
| 250 void DocumentLoader::RemoveCompletedRanges() { | |
| 251 // Split every request that has been partially downloaded already into smaller | |
| 252 // requests. | |
| 253 std::vector<std::pair<size_t, size_t> > ranges; | |
| 254 auto it = pending_requests_.begin(); | |
| 255 while (it != pending_requests_.end()) { | |
| 256 chunk_stream_.GetMissedRanges(it->first, it->second, &ranges); | |
| 257 pending_requests_.insert(it, ranges.begin(), ranges.end()); | |
| 258 ranges.clear(); | |
| 259 pending_requests_.erase(it++); | |
| 260 } | |
| 261 } | |
| 262 | |
| 263 void DocumentLoader::DownloadPendingRequests() { | |
| 264 if (request_pending_) | |
| 265 return; | |
| 266 | |
| 267 uint32_t pos; | |
| 268 uint32_t size; | |
| 269 if (pending_requests_.empty()) { | |
| 270 // If the document is not complete and we have no outstanding requests, | |
| 271 // download what's left for as long as no other request gets added to | |
| 272 // |pending_requests_|. | |
| 273 pos = chunk_stream_.GetFirstMissingByte(); | |
| 274 if (pos >= document_size_) { | |
| 275 // We're done downloading the document. | |
| 276 return; | |
| 277 } | |
| 278 // Start with size 0, we'll set |current_request_extended_size_| to > 0. | |
| 279 // This way this request will get cancelled as soon as the renderer wants | |
| 280 // another portion of the document. | |
| 281 size = 0; | |
| 282 } else { | |
| 283 RemoveCompletedRanges(); | |
| 284 | |
| 285 pos = pending_requests_.front().first; | |
| 286 size = pending_requests_.front().second; | |
| 287 if (IsDataAvailable(pos, size)) { | |
| 288 ReadComplete(); | |
| 289 return; | |
| 290 } | |
| 291 } | |
| 292 | |
| 293 size_t last_byte_before = chunk_stream_.GetFirstMissingByteInInterval(pos); | |
| 294 if (size < kDefaultRequestSize) { | |
| 295 // Try to extend before pos, up to size |kDefaultRequestSize|. | |
| 296 if (pos + size - last_byte_before > kDefaultRequestSize) { | |
| 297 pos += size - kDefaultRequestSize; | |
| 298 size = kDefaultRequestSize; | |
| 299 } else { | |
| 300 size += pos - last_byte_before; | |
| 301 pos = last_byte_before; | |
| 302 } | |
| 303 } | |
| 304 if (pos - last_byte_before < kDefaultRequestSize) { | |
| 305 // Don't leave a gap smaller than |kDefaultRequestSize|. | |
| 306 size += pos - last_byte_before; | |
| 307 pos = last_byte_before; | |
| 308 } | |
| 309 | |
| 310 current_request_offset_ = pos; | |
| 311 current_request_size_ = size; | |
| 312 | |
| 313 // Extend the request until the next downloaded byte or the end of the | |
| 314 // document. | |
| 315 size_t last_missing_byte = | |
| 316 chunk_stream_.GetLastMissingByteInInterval(pos + size - 1); | |
| 317 current_request_extended_size_ = last_missing_byte - pos + 1; | |
| 318 | |
| 319 request_pending_ = true; | |
| 320 | |
| 321 // Start downloading first pending request. | |
| 322 loader_.Close(); | |
| 323 loader_ = client_->CreateURLLoader(); | |
| 324 pp::CompletionCallback callback = | |
| 325 loader_factory_.NewCallback(&DocumentLoader::DidOpen); | |
| 326 pp::URLRequestInfo request = GetRequest(pos, current_request_extended_size_); | |
| 327 requests_count_++; | |
| 328 int rv = loader_.Open(request, callback); | |
| 329 if (rv != PP_OK_COMPLETIONPENDING) | |
| 330 callback.Run(rv); | |
| 331 } | |
| 332 | |
| 333 pp::URLRequestInfo DocumentLoader::GetRequest(uint32_t position, | |
| 334 uint32_t size) const { | |
| 335 pp::URLRequestInfo request(client_->GetPluginInstance()); | |
| 336 request.SetURL(url_); | |
| 337 request.SetMethod("GET"); | |
| 338 request.SetFollowRedirects(true); | |
| 339 request.SetCustomReferrerURL(url_); | |
| 340 | |
| 341 const size_t kBufSize = 100; | |
| 342 char buf[kBufSize]; | |
| 343 // According to rfc2616, byte range specifies position of the first and last | |
| 344 // bytes in the requested range inclusively. Therefore we should subtract 1 | |
| 345 // from the position + size, to get index of the last byte that needs to be | |
| 346 // downloaded. | |
| 347 base::snprintf(buf, kBufSize, "Range: bytes=%d-%d", position, | |
| 348 position + size - 1); | |
| 349 pp::Var header(buf); | |
| 350 request.SetHeaders(header); | |
| 351 | |
| 352 return request; | |
| 353 } | |
| 354 | |
| 355 void DocumentLoader::DidOpen(int32_t result) { | |
| 356 if (result != PP_OK) { | |
| 357 NOTREACHED(); | 148 NOTREACHED(); |
| 358 return; | 149 return; |
| 359 } | 150 } |
| 360 | 151 pending_requests_.Union(requested_chunks); |
| 361 int32_t http_code = loader_.GetResponseInfo().GetStatusCode(); | 152 } |
| 153 | |
| 154 void DocumentLoader::SetPartialLoadingEnabled(bool enabled) { | |
| 155 partial_loading_enabled_ = enabled; | |
| 156 if (!enabled) { | |
| 157 is_partial_loader_active_ = false; | |
| 158 } | |
| 159 } | |
| 160 | |
| 161 bool DocumentLoader::ShouldCancelLoading() const { | |
| 162 if (!loader_) | |
| 163 return true; | |
| 164 if (!partial_loading_enabled_ || pending_requests_.IsEmpty()) | |
| 165 return false; | |
| 166 const gfx::Range current_range(chunk_.chunk_index, | |
| 167 chunk_.chunk_index + kChunkCloseDistance); | |
| 168 return !pending_requests_.Intersects(current_range); | |
| 169 } | |
| 170 | |
| 171 void DocumentLoader::ContinueDownload() { | |
| 172 if (!ShouldCancelLoading()) | |
| 173 return ReadMore(); | |
| 174 DCHECK(partial_loading_enabled_); | |
| 175 DCHECK(!IsDocumentComplete()); | |
| 176 DCHECK(GetDocumentSize()); | |
| 177 | |
| 178 const uint32_t range_start = | |
| 179 pending_requests_.IsEmpty() ? 0 : pending_requests_.First().start(); | |
| 180 RangeSet candidates_for_request( | |
| 181 gfx::Range(range_start, chunk_stream_.total_chunks_count())); | |
| 182 candidates_for_request.Subtract(chunk_stream_.filled_chunks()); | |
| 183 DCHECK(!candidates_for_request.IsEmpty()); | |
| 184 gfx::Range next_request = candidates_for_request.First(); | |
| 185 if (candidates_for_request.Size() == 1 && | |
| 186 next_request.length() < kChunkCloseDistance) { | |
| 187 // We have only one request left, at the end; try to enlarge it to improve | |
| 188 // backward-order reading. | |
| 189 const int additional_chunks_count = | |
| 190 kChunkCloseDistance - next_request.length(); | |
| 191 int new_start = std::max( | |
| 192 0, static_cast<int>(next_request.start()) - additional_chunks_count); | |
| 193 candidates_for_request = | |
| 194 RangeSet(gfx::Range(new_start, next_request.end())); | |
| 195 candidates_for_request.Subtract(chunk_stream_.filled_chunks()); | |
| 196 next_request = candidates_for_request.Last(); | |
| 197 } | |
| 198 | |
| 199 loader_.reset(); | |
| 200 chunk_.Clear(); | |
| 201 if (!is_partial_loader_active_) { | |
| 202 client_->CancelBrowserDownload(); | |
| 203 is_partial_loader_active_ = true; | |
| 204 } | |
| 205 | |
| 206 const uint32_t start = next_request.start() * DataStream::kChunkSize; | |
| 207 const uint32_t length = | |
| 208 std::min(chunk_stream_.eof_pos() - start, | |
| 209 next_request.length() * DataStream::kChunkSize); | |
| 210 | |
| 211 loader_ = client_->CreateURLLoader(); | |
| 212 | |
| 213 loader_->OpenRange( | |
| 214 url_, url_, start, length, | |
| 215 loader_factory_.NewCallback(&DocumentLoader::DidOpenPartial)); | |
| 216 } | |
| 217 | |
| 218 void DocumentLoader::DidOpenPartial(int32_t result) { | |
| 219 if (result != PP_OK) { | |
| 220 return ReadComplete(); | |
| 221 } | |
| 222 | |
| 223 int32_t http_code = loader_->GetStatusCode(); | |
| 362 if (http_code >= 400 && http_code < 500) { | 224 if (http_code >= 400 && http_code < 500) { |
| 363 // Error accessing resource. 4xx errors indicate subsequent requests | 225 // Error accessing resource. 4xx errors indicate subsequent requests |
| 364 // will fail too. | 226 // will fail too. |
| 365 // E.g. resource has been removed from the server while loading it. | 227 // E.g. resource has been removed from the server while loading it. |
| 366 // https://code.google.com/p/chromium/issues/detail?id=414827 | 228 return ReadComplete(); |
| 367 return; | 229 } |
| 368 } | 230 |
| 369 | 231 // Leave position untouched for multipart responses for now; when we read the |
| 370 is_multipart_ = false; | 232 // data we'll get it. |
| 371 current_chunk_size_ = 0; | 233 if (!loader_->IsMultipart()) { |
| 372 current_chunk_read_ = 0; | |
| 373 | |
| 374 pp::Var headers_var = loader_.GetResponseInfo().GetHeaders(); | |
| 375 std::string headers; | |
| 376 if (headers_var.is_string()) | |
| 377 headers = headers_var.AsString(); | |
| 378 | |
| 379 std::string boundary = GetMultiPartBoundary(headers); | |
| 380 if (!boundary.empty()) { | |
| 381 // Leave position untouched for now, when we read the data we'll get it. | |
| 382 is_multipart_ = true; | |
| 383 multipart_boundary_ = boundary; | |
| 384 } else { | |
| 385 // Need to make sure that the server returned a byte-range, since it's | 234 // Need to make sure that the server returned a byte-range, since it's |
| 386 // possible for a server to just ignore our byte-range request and just | 235 // possible for a server to just ignore our byte-range request and just |
| 387 // return the entire document even if it supports byte-range requests. | 236 // return the entire document even if it supports byte-range requests. |
| 388 // i.e. sniff response to | 237 // i.e. sniff response to |
| 389 // http://www.act.org/compass/sample/pdf/geometry.pdf | 238 // http://www.act.org/compass/sample/pdf/geometry.pdf |
| 390 current_pos_ = 0; | 239 int start_pos, end_pos; |
> Lei Zhang 2016/10/21 09:33:09: nit: 1 declaration per line please.
> snake 2016/10/21 15:13:14: Done.
| 391 uint32_t start_pos, end_pos; | 240 if (loader_->GetByteRange(&start_pos, &end_pos)) { |
| 392 if (GetByteRange(headers, &start_pos, &end_pos)) { | 241 if (start_pos % DataStream::kChunkSize != 0) { |
| 393 current_pos_ = start_pos; | 242 return ReadComplete(); |
| 394 if (end_pos && end_pos > start_pos) | 243 } |
| 395 current_chunk_size_ = end_pos - start_pos + 1; | 244 DCHECK(!chunk_.chunk_data); |
| 245 chunk_.chunk_index = chunk_stream_.GetChunkIndex(start_pos); | |
| 396 } else { | 246 } else { |
| 397 partial_document_ = false; | 247 SetPartialLoadingEnabled(false); |
| 398 } | 248 } |
| 399 } | 249 return ContinueDownload(); |
| 400 | 250 } |
| 401 ReadMore(); | 251 // Needs more data to calc chunk index. |
| 252 return ReadMore(); | |
| 402 } | 253 } |
| 403 | 254 |
| 404 void DocumentLoader::ReadMore() { | 255 void DocumentLoader::ReadMore() { |
| 405 pp::CompletionCallback callback = | 256 loader_->ReadResponseBody( |
| 406 loader_factory_.NewCallback(&DocumentLoader::DidRead); | 257 buffer_, sizeof(buffer_), |
| 407 int rv = loader_.ReadResponseBody(buffer_, sizeof(buffer_), callback); | 258 loader_factory_.NewCallback(&DocumentLoader::DidRead)); |
| 408 if (rv != PP_OK_COMPLETIONPENDING) | |
| 409 callback.Run(rv); | |
| 410 } | 259 } |
| 411 | 260 |
| 412 void DocumentLoader::DidRead(int32_t result) { | 261 void DocumentLoader::DidRead(int32_t result) { |
| 413 if (result <= 0) { | 262 if (result < 0) { |
| 414 // If |result| == PP_OK, the document was loaded, otherwise an error was | 263 // An error occurred. |
| 415 // encountered. Either way we want to stop processing the response. In the | 264 // The renderer will detect that we're missing data and will display a |
| 416 // case where an error occurred, the renderer will detect that we're missing | 265 // message. |
| 417 // data and will display a message. | 266 return ReadComplete(); |
| 418 ReadComplete(); | 267 } else if (result == 0) { |
> Lei Zhang 2016/10/21 09:33:09: nit: no else if after a return.
> snake 2016/10/21 15:13:15: Done.
| 419 return; | 268 loader_.reset(); |
| 420 } | 269 if (!is_partial_loader_active_) |
| 421 | 270 return ReadComplete(); |
| 422 char* start = buffer_; | 271 return ContinueDownload(); |
| 423 size_t length = result; | 272 } |
| 424 if (is_multipart_ && result > 2) { | 273 if (loader_->IsMultipart()) { |
| 425 for (int i = 2; i < result; ++i) { | 274 int start_pos = 0; |
| 426 if ((buffer_[i - 1] == '\n' && buffer_[i - 2] == '\n') || | 275 int end_pos = 0; |
| 427 (i >= 4 && buffer_[i - 1] == '\n' && buffer_[i - 2] == '\r' && | 276 if (!loader_->GetByteRange(&start_pos, &end_pos)) { |
| 428 buffer_[i - 3] == '\n' && buffer_[i - 4] == '\r')) { | 277 return ReadComplete(); |
| 429 uint32_t start_pos, end_pos; | 278 } |
| 430 if (GetByteRange(std::string(buffer_, i), &start_pos, &end_pos)) { | 279 DCHECK(!chunk_.chunk_data); |
| 431 current_pos_ = start_pos; | 280 chunk_.chunk_index = chunk_stream_.GetChunkIndex(start_pos); |
| 432 start += i; | 281 } |
| 433 length -= i; | 282 if (!SaveChunkData(buffer_, result)) { |
| 434 if (end_pos && end_pos > start_pos) | 283 return ReadMore(); |
| 435 current_chunk_size_ = end_pos - start_pos + 1; | 284 } |
| 436 } | 285 if (IsDocumentComplete()) { |
| 437 break; | 286 return ReadComplete(); |
| 438 } | 287 } |
| 439 } | 288 return ContinueDownload(); |
| 440 | 289 } |
| 441 // Reset this flag so we don't look inside the buffer in future calls of | 290 |
| 442 // DidRead for this response. Note that this code DOES NOT handle multi- | 291 bool DocumentLoader::SaveChunkData(char* input, uint32_t input_size) { |
| 443 // part responses with more than one part (we don't issue them at the | 292 count_of_bytes_received_ += input_size; |
> Lei Zhang 2016/10/21 09:33:09: Can this overflow?
> snake 2016/10/21 15:13:15: No, count_of_bytes_received_ can not be great that
| 444 // moment, so they shouldn't arrive). | 293 bool chunk_saved = false; |
| 445 is_multipart_ = false; | 294 bool loading_pending_request = pending_requests_.Contains(chunk_.chunk_index); |
| 446 } | 295 while (input_size > 0) { |
| 447 | 296 if (chunk_.data_size == 0) { |
| 448 if (current_chunk_size_ && current_chunk_read_ + length > current_chunk_size_) | 297 chunk_.chunk_data.reset(new DataStream::ChunkData()); |
| 449 length = current_chunk_size_ - current_chunk_read_; | 298 } |
| 450 | 299 const uint32_t new_chunk_data_len = |
| 451 if (length) { | 300 std::min(DataStream::kChunkSize - chunk_.data_size, input_size); |
| 452 if (document_size_ > 0) { | 301 memcpy(chunk_.chunk_data->data() + chunk_.data_size, input, |
| 453 chunk_stream_.WriteData(current_pos_, start, length); | 302 new_chunk_data_len); |
| 454 } else { | 303 chunk_.data_size += new_chunk_data_len; |
| 455 // If we did not get content-length in the response, we can't | 304 if (chunk_.data_size == DataStream::kChunkSize || |
| 456 // preallocate a buffer for the entire document. Resizing the array causes | 305 chunk_stream_.eof_pos() == |
| 457 // memory fragmentation issues on large files and OOM exceptions. | 306 chunk_.chunk_index * DataStream::kChunkSize + chunk_.data_size) { |
| 458 // To fix this, we collect all chunks of the file to the list and | 307 chunk_stream_.SetChunkData(chunk_.chunk_index, |
| 459 // concatenate them together after request is complete. | 308 std::move(chunk_.chunk_data)); |
| 460 std::vector<unsigned char> buf(length); | 309 pending_requests_.Subtract( |
| 461 memcpy(buf.data(), start, length); | 310 gfx::Range(chunk_.chunk_index, chunk_.chunk_index + 1)); |
| 462 chunk_buffer_.push_back(std::move(buf)); | 311 chunk_.data_size = 0; |
| 463 } | 312 ++(chunk_.chunk_index); |
| 464 current_pos_ += length; | 313 chunk_saved = true; |
| 465 current_chunk_read_ += length; | 314 } |
| 315 | |
| 316 input += new_chunk_data_len; | |
| 317 input_size -= new_chunk_data_len; | |
| 318 } | |
| 319 if (IsDocumentComplete()) | |
| 320 return true; | |
| 321 if (chunk_saved) { | |
> Lei Zhang 2016/10/21 09:33:09:
>     if (!chunk_saved)
>       return false;
>     // do things
>     re
> snake 2016/10/21 15:13:14: Done.
| 322 if (loading_pending_request && | |
| 323 !pending_requests_.Contains(chunk_.chunk_index)) { | |
| 324 client_->OnPendingRequestComplete(); | |
| 325 } | |
| 466 client_->OnNewDataAvailable(); | 326 client_->OnNewDataAvailable(); |
| 467 } | 327 } |
| 468 | 328 return chunk_saved; |
| 469 // Only call the renderer if we allow partial loading. | |
| 470 if (!partial_document_) { | |
| 471 ReadMore(); | |
| 472 return; | |
| 473 } | |
| 474 | |
| 475 UpdateRendering(); | |
| 476 RemoveCompletedRanges(); | |
| 477 | |
| 478 if (!pending_requests_.empty()) { | |
| 479 // If there are pending requests and the current content we're downloading | |
| 480 // doesn't satisfy any of these requests, cancel the current request to | |
| 481 // fulfill those more important requests. | |
| 482 bool satisfying_pending_request = | |
| 483 SatisfyingRequest(current_request_offset_, current_request_size_); | |
| 484 for (const auto& pending_request : pending_requests_) { | |
| 485 if (SatisfyingRequest(pending_request.first, pending_request.second)) { | |
| 486 satisfying_pending_request = true; | |
| 487 break; | |
| 488 } | |
| 489 } | |
| 490 // Cancel the request as it's not satisfying any request from the | |
| 491 // renderer, unless the current request is finished in which case we let | |
| 492 // it finish cleanly. | |
| 493 if (!satisfying_pending_request && | |
| 494 current_pos_ < | |
| 495 current_request_offset_ + current_request_extended_size_) { | |
| 496 loader_.Close(); | |
| 497 } | |
| 498 } | |
| 499 | |
| 500 ReadMore(); | |
| 501 } | |
| 502 | |
| 503 bool DocumentLoader::SatisfyingRequest(size_t offset, size_t size) const { | |
| 504 return offset <= current_pos_ + kDefaultRequestSize && | |
| 505 current_pos_ < offset + size; | |
| 506 } | 329 } |
| 507 | 330 |
| 508 void DocumentLoader::ReadComplete() { | 331 void DocumentLoader::ReadComplete() { |
| 509 if (!partial_document_) { | 332 if (!GetDocumentSize()) { |
| 510 if (document_size_ == 0) { | 333 uint32_t eof = |
| 511 // For a document with no "content-length" specified, we've collected all | 334 chunk_.chunk_index * DataStream::kChunkSize + chunk_.data_size; |
| 512 // the chunks already. Let's allocate final document buffer and copy them | 335 if (!chunk_stream_.filled_chunks().IsEmpty()) { |
| 513 // over. | 336 eof = std::max( |
| 514 chunk_stream_.Preallocate(current_pos_); | 337 chunk_stream_.filled_chunks().Last().end() * DataStream::kChunkSize, |
| 515 uint32_t pos = 0; | 338 eof); |
| 516 for (auto& chunk : chunk_buffer_) { | 339 } |
| 517 chunk_stream_.WriteData(pos, chunk.data(), chunk.size()); | 340 chunk_stream_.set_eof_pos(eof); |
| 518 pos += chunk.size(); | 341 if (eof == chunk_.chunk_index * DataStream::kChunkSize + chunk_.data_size) { |
| 519 } | 342 chunk_stream_.SetChunkData(chunk_.chunk_index, |
| 520 chunk_buffer_.clear(); | 343 std::move(chunk_.chunk_data)); |
| 521 } | 344 } |
| 522 document_size_ = current_pos_; | 345 } |
| 523 client_->OnDocumentComplete(); | 346 loader_.reset(); |
| 524 return; | |
| 525 } | |
| 526 | |
| 527 request_pending_ = false; | |
| 528 | |
| 529 if (IsDocumentComplete()) { | 347 if (IsDocumentComplete()) { |
| 530 client_->OnDocumentComplete(); | 348 client_->OnDocumentComplete(); |
| 531 return; | 349 } else { |
| 532 } | 350 client_->OnDocumentCanceled(); |
| 533 | 351 } |
| 534 UpdateRendering(); | 352 } |
| 535 DownloadPendingRequests(); | 353 |
| 536 } | 354 float DocumentLoader::GetProgress() const { |
| 537 | 355 if (!GetDocumentSize()) |
| 538 void DocumentLoader::UpdateRendering() { | 356 return -1; |
| 539 if (header_request_) | 357 if (IsDocumentComplete()) |
| 540 client_->OnPartialDocumentLoaded(); | 358 return 1; |
| 541 else | 359 return chunk_stream_.filled_chunks_count() * 1. / |
> Lei Zhang 2016/10/21 09:33:09: Can you static_cast to a float instead of "* 1." ?
> snake 2016/10/21 15:13:14: Done.
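The cast the reviewer asks for above presumably lands in a later patch set; a minimal sketch of the resulting computation:

```cpp
#include <stdint.h>

// Sketch only: progress as a fraction of downloaded chunks, with an explicit
// cast instead of the "* 1." idiom. Returns -1 when the total is unknown.
float GetProgressFraction(uint32_t filled_chunks, uint32_t total_chunks) {
  if (total_chunks == 0)
    return -1;
  return static_cast<float>(filled_chunks) / total_chunks;
}
```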
| 542 client_->OnPendingRequestComplete(); | 360 chunk_stream_.total_chunks_count(); |
| 543 header_request_ = false; | |
| 544 } | 361 } |
| 545 | 362 |
| 546 } // namespace chrome_pdf | 363 } // namespace chrome_pdf |
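A hedged usage sketch of the reworked public API, using only the method signatures visible in this diff. The Client and URLLoaderWrapper instances are assumed to come from the existing engine glue and are not shown; error handling and object lifetimes are simplified:

```cpp
#include <memory>
#include <string>
#include <utility>

#include "pdf/document_loader.h"
#include "pdf/url_loader_wrapper.h"

// Illustration only: how a caller might drive the reworked DocumentLoader.
void ExampleLoad(chrome_pdf::DocumentLoader::Client* client,
                 std::unique_ptr<chrome_pdf::URLLoaderWrapper> loader,
                 const std::string& url) {
  chrome_pdf::DocumentLoader document_loader(client);
  if (!document_loader.Init(std::move(loader), url))
    return;  // Rejected: bad content type or "attachment" disposition.

  // Ask for the first 64 KiB; only chunks that are still missing get fetched.
  document_loader.RequestData(0, 64 * 1024);

  // Later, typically from Client::OnNewDataAvailable(), read what arrived.
  char buf[1024];
  if (document_loader.IsDataAvailable(0, sizeof(buf)))
    document_loader.GetBlock(0, sizeof(buf), buf);
}
```

The design change this illustrates is that Init() now takes ownership of the URLLoaderWrapper, and the loader itself decides, chunk by chunk, whether to keep the current range request running or cancel it and re-issue one closer to the pending requests.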