pdf/document_loader.cc - Issue 2349753003: Improve linearized pdf load/show time.

Side by Side Diff: pdf/document_loader.cc

Issue 2349753003: Improve linearized pdf load/show time. (Closed)

Patch Set: Fix heap use after free. Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "pdf/document_loader.h"	5 #include "pdf/document_loader.h"

6	6

7 #include <stddef.h>	7 #include <stddef.h>

8 #include <stdint.h>	8 #include <stdint.h>

9	9

	10 #include <algorithm>

	11

10 #include "base/logging.h"	12 #include "base/logging.h"

11 #include "base/strings/string_util.h"	13 #include "base/strings/string_util.h"

12 #include "net/http/http_util.h"	14 #include "pdf/url_loader_wrapper.h"

13 #include "ppapi/c/pp_errors.h"	15 #include "ppapi/c/pp_errors.h"

14 #include "ppapi/cpp/url_loader.h"	16 #include "ui/gfx/range/range.h"

15 #include "ppapi/cpp/url_request_info.h"

16 #include "ppapi/cpp/url_response_info.h"

17	17

18 namespace chrome_pdf {	18 namespace chrome_pdf {

19	19

20 namespace {	20 namespace {

21	21 const int kChunkCloseDistance = 10;

22 // If the headers have a byte-range response, writes the start and end

23 // positions and returns true if at least the start position was parsed.

24 // The end position will be set to 0 if it was not found or parsed from the

25 // response.

26 // Returns false if not even a start position could be parsed.

27 bool GetByteRange(const std::string& headers, uint32_t* start, uint32_t* end) {

28 net::HttpUtil::HeadersIterator it(headers.begin(), headers.end(), "\n");

29 while (it.GetNext()) {

30 if (base::LowerCaseEqualsASCII(it.name(), "content-range")) {

31 std::string range = it.values().c_str();

32 if (base::StartsWith(range, "bytes",

33 base::CompareCase::INSENSITIVE_ASCII)) {

34 range = range.substr(strlen("bytes"));

35 std::string::size_type pos = range.find('-');

36 std::string range_end;

37 if (pos != std::string::npos)

38 range_end = range.substr(pos + 1);

39 base::TrimWhitespaceASCII(range, base::TRIM_LEADING, &range);

40 base::TrimWhitespaceASCII(range_end, base::TRIM_LEADING, &range_end);

41 *start = atoi(range.c_str());

42 *end = atoi(range_end.c_str());

43 return true;

44 }

45 }

46 }

47 return false;

48 }

49

50 // If the headers have a multi-part response, returns the boundary name.

51 // Otherwise returns an empty string.

52 std::string GetMultiPartBoundary(const std::string& headers) {

53 net::HttpUtil::HeadersIterator it(headers.begin(), headers.end(), "\n");

54 while (it.GetNext()) {

55 if (base::LowerCaseEqualsASCII(it.name(), "content-type")) {

56 std::string type = base::ToLowerASCII(it.values());

57 if (base::StartsWith(type, "multipart/", base::CompareCase::SENSITIVE)) {

58 const char* boundary = strstr(type.c_str(), "boundary=");

59 if (!boundary) {

60 NOTREACHED();

61 break;

62 }

63

64 return std::string(boundary + 9);

65 }

66 }

67 }

68 return std::string();

69 }

70	22

71 bool IsValidContentType(const std::string& type) {	23 bool IsValidContentType(const std::string& type) {

72 return (base::EndsWith(type, "/pdf", base::CompareCase::INSENSITIVE_ASCII) \|\|	24 return (base::EndsWith(type, "/pdf", base::CompareCase::INSENSITIVE_ASCII) \|\|

73 base::EndsWith(type, ".pdf", base::CompareCase::INSENSITIVE_ASCII) \|\|	25 base::EndsWith(type, ".pdf", base::CompareCase::INSENSITIVE_ASCII) \|\|

74 base::EndsWith(type, "/x-pdf",	26 base::EndsWith(type, "/x-pdf",

75 base::CompareCase::INSENSITIVE_ASCII) \|\|	27 base::CompareCase::INSENSITIVE_ASCII) \|\|

76 base::EndsWith(type, "/*", base::CompareCase::INSENSITIVE_ASCII) \|\|	28 base::EndsWith(type, "/*", base::CompareCase::INSENSITIVE_ASCII) \|\|

77 base::EndsWith(type, "/acrobat",	29 base::EndsWith(type, "/acrobat",

78 base::CompareCase::INSENSITIVE_ASCII) \|\|	30 base::CompareCase::INSENSITIVE_ASCII) \|\|

79 base::EndsWith(type, "/unknown",	31 base::EndsWith(type, "/unknown",

80 base::CompareCase::INSENSITIVE_ASCII));	32 base::CompareCase::INSENSITIVE_ASCII));

81 }	33 }

82	34

83 } // namespace	35 } // namespace

84	36

	37

85 DocumentLoader::Client::~Client() {	38 DocumentLoader::Client::~Client() {

86 }	39 }

87	40

	41 DocumentLoader::Chunk::Chunk() {

	42 }

	43

	44 DocumentLoader::Chunk::~Chunk() {

	45 }

	46

	47 void DocumentLoader::Chunk::Clear() {

	48 chunk_index = 0;

	49 data_size = 0;

	50 chunk_data.reset();

	51 }

	52

88 DocumentLoader::DocumentLoader(Client* client)	53 DocumentLoader::DocumentLoader(Client* client)

89 : client_(client), partial_document_(false), request_pending_(false),	54 : client_(client), loader_factory_(this) {}

90 current_pos_(0), current_chunk_size_(0), current_chunk_read_(0),

91 document_size_(0), header_request_(true), is_multipart_(false) {

92 loader_factory_.Initialize(this);

93 }

94	55

95 DocumentLoader::~DocumentLoader() {	56 DocumentLoader::~DocumentLoader() {

96 }	57 }

97	58

98 bool DocumentLoader::Init(const pp::URLLoader& loader,	59 bool DocumentLoader::Init(std::unique_ptr<URLLoaderWrapper> loader,

99 const std::string& url,	60 const std::string& url) {

100 const std::string& headers) {

101 DCHECK(url_.empty());	61 DCHECK(url_.empty());

	62 DCHECK(!loader_);

102 url_ = url;	63 url_ = url;

103 loader_ = loader;

104	64

105 std::string response_headers;	65 std::string type = loader->GetContentType();

106 if (!headers.empty()) {

107 response_headers = headers;

108 } else {

109 pp::URLResponseInfo response = loader_.GetResponseInfo();

110 pp::Var headers_var = response.GetHeaders();

111

112 if (headers_var.is_string()) {

113 response_headers = headers_var.AsString();

114 }

115 }

116

117 bool accept_ranges_bytes = false;

118 bool content_encoded = false;

119 uint32_t content_length = 0;

120 std::string type;

121 std::string disposition;

122	66

123 // This happens for PDFs not loaded from http(s) sources.	67 // This happens for PDFs not loaded from http(s) sources.

124 if (response_headers == "Content-Type: text/plain") {	68 if (type == "content-type: text/plain") {
	spelchat 2016/10/13 18:26:54 How can this be "content-type: text/plain"? IIUC t How can this be "content-type: text/plain"? IIUC this will be "text/plain". snake 2016/10/14 11:31:17 Fixed Show quoted text On 2016/10/13 18:26:54, spelchat wrote: > How can this be "content-type: text/plain"? IIUC this will be "text/plain". Fixed
125 if (!base::StartsWith(url, "http://",	69 if (!base::StartsWith(url, "http://",

126 base::CompareCase::INSENSITIVE_ASCII) &&	70 base::CompareCase::INSENSITIVE_ASCII) &&

127 !base::StartsWith(url, "https://",	71 !base::StartsWith(url, "https://",

128 base::CompareCase::INSENSITIVE_ASCII)) {	72 base::CompareCase::INSENSITIVE_ASCII)) {

129 type = "application/pdf";	73 type = "application/pdf";

130 }	74 }

131 }	75 }

132 if (type.empty() && !response_headers.empty()) {

133 net::HttpUtil::HeadersIterator it(response_headers.begin(),

134 response_headers.end(), "\n");

135 while (it.GetNext()) {

136 if (base::LowerCaseEqualsASCII(it.name(), "content-length")) {

137 content_length = atoi(it.values().c_str());

138 } else if (base::LowerCaseEqualsASCII(it.name(), "accept-ranges")) {

139 accept_ranges_bytes = base::LowerCaseEqualsASCII(it.values(), "bytes");

140 } else if (base::LowerCaseEqualsASCII(it.name(), "content-encoding")) {

141 content_encoded = true;

142 } else if (base::LowerCaseEqualsASCII(it.name(), "content-type")) {

143 type = it.values();

144 size_t semi_colon_pos = type.find(';');

145 if (semi_colon_pos != std::string::npos) {

146 type = type.substr(0, semi_colon_pos);

147 }

148 TrimWhitespaceASCII(type, base::TRIM_ALL, &type);

149 } else if (base::LowerCaseEqualsASCII(it.name(), "content-disposition")) {

150 disposition = it.values();

151 }

152 }

153 }

154 if (!type.empty() && !IsValidContentType(type))	76 if (!type.empty() && !IsValidContentType(type))

155 return false;	77 return false;

156 if (base::StartsWith(disposition, "attachment",	78

	79 if (base::StartsWith(loader->GetContentDisposition(), "attachment",

157 base::CompareCase::INSENSITIVE_ASCII))	80 base::CompareCase::INSENSITIVE_ASCII))

158 return false;	81 return false;

159	82

160 if (content_length > 0)	83 loader_ = std::move(loader);

161 chunk_stream_.Preallocate(content_length);

162	84

163 document_size_ = content_length;	85 if (!loader_->IsContentEncoded()) {

164 requests_count_ = 0;	86 chunk_stream_.set_eof_pos(std::max(0, loader_->GetContentLength()));

	87 }

	88 int64_t bytes_received = 0;

	89 int64_t total_bytes_to_be_received = 0;

	90 if (!chunk_stream_.eof_pos() &&

	91 loader_->GetDownloadProgress(&bytes_received,

	92 &total_bytes_to_be_received)) {

	93 chunk_stream_.set_eof_pos(

	94 std::max(0, static_cast<int>(total_bytes_to_be_received)));

	95 }

165	96

166 // Enable partial loading only if file size is above the threshold.	97 SetPartialLoadingEnabled(

167 // It will allow avoiding latency for multiple requests.	98 partial_loading_enabled_ &&

168 if (content_length > kMinFileSize &&	99 !base::StartsWith(url, "file://", base::CompareCase::INSENSITIVE_ASCII) &&

169 accept_ranges_bytes &&	100 loader_->IsAcceptRangesBytes() && !loader_->IsContentEncoded() &&

170 !content_encoded) {	101 GetDocumentSize());

171 LoadPartialDocument();	102

172 } else {	103 ReadMore();

173 LoadFullDocument();

174 }

175 return true;	104 return true;

176 }	105 }

177	106

178 void DocumentLoader::LoadPartialDocument() {	107 bool DocumentLoader::IsDocumentComplete() const {

179 // The current request is a full request (not a range request) so it starts at	108 return chunk_stream_.IsComplete();

180 // 0 and ends at \|document_size_\|.

181 current_chunk_size_ = document_size_;

182 current_pos_ = 0;

183 current_request_offset_ = 0;

184 current_request_size_ = 0;

185 current_request_extended_size_ = document_size_;

186 request_pending_ = true;

187

188 partial_document_ = true;

189 header_request_ = true;

190 ReadMore();

191 }	109 }

192	110

193 void DocumentLoader::LoadFullDocument() {	111 uint32_t DocumentLoader::GetDocumentSize() const {

194 partial_document_ = false;	112 return chunk_stream_.eof_pos();

195 chunk_buffer_.clear();

196 ReadMore();

197 }

198

199 bool DocumentLoader::IsDocumentComplete() const {

200 if (document_size_ == 0) // Document size unknown.

201 return false;

202 return IsDataAvailable(0, document_size_);

203 }

204

205 uint32_t DocumentLoader::GetAvailableData() const {

206 if (document_size_ == 0) { // If document size is unknown.

207 return current_pos_;

208 }

209

210 std::vector<std::pair<size_t, size_t> > ranges;

211 chunk_stream_.GetMissedRanges(0, document_size_, &ranges);

212 uint32_t available = document_size_;

213 for (const auto& range : ranges)

214 available -= range.second;

215 return available;

216 }	113 }

217	114

218 void DocumentLoader::ClearPendingRequests() {	115 void DocumentLoader::ClearPendingRequests() {

219 pending_requests_.erase(pending_requests_.begin(),	116 pending_requests_.Clear();

220 pending_requests_.end());

221 }	117 }

222	118

223 bool DocumentLoader::GetBlock(uint32_t position,	119 bool DocumentLoader::GetBlock(uint32_t position,

224 uint32_t size,	120 uint32_t size,

225 void* buf) const {	121 void* buf) const {

226 return chunk_stream_.ReadData(position, size, buf);	122 return chunk_stream_.ReadData(gfx::Range(position, position + size), buf);

227 }	123 }

228	124

229 bool DocumentLoader::IsDataAvailable(uint32_t position, uint32_t size) const {	125 bool DocumentLoader::IsDataAvailable(uint32_t position, uint32_t size) const {

230 return chunk_stream_.IsRangeAvailable(position, size);	126 return chunk_stream_.IsRangeAvailable(gfx::Range(position, position + size));

231 }	127 }

232	128

233 void DocumentLoader::RequestData(uint32_t position, uint32_t size) {	129 void DocumentLoader::RequestData(uint32_t position, uint32_t size) {

234 DCHECK(partial_document_);	130 if (!size \|\| IsDataAvailable(position, size)) {

	131 return;

	132 }

	133

	134 if (GetDocumentSize() && (position + size > GetDocumentSize())) {

	135 return;

	136 }

235	137

236 // We have some artefact request from	138 // We have some artefact request from

237 // PDFiumEngine::OnDocumentComplete() -> FPDFAvail_IsPageAvail after	139 // PDFiumEngine::OnDocumentComplete() -> FPDFAvail_IsPageAvail after

238 // document is complete.	140 // document is complete.

239 // We need this fix in PDFIum. Adding this as a work around.	141 // We need this fix in PDFIum. Adding this as a work around.

240 // Bug: http://code.google.com/p/chromium/issues/detail?id=79996	142 // Bug: http://code.google.com/p/chromium/issues/detail?id=79996

241 // Test url:	143 // Test url:

242 // http://www.icann.org/en/correspondence/holtzman-to-jeffrey-02mar11-en.pdf	144 // http://www.icann.org/en/correspondence/holtzman-to-jeffrey-02mar11-en.pdf

243 if (IsDocumentComplete())	145 if (!loader_)

244 return;	146 return;

245	147

246 pending_requests_.push_back(std::pair<size_t, size_t>(position, size));	148 RangeSet requested_chunks(chunk_stream_.GetChunksRange(position, size));

247 DownloadPendingRequests();	149 requested_chunks.Subtract(chunk_stream_.filled_chunks());

248 }	150 if (requested_chunks.IsEmpty()) {

249

250 void DocumentLoader::RemoveCompletedRanges() {

251 // Split every request that has been partially downloaded already into smaller

252 // requests.

253 std::vector<std::pair<size_t, size_t> > ranges;

254 auto it = pending_requests_.begin();

255 while (it != pending_requests_.end()) {

256 chunk_stream_.GetMissedRanges(it->first, it->second, &ranges);

257 pending_requests_.insert(it, ranges.begin(), ranges.end());

258 ranges.clear();

259 pending_requests_.erase(it++);

260 }

261 }

262

263 void DocumentLoader::DownloadPendingRequests() {

264 if (request_pending_)

265 return;

266

267 uint32_t pos;

268 uint32_t size;

269 if (pending_requests_.empty()) {

270 // If the document is not complete and we have no outstanding requests,

271 // download what's left for as long as no other request gets added to

272 // \|pending_requests_\|.

273 pos = chunk_stream_.GetFirstMissingByte();

274 if (pos >= document_size_) {

275 // We're done downloading the document.

276 return;

277 }

278 // Start with size 0, we'll set \|current_request_extended_size_\| to > 0.

279 // This way this request will get cancelled as soon as the renderer wants

280 // another portion of the document.

281 size = 0;

282 } else {

283 RemoveCompletedRanges();

284

285 pos = pending_requests_.front().first;

286 size = pending_requests_.front().second;

287 if (IsDataAvailable(pos, size)) {

288 ReadComplete();

289 return;

290 }

291 }

292

293 size_t last_byte_before = chunk_stream_.GetFirstMissingByteInInterval(pos);

294 if (size < kDefaultRequestSize) {

295 // Try to extend before pos, up to size \|kDefaultRequestSize\|.

296 if (pos + size - last_byte_before > kDefaultRequestSize) {

297 pos += size - kDefaultRequestSize;

298 size = kDefaultRequestSize;

299 } else {

300 size += pos - last_byte_before;

301 pos = last_byte_before;

302 }

303 }

304 if (pos - last_byte_before < kDefaultRequestSize) {

305 // Don't leave a gap smaller than \|kDefaultRequestSize\|.

306 size += pos - last_byte_before;

307 pos = last_byte_before;

308 }

309

310 current_request_offset_ = pos;

311 current_request_size_ = size;

312

313 // Extend the request until the next downloaded byte or the end of the

314 // document.

315 size_t last_missing_byte =

316 chunk_stream_.GetLastMissingByteInInterval(pos + size - 1);

317 current_request_extended_size_ = last_missing_byte - pos + 1;

318

319 request_pending_ = true;

320

321 // Start downloading first pending request.

322 loader_.Close();

323 loader_ = client_->CreateURLLoader();

324 pp::CompletionCallback callback =

325 loader_factory_.NewCallback(&DocumentLoader::DidOpen);

326 pp::URLRequestInfo request = GetRequest(pos, current_request_extended_size_);

327 requests_count_++;

328 int rv = loader_.Open(request, callback);

329 if (rv != PP_OK_COMPLETIONPENDING)

330 callback.Run(rv);

331 }

332

333 pp::URLRequestInfo DocumentLoader::GetRequest(uint32_t position,

334 uint32_t size) const {

335 pp::URLRequestInfo request(client_->GetPluginInstance());

336 request.SetURL(url_);

337 request.SetMethod("GET");

338 request.SetFollowRedirects(true);

339 request.SetCustomReferrerURL(url_);

340

341 const size_t kBufSize = 100;

342 char buf[kBufSize];

343 // According to rfc2616, byte range specifies position of the first and last

344 // bytes in the requested range inclusively. Therefore we should subtract 1

345 // from the position + size, to get index of the last byte that needs to be

346 // downloaded.

347 base::snprintf(buf, kBufSize, "Range: bytes=%d-%d", position,

348 position + size - 1);

349 pp::Var header(buf);

350 request.SetHeaders(header);

351

352 return request;

353 }

354

355 void DocumentLoader::DidOpen(int32_t result) {

356 if (result != PP_OK) {

357 NOTREACHED();	151 NOTREACHED();

358 return;	152 return;

359 }	153 }

360	154 pending_requests_.Union(requested_chunks);

361 int32_t http_code = loader_.GetResponseInfo().GetStatusCode();	155 }

	156

	157 void DocumentLoader::SetPartialLoadingEnabled(bool enabled) {

	158 partial_loading_enabled_ = enabled;

	159 if (!enabled) {

	160 is_partial_loader_active_ = false;

	161 }

	162 }

	163

	164 bool DocumentLoader::ShouldCancelLoading() const {

	165 if (!loader_)

	166 return true;

	167 if (!partial_loading_enabled_ \|\| pending_requests_.IsEmpty())

	168 return false;

	169 const gfx::Range current_range(chunk_.chunk_index,

	170 chunk_.chunk_index + kChunkCloseDistance);

	171 return !pending_requests_.Intersects(current_range);

	172 }

	173

	174 void DocumentLoader::ContinueDownload() {

	175 if (!ShouldCancelLoading())

	176 return ReadMore();

	177 DCHECK(partial_loading_enabled_);

	178 DCHECK(!IsDocumentComplete());

	179 DCHECK(GetDocumentSize());

	180 RangeSet next_pending_requests(gfx::Range(

	181 pending_requests_.IsEmpty() ? 0 : pending_requests_.First().start(),

	182 chunk_stream_.total_chunks_count()));
	spelchat 2016/10/13 18:26:54 Can you split this in smaller chunks? It's a littl Can you split this in smaller chunks? It's a little hard to read. e.g. uint32_t range_start = pending_requests_.IsEmpty() ? 0 : pending_requests_.First().start() RangeSet next_pending_requests(gfx::Range(range_start, chunk_stream_.total_chunks_count())); snake 2016/10/14 11:31:17 Done Show quoted text On 2016/10/13 18:26:54, spelchat wrote: > Can you split this in smaller chunks? It's a little hard to read. > > e.g. > > uint32_t range_start = pending_requests_.IsEmpty() ? 0 : > pending_requests_.First().start() > RangeSet next_pending_requests(gfx::Range(range_start, > chunk_stream_.total_chunks_count())); Done
	183 next_pending_requests.Subtract(chunk_stream_.filled_chunks());

	184 DCHECK(!next_pending_requests.IsEmpty());

	185 gfx::Range pending_request = next_pending_requests.First();
	spelchat 2016/10/13 18:26:54 I would add DCHECK(pending_request.Contains(pendi I would add DCHECK(pending_request.Contains(pending_requests_.First())); for documentation snake 2016/10/14 11:31:17 pending_requests_ can be empty, but pending_reques Show quoted text On 2016/10/13 18:26:54, spelchat wrote: > I would add > > DCHECK(pending_request.Contains(pending_requests_.First())); > > for documentation pending_requests_ can be empty, but pending_request should never be empty. If pending_requests_ is empty, we use the gaps as pending_request.
	186 if (next_pending_requests.Size() == 1 &&

	187 pending_request.length() < kChunkCloseDistance) {

	188 // We have only request at the end, try to enlarge it to improve back order

	189 // reading.
	spelchat 2016/10/13 18:26:54 That comment confused me slightly. pending_reques That comment confused me slightly. pending_request has already been enlarged on the right side. If you're missing chunks [10,19] and the renderer requested [12,15], (i.e. pending_requests_.First() is [12,15]), then I think pending_request is [12,19]. This if branch also extends it toward the left if it's still smaller than kChunkCloseDistance. Is this correct? In particular does this mean that if the renderer requests [100,120] and you're missing [2,10000] then the next request that will go through will be for chunks [100,10000]. That request won't be cancelled if the renderer suddenly needs [2,5]? Can this unbounded expansion toward the right be a problem? snake 2016/10/14 11:31:17 This happens, only in case when requested data is Show quoted text On 2016/10/13 18:26:54, spelchat wrote: > That comment confused me slightly. > > pending_request has already been enlarged on the right side. > > If you're missing chunks [10,19] and the renderer requested [12,15], (i.e. > pending_requests_.First() is [12,15]), then I think pending_request is [12,19]. > This if branch also extends it toward the left if it's still smaller than > kChunkCloseDistance. This happens only when the requested data is adjacent to the end of the unfilled chunks (i.e. adjacent to the end of the last gap). For example: the document has 100 chunks. kChunkCloseDistance = 10 Do request [97,98]-> Start Loading [89,99] // this case. Load Data [89,99] // Enlarged to left and right both Do request [50,55]-> Start Loading [50,89] // not this case (length > kChunkCloseDistance), Load Data [50,55] // Enlarged only to right. Do request [86,87]-> Start Loading [78,88] // this case. Load Data [78,88] // Enlarged to left and right both Do request [46,47]-> Start Loading [46,49] // not this case (next_pending_requests.Size()>1 i.e. next_pending_requests=[46,49],[56,77] ), Load Data [46,49]// Enlarged only to right. ... 
Show quoted text > > Is this correct? In particular does this mean that if the renderer requests > [100,120] and you're missing [2,10000] then the next request that will go > through will be for chunks [100,10000]. Correct. This request will be started, but it can be canceled, and the renderer will be notified as soon as exactly [100,120] (not [100,10000]) has been received. Show quoted text > That request won't be cancelled if the > renderer suddenly needs [2,5]? Incorrect. If the renderer initiates a new pending request, the current request will be canceled after the current chunk has been downloaded. (See DocumentLoader::ShouldCancelLoading) Show quoted text > > Can this unbounded expansion toward the right be a problem? > No. On the contrary, it is the main part of the optimization, which decreases the reconnection count. For linearized documents we will read data linearly forward (in the common case) and will keep using the same connection (ideally the browser-initiated connection). For non-linearized documents, the main optimization is the expansion toward the left, for when we request data in reverse order. (But I will enable async parsing for non-linearized documents in upcoming CLs, because it needs more changes; that will be the true fix for BUG=59400.)
	190 const int additional_chunks_count =

	191 kChunkCloseDistance - pending_request.length();

	192 int new_start = std::max(

	193 0, static_cast<int>(pending_request.start()) - additional_chunks_count);

	194 next_pending_requests =

	195 RangeSet(gfx::Range(new_start, pending_request.end()));

	196 next_pending_requests.Subtract(chunk_stream_.filled_chunks());

	197 pending_request = next_pending_requests.Last();

	198 }

	199

	200 loader_.reset();

	201 chunk_.Clear();

	202 if (!is_partial_loader_active_) {

	203 client_->CancelBrowserDownload();

	204 is_partial_loader_active_ = true;

	205 }

	206

	207 const uint32_t start = pending_request.start() * DataStream::kChunkSize;

	208 const uint32_t length =

	209 std::min(chunk_stream_.eof_pos() - start,

	210 pending_request.length() * DataStream::kChunkSize);

	211

	212 loader_ = client_->CreateURLLoader();

	213

	214 loader_->OpenRange(

	215 url_, url_, start, length,

	216 loader_factory_.NewCallback(&DocumentLoader::DidOpenPartial));

	217 }

	218

	219 void DocumentLoader::DidOpenPartial(int32_t result) {

	220 if (result != PP_OK) {

	221 return ReadComplete();

	222 }

	223

	224 int32_t http_code = loader_->GetStatusCode();

362 if (http_code >= 400 && http_code < 500) {	225 if (http_code >= 400 && http_code < 500) {

363 // Error accessing resource. 4xx error indicate subsequent requests	226 // Error accessing resource. 4xx error indicate subsequent requests

364 // will fail too.	227 // will fail too.

365 // E.g. resource has been removed from the server while loading it.	228 // E.g. resource has been removed from the server while loading it.

366 // https://code.google.com/p/chromium/issues/detail?id=414827	229 return ReadComplete();

367 return;	230 }

368 }	231

369	232 // Leave position untouched for multiparted responce for now, when we read the

370 is_multipart_ = false;	233 // data we'll get it.

371 current_chunk_size_ = 0;	234 if (!loader_->IsMultipart()) {

372 current_chunk_read_ = 0;

373

374 pp::Var headers_var = loader_.GetResponseInfo().GetHeaders();

375 std::string headers;

376 if (headers_var.is_string())

377 headers = headers_var.AsString();

378

379 std::string boundary = GetMultiPartBoundary(headers);

380 if (!boundary.empty()) {

381 // Leave position untouched for now, when we read the data we'll get it.

382 is_multipart_ = true;

383 multipart_boundary_ = boundary;

384 } else {

385 // Need to make sure that the server returned a byte-range, since it's	235 // Need to make sure that the server returned a byte-range, since it's

386 // possible for a server to just ignore our byte-range request and just	236 // possible for a server to just ignore our byte-range request and just

387 // return the entire document even if it supports byte-range requests.	237 // return the entire document even if it supports byte-range requests.

388 // i.e. sniff response to	238 // i.e. sniff response to

389 // http://www.act.org/compass/sample/pdf/geometry.pdf	239 // http://www.act.org/compass/sample/pdf/geometry.pdf

390 current_pos_ = 0;	240 int start_pos, end_pos;

391 uint32_t start_pos, end_pos;	241 if (loader_->GetByteRange(&start_pos, &end_pos)) {

392 if (GetByteRange(headers, &start_pos, &end_pos)) {	242 if (start_pos % DataStream::kChunkSize != 0) {

393 current_pos_ = start_pos;	243 return ReadComplete();

394 if (end_pos && end_pos > start_pos)	244 }

395 current_chunk_size_ = end_pos - start_pos + 1;	245 DCHECK(!chunk_.chunk_data);

	246 chunk_.chunk_index = chunk_stream_.GetChunkIndex(start_pos);

396 } else {	247 } else {

397 partial_document_ = false;	248 SetPartialLoadingEnabled(false);

398 }	249 }

399 }	250 return ContinueDownload();

400	251 }

401 ReadMore();	252 // Needs more data to calc chunk index.

	253 return ReadMore();

402 }	254 }

403	255

404 void DocumentLoader::ReadMore() {	256 void DocumentLoader::ReadMore() {

405 pp::CompletionCallback callback =	257 loader_->ReadResponseBody(

406 loader_factory_.NewCallback(&DocumentLoader::DidRead);	258 buffer_, sizeof(buffer_),

407 int rv = loader_.ReadResponseBody(buffer_, sizeof(buffer_), callback);	259 loader_factory_.NewCallback(&DocumentLoader::DidRead));

408 if (rv != PP_OK_COMPLETIONPENDING)

409 callback.Run(rv);

410 }	260 }

411	261

412 void DocumentLoader::DidRead(int32_t result) {	262 void DocumentLoader::DidRead(int32_t result) {

413 if (result <= 0) {	263 if (result < 0) {

414 // If \|result\| == PP_OK, the document was loaded, otherwise an error was	264 return ReadComplete();
	spelchat 2016/10/12 00:35:31 Can you restore the comment that was lost here? It Can you restore the comment that was lost here? It's not obvious here that we're handling the error case correctly. E.g. // An error occurred. The renderer will detect that // we're missing data and will display a message. snake 2016/10/14 11:31:17 Done Show quoted text On 2016/10/12 00:35:31, spelchat wrote: > Can you restore the comment that was lost here? It's not obvious here that we're > handling the error case correctly. > > E.g. > // An error occurred. The renderer will detect that > // we're missing data and will display a message. Done
415 // encountered. Either way we want to stop processing the response. In the	265 } else if (result == 0) {

416 // case where an error occurred, the renderer will detect that we're missing	266 loader_.reset();

417 // data and will display a message.	267 if (!is_partial_loader_active_)

418 ReadComplete();	268 return ReadComplete();

419 return;	269 return ContinueDownload();

420 }	270 }

421	271 if (loader_->IsMultipart()) {

422 char* start = buffer_;	272 int start_pos = 0;

423 size_t length = result;	273 int end_pos = 0;

424 if (is_multipart_ && result > 2) {	274 if (!loader_->GetByteRange(&start_pos, &end_pos)) {

425 for (int i = 2; i < result; ++i) {	275 return ReadComplete();

426 if ((buffer_[i - 1] == '\n' && buffer_[i - 2] == '\n') \|\|	276 }

427 (i >= 4 && buffer_[i - 1] == '\n' && buffer_[i - 2] == '\r' &&	277 DCHECK(!chunk_.chunk_data);

428 buffer_[i - 3] == '\n' && buffer_[i - 4] == '\r')) {	278 chunk_.chunk_index = chunk_stream_.GetChunkIndex(start_pos);

429 uint32_t start_pos, end_pos;	279 }

430 if (GetByteRange(std::string(buffer_, i), &start_pos, &end_pos)) {	280 if (!SaveChunkData(buffer_, result)) {

431 current_pos_ = start_pos;	281 return ReadMore();

432 start += i;	282 }

433 length -= i;	283 if (IsDocumentComplete()) {

434 if (end_pos && end_pos > start_pos)	284 return ReadComplete();

435 current_chunk_size_ = end_pos - start_pos + 1;	285 }

436 }	286 return ContinueDownload();

437 break;	287 }

438 }	288

439 }	289 bool DocumentLoader::SaveChunkData(char* input, uint32_t input_size) {

440	290 bool chunk_saved = false;

441 // Reset this flag so we don't look inside the buffer in future calls of	291 bool loading_pending_request = pending_requests_.Contains(chunk_.chunk_index);

442 // DidRead for this response. Note that this code DOES NOT handle multi-	292 while (input_size > 0) {

443 // part responses with more than one part (we don't issue them at the	293 if (chunk_.data_size == 0) {

444 // moment, so they shouldn't arrive).	294 chunk_.chunk_data.reset(new DataStream::ChunkData());

445 is_multipart_ = false;	295 }

446 }	296 const uint32_t new_chunk_data_len =

447	297 std::min(DataStream::kChunkSize - chunk_.data_size, input_size);

448 if (current_chunk_size_ && current_chunk_read_ + length > current_chunk_size_)	298 memcpy(chunk_.chunk_data->data() + chunk_.data_size, input,

449 length = current_chunk_size_ - current_chunk_read_;	299 new_chunk_data_len);

450	300 chunk_.data_size += new_chunk_data_len;

451 if (length) {	301 if (chunk_.data_size == DataStream::kChunkSize \|\|

452 if (document_size_ > 0) {	302 chunk_stream_.eof_pos() ==

453 chunk_stream_.WriteData(current_pos_, start, length);	303 chunk_.chunk_index * DataStream::kChunkSize + chunk_.data_size) {

454 } else {	304 chunk_stream_.SetChunkData(chunk_.chunk_index,

455 // If we did not get content-length in the response, we can't	305 std::move(chunk_.chunk_data));

456 // preallocate buffer for the entire document. Resizing array causing	306 pending_requests_.Subtract(

457 // memory fragmentation issues on the large files and OOM exceptions.	307 gfx::Range(chunk_.chunk_index, chunk_.chunk_index + 1));

458 // To fix this, we collect all chunks of the file to the list and	308 chunk_.data_size = 0;

459 // concatenate them together after request is complete.	309 ++(chunk_.chunk_index);

460 std::vector<unsigned char> buf(length);	310 chunk_saved = true;

461 memcpy(buf.data(), start, length);	311 }

462 chunk_buffer_.push_back(std::move(buf));	312

463 }	313 input += new_chunk_data_len;

464 current_pos_ += length;	314 input_size -= new_chunk_data_len;

465 current_chunk_read_ += length;	315 }

	316 if (IsDocumentComplete())

	317 return true;

	318 if (chunk_saved) {

	319 if (loading_pending_request &&

	320 !pending_requests_.Contains(chunk_.chunk_index)) {

	321 client_->OnPendingRequestComplete();

	322 }

466 client_->OnNewDataAvailable();	323 client_->OnNewDataAvailable();

467 }	324 }

468	325 return chunk_saved;

469 // Only call the renderer if we allow partial loading.

470 if (!partial_document_) {

471 ReadMore();

472 return;

473 }

474

475 UpdateRendering();

476 RemoveCompletedRanges();

477

478 if (!pending_requests_.empty()) {

479 // If there are pending requests and the current content we're downloading

480 // doesn't satisfy any of these requests, cancel the current request to

481 // fullfill those more important requests.

482 bool satisfying_pending_request =

483 SatisfyingRequest(current_request_offset_, current_request_size_);

484 for (const auto& pending_request : pending_requests_) {

485 if (SatisfyingRequest(pending_request.first, pending_request.second)) {

486 satisfying_pending_request = true;

487 break;

488 }

489 }

490 // Cancel the request as it's not satisfying any request from the

491 // renderer, unless the current request is finished in which case we let

492 // it finish cleanly.

493 if (!satisfying_pending_request &&

494 current_pos_ <

495 current_request_offset_ + current_request_extended_size_) {

496 loader_.Close();

497 }

498 }

499

500 ReadMore();

501 }

502

503 bool DocumentLoader::SatisfyingRequest(size_t offset, size_t size) const {

504 return offset <= current_pos_ + kDefaultRequestSize &&

505 current_pos_ < offset + size;

506 }	326 }

507	327

508 void DocumentLoader::ReadComplete() {	328 void DocumentLoader::ReadComplete() {

509 if (!partial_document_) {	329 if (!GetDocumentSize()) {

510 if (document_size_ == 0) {	330 uint32_t eof =

511 // For the document with no 'content-length" specified we've collected all	331 chunk_.chunk_index * DataStream::kChunkSize + chunk_.data_size;

512 // the chunks already. Let's allocate final document buffer and copy them	332 if (!chunk_stream_.filled_chunks().IsEmpty()) {

513 // over.	333 eof = std::max(

514 chunk_stream_.Preallocate(current_pos_);	334 chunk_stream_.filled_chunks().Last().end() * DataStream::kChunkSize,

515 uint32_t pos = 0;	335 eof);

516 for (auto& chunk : chunk_buffer_) {	336 }

517 chunk_stream_.WriteData(pos, chunk.data(), chunk.size());	337 chunk_stream_.set_eof_pos(eof);

518 pos += chunk.size();	338 if (eof == chunk_.chunk_index * DataStream::kChunkSize + chunk_.data_size) {

519 }	339 chunk_stream_.SetChunkData(chunk_.chunk_index,

520 chunk_buffer_.clear();	340 std::move(chunk_.chunk_data));

521 }	341 }

522 document_size_ = current_pos_;	342 }

523 client_->OnDocumentComplete();	343 loader_.reset();

524 return;

525 }

526

527 request_pending_ = false;

528

529 if (IsDocumentComplete()) {	344 if (IsDocumentComplete()) {

530 client_->OnDocumentComplete();	345 client_->OnDocumentComplete();

531 return;	346 } else {

532 }	347 client_->OnDocumentCanceled();

533	348 }

534 UpdateRendering();	349 }

535 DownloadPendingRequests();	350

536 }	351 float DocumentLoader::GetProgress() const {

537	352 if (!GetDocumentSize())

538 void DocumentLoader::UpdateRendering() {	353 return -1;

539 if (header_request_)	354 if (IsDocumentComplete())

540 client_->OnPartialDocumentLoaded();	355 return 1;

541 else	356 return chunk_stream_.filled_chunks_count() * 1. /

542 client_->OnPendingRequestComplete();	357 chunk_stream_.total_chunks_count();

543 header_request_ = false;

544 }	358 }

545	359

546 } // namespace chrome_pdf	360 } // namespace chrome_pdf

OLD	NEW

« pdf/document_loader.h ('K') | « pdf/document_loader.h ('k') | pdf/document_loader_unittest.cc » ('j') | no next file with comments »