third_party/WebKit/Source/core/fetch/MultipartImageResourceParser.cpp - Issue 1693183002: Move multipart resource handling to core/fetch (1/2)

Side by Side Diff: third_party/WebKit/Source/core/fetch/MultipartImageResourceParser.cpp

Issue 1693183002: Move multipart resource handling to core/fetch (1/2) (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@multipart-cleanup-preliminary

Patch Set: Created 4 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« third_party/WebKit/Source/core/fetch/MultipartImageResourceParser.h ('K') | « third_party/WebKit/Source/core/fetch/MultipartImageResourceParser.h ('k') | third_party/WebKit/Source/core/fetch/MultipartImageResourceParserTest.cpp » ('j') | third_party/WebKit/Source/core/fetch/MultipartImageResourceParserTest.cpp » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1 // Copyright 2016 The Chromium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 #include "core/fetch/MultipartImageResourceParser.h"

	6

	7 #include "public/platform/Platform.h"

	8 #include "public/platform/WebURLResponse.h"

	9 #include "wtf/NotFound.h"

	10 #include "wtf/StdLibExtras.h"

	11 #include "wtf/text/WTFString.h"

	12

	13 #include <algorithm>

	14

	15 namespace blink {

	16

	17 namespace {

	18

	19 // The list of response headers that we do not copy from the original

	20 // response when generating a WebURLResponse for a MIME payload.

	21 const char* kReplaceHeaders[] = {
	Nate Chapin 2016/02/25 21:48:29 It seems bad to have this array both here and web_url_loader_impl.cc. Can we avoid that? yhirano 2016/02/26 22:21:50 On 2016/02/25 21:48:29, Nate Chapin wrote: > It seems bad to have this array both here and web_url_loader_impl.cc. Can we > avoid that? Done.
	22 "content-type",

	23 "content-length",

	24 "content-disposition",

	25 "content-range",

	26 "range",

	27 "set-cookie"

	28 };

	29

	30 } // namespace

	31

	32 MultipartImageResourceParser::MultipartImageResourceParser(const ResourceResponse& response, const Vector<char>& boundary, Client* client)

	33 : m_originalResponse(response)

	34 , m_boundary(boundary)

	35 , m_client(client)

	36 {

	37 // Some servers report a boundary prefixed with "--". See https://crbug.com/5786.

	38 if (m_boundary.size() < 2 || m_boundary[0] != '-' || m_boundary[1] != '-')

	39 m_boundary.prepend("--", 2);

	40 }

	41

	42 void MultipartImageResourceParser::addData(const char* bytes, size_t size)

	43 {

	44 // m_sawLastBoundary means that we've already received the final boundary

	45 // token. The server should stop sending us data at this point, but if it

	46 // doesn't, we just throw it away.

	47 if (m_sawLastBoundary)

	48 return;

	49 m_data.append(bytes, size);

	50

	51 if (m_isParsingTop) {
	Nate Chapin 2016/02/25 21:48:29 This was called first_received_data_ before. That name seems more clear to me than m_isParsingTop. Am I missing something? yhirano 2016/02/26 22:21:50 On 2016/02/25 21:48:29, Nate Chapin wrote: > This was called first_received_data_ before. That name seems more clear to me than > m_isParsingTop. Am I missing something? I wondered what "first" means: it may be true in the second addData call. Is it good to rename it to isParsingFirstBoundary?
	52 // Eat leading \r\n

	53 int pos = pushOverLine(m_data, 0);

	54 if (pos)

	55 m_data.remove(0, pos);

	56

	57 if (m_data.size() < m_boundary.size() + 2) {

	58 // We don't have enough data yet to make a boundary token. Just

	59 // wait until the next chunk of data arrives.

	60 return;

	61 }

	62

	63 // Some servers don't send a boundary token before the first chunk of

	64 // data. We handle this case anyway (Gecko does too).

	65 if (0 != memcmp(m_data.data(), m_boundary.data(), m_boundary.size())) {

	66 m_data.prepend("\n", 1);

	67 const auto& boundary = m_boundary;

	68 m_data.prepend(boundary);

	69 }

	70 m_isParsingTop = false;

	71 }

	72 ASSERT(!m_isParsingTop);

	73

	74 // Headers

	75 if (m_isParsingHeaders) {

	76 // Eat leading \r\n

	77 int pos = pushOverLine(m_data, 0);

	78 if (pos)

	79 m_data.remove(0, pos);

	80

	81 if (parseHeaders()) {

	82 // Successfully parsed headers.

	83 m_isParsingHeaders = false;

	84 } else {

	85 // Get more data before trying again.

	86 return;

	87 }

	88 if (isCancelled())

	89 return;

	90 }

	91 ASSERT(!m_isParsingHeaders);

	92

	93 size_t boundaryPosition;

	94 while ((boundaryPosition = findBoundary(m_data, &m_boundary)) != kNotFound) {

	95 // Strip out trailing \r\n characters in the buffer preceding the

	96 // boundary on the same lines as Firefox.

	97 size_t dataSize = boundaryPosition;

	98 if (boundaryPosition > 0 && m_data[boundaryPosition - 1] == '\n') {

	99 dataSize--;

	100 if (boundaryPosition > 1 && m_data[boundaryPosition - 2] == '\r') {

	101 dataSize--;

	102 }

	103 }

	104 if (dataSize > 0) {

	105 m_client->didReceiveData(m_data.data(), dataSize);

	106 if (isCancelled())

	107 return;

	108 }

	109 size_t boundaryEndPosition = boundaryPosition + m_boundary.size();

	110 if (boundaryEndPosition < m_data.size() && '-' == m_data[boundaryEndPosition]) {

	111 // This was the last boundary so we can stop processing.

	112 m_sawLastBoundary = true;

	113 m_data.clear();

	114 return;

	115 }

	116

	117 // We can now throw out data up through the boundary

	118 int offset = pushOverLine(m_data, boundaryEndPosition);

	119 m_data.remove(0, boundaryEndPosition + offset);

	120

	121 // Ok, back to parsing headers

	122 if (!parseHeaders()) {

	123 m_isParsingHeaders = true;

	124 break;

	125 }

	126 }

	127

	128 // At this point, we should send over any data we have, but keep enough data

	129 // buffered to handle a boundary that may have been truncated.

	130 if (!m_isParsingHeaders && m_data.size() > m_boundary.size()) {

	131 // If the last character is a new line character, go ahead and just send

	132 // everything we have buffered. This matches an optimization in Gecko.

	133 size_t sendLength = m_data.size() - m_boundary.size();

	134 if (m_data.last() == '\n')

	135 sendLength = m_data.size();

	136 m_client->didReceiveData(m_data.data(), sendLength);

	137 m_data.remove(0, sendLength);

	138 }

	139 }

	140

	141 void MultipartImageResourceParser::finish()

	142 {

	143 ASSERT(!isCancelled());

	144 // If we have any pending data and we're not in a header, go ahead and send

	145 // it to the client.

	146 if (!m_isParsingHeaders && !m_data.isEmpty() && !m_sawLastBoundary)

	147 m_client->didReceiveData(m_data.data(), m_data.size());

	148 m_data.clear();

	149 m_sawLastBoundary = true;

	150 }

	151

	152 int MultipartImageResourceParser::pushOverLine(const Vector<char>& data, size_t pos)

	153 {

	154 int offset = 0;

	155 if (pos < data.size() && (data[pos] == '\r' || data[pos] == '\n')) {

	156 ++offset;

	157 if (pos + 1 < data.size() && data[pos + 1] == '\n')

	158 ++offset;

	159 }

	160 return offset;

	161 }

	162

	163 bool MultipartImageResourceParser::parseHeaders()

	164 {

	165 // Create a WebURLResponse based on the original set of headers + the

	166 // replacement headers. We only replace the same few headers that gecko

	167 // does. See netwerk/streamconv/converters/nsMultiMixedConv.cpp.

	168 WebURLResponse response(m_originalResponse.url());

	169

	170 size_t end = 0;

	171 if (!Platform::current()->parseAdditionalHeaders(m_data.data(), m_data.size(), &response, &end))

	172 return false;

	173 m_data.remove(0, end);

	174

	175 // Copy the response headers from the original response.

	176 for (const auto& header : m_originalResponse.httpHeaderFields()) {

	177 bool forbidden = false;

	178 for (size_t i = 0; !forbidden && i < WTF_ARRAY_LENGTH(kReplaceHeaders); ++i)

	179 forbidden = equalIgnoringCase(header.key, kReplaceHeaders[i]);

	180 if (!forbidden)

	181 response.addHTTPHeaderField(header.key, header.value);

	182 }

	183

	184 // To avoid recording every multipart load as a separate visit in

	185 // the history database, we want to keep track of whether the response

	186 // is part of a multipart payload. We do want to record the first visit,

	187 // so we only set isMultipartPayload to true after the first visit.

	188 response.setIsMultipartPayload(!m_isFirstPart);

	189 m_isFirstPart = false;

	190 // Send the response!

	191 m_client->didReceiveResponse(response.toResourceResponse());

	192

	193 return true;

	194 }

	195

	196 // Boundaries are supposed to be preceded with --, but it looks like gecko

	197 // doesn't require the dashes to exist. See nsMultiMixedConv::FindToken.

	198 size_t MultipartImageResourceParser::findBoundary(const Vector<char>& data, Vector<char>* boundary)

	199 {

	200 auto it = std::search(data.data(), data.data() + data.size(), boundary->data(), boundary->data() + boundary->size());

	201 if (it == data.data() + data.size())

	202 return kNotFound;

	203

	204 size_t boundaryPosition = it - data.data();

	205 // Back up over -- for backwards compat

	206 // TODO(tc): Don't we only want to do this once? Gecko code doesn't

	207 // seem to care.

	208 if (boundaryPosition >= 2) {

	209 if ('-' == data[boundaryPosition - 1] && '-' == data[boundaryPosition - 2]) {

	210 boundaryPosition -= 2;

	211 Vector<char> v(2, '-');

	212 v.appendVector(*boundary);

	213 *boundary = v;

	214 }

	215 }

	216 return boundaryPosition;

	217 }

	218

	219 DEFINE_TRACE(MultipartImageResourceParser)

	220 {

	221 visitor->trace(m_client);

	222 }

	223

	224 } // namespace blink

OLD	NEW