OLD | NEW |
(Empty) | |
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include "core/fetch/MultipartImageResourceParser.h" |
| 6 |
| 7 #include "public/platform/Platform.h" |
| 8 #include "public/platform/WebURLResponse.h" |
| 9 #include "wtf/NotFound.h" |
| 10 #include "wtf/text/WTFString.h" |
| 11 |
| 12 #include <algorithm> |
| 13 |
| 14 namespace blink { |
| 15 |
| 16 MultipartImageResourceParser::MultipartImageResourceParser(const ResourceRespons
e& response, const Vector<char>& boundary, Client* client) |
| 17 : m_originalResponse(response) |
| 18 , m_boundary(boundary) |
| 19 , m_client(client) |
| 20 { |
| 21 // Some servers report a boundary prefixed with "--". See https://crbug.com
/5786. |
| 22 if (m_boundary.size() < 2 || m_boundary[0] != '-' || m_boundary[1] != '-') |
| 23 m_boundary.prepend("--", 2); |
| 24 } |
| 25 |
| 26 void MultipartImageResourceParser::appendData(const char* bytes, size_t size) |
| 27 { |
| 28 // m_sawLastBoundary means that we've already received the final boundary |
| 29 // token. The server should stop sending us data at this point, but if it |
| 30 // does, we just throw it away. |
| 31 if (m_sawLastBoundary) |
| 32 return; |
| 33 m_data.append(bytes, size); |
| 34 |
| 35 if (m_isParsingTop) { |
| 36 // Eat leading \r\n |
| 37 int pos = pushOverLine(m_data, 0); |
| 38 if (pos) |
| 39 m_data.remove(0, pos); |
| 40 |
| 41 if (m_data.size() < m_boundary.size() + 2) { |
| 42 // We don't have enough data yet to make a boundary token. Just |
| 43 // wait until the next chunk of data arrives. |
| 44 return; |
| 45 } |
| 46 |
| 47 // Some servers don't send a boundary token before the first chunk of |
| 48 // data. We handle this case anyway (Gecko does too). |
| 49 if (0 != memcmp(m_data.data(), m_boundary.data(), m_boundary.size())) { |
| 50 m_data.prepend("\n", 1); |
| 51 const auto& boundary = m_boundary; |
| 52 m_data.prepend(boundary); |
| 53 } |
| 54 m_isParsingTop = false; |
| 55 } |
| 56 ASSERT(!m_isParsingTop); |
| 57 |
| 58 // Headers |
| 59 if (m_isParsingHeaders) { |
| 60 // Eat leading \r\n |
| 61 int pos = pushOverLine(m_data, 0); |
| 62 if (pos) |
| 63 m_data.remove(0, pos); |
| 64 |
| 65 if (parseHeaders()) { |
| 66 // Successfully parsed headers. |
| 67 m_isParsingHeaders = false; |
| 68 } else { |
| 69 // Get more data before trying again. |
| 70 return; |
| 71 } |
| 72 if (isCancelled()) |
| 73 return; |
| 74 } |
| 75 ASSERT(!m_isParsingHeaders); |
| 76 |
| 77 size_t boundaryPosition; |
| 78 while ((boundaryPosition = findBoundary(m_data, &m_boundary)) != kNotFound)
{ |
| 79 // Strip out trailing \r\n characters in the buffer preceding the |
| 80 // boundary on the same lines as Firefox. |
| 81 size_t dataSize = boundaryPosition; |
| 82 if (boundaryPosition > 0 && m_data[boundaryPosition - 1] == '\n') { |
| 83 dataSize--; |
| 84 if (boundaryPosition > 1 && m_data[boundaryPosition - 2] == '\r') { |
| 85 dataSize--; |
| 86 } |
| 87 } |
| 88 if (dataSize > 0) { |
| 89 m_client->multipartDataReceived(m_data.data(), dataSize); |
| 90 if (isCancelled()) |
| 91 return; |
| 92 } |
| 93 size_t boundaryEndPosition = boundaryPosition + m_boundary.size(); |
| 94 if (boundaryEndPosition < m_data.size() && '-' == m_data[boundaryEndPosi
tion]) { |
| 95 // This was the last boundary so we can stop processing. |
| 96 m_sawLastBoundary = true; |
| 97 m_data.clear(); |
| 98 return; |
| 99 } |
| 100 |
| 101 // We can now throw out data up through the boundary |
| 102 int offset = pushOverLine(m_data, boundaryEndPosition); |
| 103 m_data.remove(0, boundaryEndPosition + offset); |
| 104 |
| 105 // Ok, back to parsing headers |
| 106 if (!parseHeaders()) { |
| 107 m_isParsingHeaders = true; |
| 108 break; |
| 109 } |
| 110 } |
| 111 |
| 112 // At this point, we should send over any data we have, but keep enough data |
| 113 // buffered to handle a boundary that may have been truncated. |
| 114 if (!m_isParsingHeaders && m_data.size() > m_boundary.size()) { |
| 115 // If the last character is a new line character, go ahead and just send |
| 116 // everything we have buffered. This matches an optimization in Gecko. |
| 117 size_t sendLength = m_data.size() - m_boundary.size(); |
| 118 if (m_data.last() == '\n') |
| 119 sendLength = m_data.size(); |
| 120 m_client->multipartDataReceived(m_data.data(), sendLength); |
| 121 m_data.remove(0, sendLength); |
| 122 } |
| 123 } |
| 124 |
| 125 void MultipartImageResourceParser::finish() |
| 126 { |
| 127 ASSERT(!isCancelled()); |
| 128 // If we have any pending data and we're not in a header, go ahead and send |
| 129 // it to the client. |
| 130 if (!m_isParsingHeaders && !m_data.isEmpty() && !m_sawLastBoundary) |
| 131 m_client->multipartDataReceived(m_data.data(), m_data.size()); |
| 132 m_data.clear(); |
| 133 m_sawLastBoundary = true; |
| 134 } |
| 135 |
| 136 int MultipartImageResourceParser::pushOverLine(const Vector<char>& data, size_t
pos) |
| 137 { |
| 138 int offset = 0; |
| 139 if (pos < data.size() && (data[pos] == '\r' || data[pos] == '\n')) { |
| 140 ++offset; |
| 141 if (pos + 1 < data.size() && data[pos + 1] == '\n') |
| 142 ++offset; |
| 143 } |
| 144 return offset; |
| 145 } |
| 146 |
| 147 bool MultipartImageResourceParser::parseHeaders() |
| 148 { |
| 149 // Create a WebURLResponse based on the original set of headers + the |
| 150 // replacement headers. We only replace the same few headers that gecko |
| 151 // does. See netwerk/streamconv/converters/nsMultiMixedConv.cpp. |
| 152 WebURLResponse response(m_originalResponse.url()); |
| 153 for (const auto& header : m_originalResponse.httpHeaderFields()) |
| 154 response.addHTTPHeaderField(header.key, header.value); |
| 155 |
| 156 size_t end = 0; |
| 157 if (!Platform::current()->parseMultipartHeadersFromBody(m_data.data(), m_dat
a.size(), &response, &end)) |
| 158 return false; |
| 159 m_data.remove(0, end); |
| 160 |
| 161 // To avoid recording every multipart load as a separate visit in |
| 162 // the history database, we want to keep track of whether the response |
| 163 // is part of a multipart payload. We do want to record the first visit, |
| 164 // so we only set isMultipartPayload to true after the first visit. |
| 165 response.setIsMultipartPayload(!m_isFirstPart); |
| 166 m_isFirstPart = false; |
| 167 // Send the response! |
| 168 m_client->onePartInMultipartReceived(response.toResourceResponse()); |
| 169 |
| 170 return true; |
| 171 } |
| 172 |
| 173 // Boundaries are supposed to be preceeded with --, but it looks like gecko |
| 174 // doesn't require the dashes to exist. See nsMultiMixedConv::FindToken. |
| 175 size_t MultipartImageResourceParser::findBoundary(const Vector<char>& data, Vect
or<char>* boundary) |
| 176 { |
| 177 auto it = std::search(data.data(), data.data() + data.size(), boundary->data
(), boundary->data() + boundary->size()); |
| 178 if (it == data.data() + data.size()) |
| 179 return kNotFound; |
| 180 |
| 181 size_t boundaryPosition = it - data.data(); |
| 182 // Back up over -- for backwards compat |
| 183 // TODO(tc): Don't we only want to do this once? Gecko code doesn't |
| 184 // seem to care. |
| 185 if (boundaryPosition >= 2) { |
| 186 if ('-' == data[boundaryPosition - 1] && '-' == data[boundaryPosition -
2]) { |
| 187 boundaryPosition -= 2; |
| 188 Vector<char> v(2, '-'); |
| 189 v.appendVector(*boundary); |
| 190 *boundary = v; |
| 191 } |
| 192 } |
| 193 return boundaryPosition; |
| 194 } |
| 195 |
// Tracing hook for MultipartImageResourceParser: hands m_client to the
// visitor.
// NOTE(review): presumably this is what makes the parser's reference to its
// client visible to the garbage collector; confirm against the member
// declaration in the header.
| 196 DEFINE_TRACE(MultipartImageResourceParser) |
| 197 { |
| 198 visitor->trace(m_client); |
| 199 } |
| 200 |
| 201 } // namespace blink |
OLD | NEW |