Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(273)

Side by Side Diff: src/parsing/scanner-character-streams.cc

Issue 2663773002: [scanner] Fix bom handling (Closed)
Patch Set: Created 3 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/parsing/scanner-character-streams.h" 5 #include "src/parsing/scanner-character-streams.h"
6 6
7 #include "include/v8.h" 7 #include "include/v8.h"
8 #include "src/counters.h" 8 #include "src/counters.h"
9 #include "src/globals.h" 9 #include "src/globals.h"
10 #include "src/handles.h" 10 #include "src/handles.h"
11 #include "src/objects-inl.h" 11 #include "src/objects-inl.h"
12 #include "src/parsing/scanner.h" 12 #include "src/parsing/scanner.h"
13 #include "src/unicode-inl.h" 13 #include "src/unicode-inl.h"
14 14
15 namespace v8 { 15 namespace v8 {
16 namespace internal { 16 namespace internal {
17 17
18 namespace {
19 const unibrow::uchar kUtf8Bom = 0xfeff;
20 } // namespace
21
18 // ---------------------------------------------------------------------------- 22 // ----------------------------------------------------------------------------
19 // BufferedUtf16CharacterStreams 23 // BufferedUtf16CharacterStreams
20 // 24 //
21 // A buffered character stream based on a random access character 25 // A buffered character stream based on a random access character
22 // source (ReadBlock can be called with pos() pointing to any position, 26 // source (ReadBlock can be called with pos() pointing to any position,
23 // even positions before the current). 27 // even positions before the current).
24 class BufferedUtf16CharacterStream : public Utf16CharacterStream { 28 class BufferedUtf16CharacterStream : public Utf16CharacterStream {
25 public: 29 public:
26 BufferedUtf16CharacterStream(); 30 BufferedUtf16CharacterStream();
27 31
(...skipping 228 matching lines...) Expand 10 before | Expand all | Expand 10 after
256 const Chunk& chunk = chunks_[current_.chunk_no]; 260 const Chunk& chunk = chunks_[current_.chunk_no];
257 DCHECK(current_.pos.bytes >= chunk.start.bytes); 261 DCHECK(current_.pos.bytes >= chunk.start.bytes);
258 262
259 unibrow::Utf8::Utf8IncrementalBuffer incomplete_char = 263 unibrow::Utf8::Utf8IncrementalBuffer incomplete_char =
260 chunk.start.incomplete_char; 264 chunk.start.incomplete_char;
261 size_t it = current_.pos.bytes - chunk.start.bytes; 265 size_t it = current_.pos.bytes - chunk.start.bytes;
262 size_t chars = chunk.start.chars; 266 size_t chars = chunk.start.chars;
263 while (it < chunk.length && chars < position) { 267 while (it < chunk.length && chars < position) {
264 unibrow::uchar t = 268 unibrow::uchar t =
265 unibrow::Utf8::ValueOfIncremental(chunk.data[it], &incomplete_char); 269 unibrow::Utf8::ValueOfIncremental(chunk.data[it], &incomplete_char);
266 if (t != unibrow::Utf8::kIncomplete) { 270 if (t == kUtf8Bom && current_.pos.chars == 0) {
271 // BOM detected at beginning of the stream. Don't copy it.
vogelheim 2017/01/30 18:59:32 copy -> count. This loop doesn't copy anything; it
272 } else if (t != unibrow::Utf8::kIncomplete) {
267 chars++; 273 chars++;
268 if (t > unibrow::Utf16::kMaxNonSurrogateCharCode) chars++; 274 if (t > unibrow::Utf16::kMaxNonSurrogateCharCode) chars++;
269 } 275 }
270 it++; 276 it++;
271 } 277 }
272 278
273 current_.pos.bytes += it; 279 current_.pos.bytes += it;
274 current_.pos.chars = chars; 280 current_.pos.chars = chars;
275 current_.pos.incomplete_char = incomplete_char; 281 current_.pos.incomplete_char = incomplete_char;
276 current_.chunk_no += (it == chunk.length); 282 current_.chunk_no += (it == chunk.length);
(...skipping 20 matching lines...) Expand all
297 unibrow::Utf8::ValueOfIncrementalFinish(&current_.pos.incomplete_char); 303 unibrow::Utf8::ValueOfIncrementalFinish(&current_.pos.incomplete_char);
298 if (t != unibrow::Utf8::kBufferEmpty) { 304 if (t != unibrow::Utf8::kBufferEmpty) {
299 DCHECK(t < unibrow::Utf16::kMaxNonSurrogateCharCode); 305 DCHECK(t < unibrow::Utf16::kMaxNonSurrogateCharCode);
300 *cursor = static_cast<uc16>(t); 306 *cursor = static_cast<uc16>(t);
301 buffer_end_++; 307 buffer_end_++;
302 current_.pos.chars++; 308 current_.pos.chars++;
303 } 309 }
304 return; 310 return;
305 } 311 }
306 312
307 static const unibrow::uchar kUtf8Bom = 0xfeff;
308
309 unibrow::Utf8::Utf8IncrementalBuffer incomplete_char = 313 unibrow::Utf8::Utf8IncrementalBuffer incomplete_char =
310 current_.pos.incomplete_char; 314 current_.pos.incomplete_char;
311 size_t it; 315 size_t it;
312 for (it = current_.pos.bytes - chunk.start.bytes; 316 for (it = current_.pos.bytes - chunk.start.bytes;
313 it < chunk.length && cursor + 1 < buffer_start_ + kBufferSize; it++) { 317 it < chunk.length && cursor + 1 < buffer_start_ + kBufferSize; it++) {
314 unibrow::uchar t = 318 unibrow::uchar t =
315 unibrow::Utf8::ValueOfIncremental(chunk.data[it], &incomplete_char); 319 unibrow::Utf8::ValueOfIncremental(chunk.data[it], &incomplete_char);
316 if (t == unibrow::Utf8::kIncomplete) continue; 320 if (t == unibrow::Utf8::kIncomplete) continue;
317 if (V8_LIKELY(t < kUtf8Bom)) { 321 if (V8_LIKELY(t < kUtf8Bom)) {
318 *(cursor++) = static_cast<uc16>(t); // By far the most frequent case. 322 *(cursor++) = static_cast<uc16>(t); // By far the most frequent case.
(...skipping 532 matching lines...) Expand 10 before | Expand all | Expand 10 after
851 return new OneByteExternalStreamingStream(source_stream, stats); 855 return new OneByteExternalStreamingStream(source_stream, stats);
852 case v8::ScriptCompiler::StreamedSource::UTF8: 856 case v8::ScriptCompiler::StreamedSource::UTF8:
853 return new Utf8ExternalStreamingStream(source_stream, stats); 857 return new Utf8ExternalStreamingStream(source_stream, stats);
854 } 858 }
855 UNREACHABLE(); 859 UNREACHABLE();
856 return nullptr; 860 return nullptr;
857 } 861 }
858 862
859 } // namespace internal 863 } // namespace internal
860 } // namespace v8 864 } // namespace v8
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698