OLD | NEW |
---|---|
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/parsing/scanner-character-streams.h" | 5 #include "src/parsing/scanner-character-streams.h" |
6 | 6 |
7 #include "include/v8.h" | 7 #include "include/v8.h" |
8 #include "src/counters.h" | 8 #include "src/counters.h" |
9 #include "src/globals.h" | 9 #include "src/globals.h" |
10 #include "src/handles.h" | 10 #include "src/handles.h" |
11 #include "src/objects-inl.h" | 11 #include "src/objects-inl.h" |
12 #include "src/parsing/scanner.h" | 12 #include "src/parsing/scanner.h" |
13 #include "src/unicode-inl.h" | 13 #include "src/unicode-inl.h" |
14 | 14 |
15 namespace v8 { | 15 namespace v8 { |
16 namespace internal { | 16 namespace internal { |
17 | 17 |
18 namespace { | |
19 const unibrow::uchar kUtf8Bom = 0xfeff; | |
20 } // namespace | |
21 | |
18 // ---------------------------------------------------------------------------- | 22 // ---------------------------------------------------------------------------- |
19 // BufferedUtf16CharacterStreams | 23 // BufferedUtf16CharacterStreams |
20 // | 24 // |
21 // A buffered character stream based on a random access character | 25 // A buffered character stream based on a random access character |
22 // source (ReadBlock can be called with pos() pointing to any position, | 26 // source (ReadBlock can be called with pos() pointing to any position, |
23 // even positions before the current). | 27 // even positions before the current). |
24 class BufferedUtf16CharacterStream : public Utf16CharacterStream { | 28 class BufferedUtf16CharacterStream : public Utf16CharacterStream { |
25 public: | 29 public: |
26 BufferedUtf16CharacterStream(); | 30 BufferedUtf16CharacterStream(); |
27 | 31 |
(...skipping 228 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
256 const Chunk& chunk = chunks_[current_.chunk_no]; | 260 const Chunk& chunk = chunks_[current_.chunk_no]; |
257 DCHECK(current_.pos.bytes >= chunk.start.bytes); | 261 DCHECK(current_.pos.bytes >= chunk.start.bytes); |
258 | 262 |
259 unibrow::Utf8::Utf8IncrementalBuffer incomplete_char = | 263 unibrow::Utf8::Utf8IncrementalBuffer incomplete_char = |
260 chunk.start.incomplete_char; | 264 chunk.start.incomplete_char; |
261 size_t it = current_.pos.bytes - chunk.start.bytes; | 265 size_t it = current_.pos.bytes - chunk.start.bytes; |
262 size_t chars = chunk.start.chars; | 266 size_t chars = chunk.start.chars; |
263 while (it < chunk.length && chars < position) { | 267 while (it < chunk.length && chars < position) { |
264 unibrow::uchar t = | 268 unibrow::uchar t = |
265 unibrow::Utf8::ValueOfIncremental(chunk.data[it], &incomplete_char); | 269 unibrow::Utf8::ValueOfIncremental(chunk.data[it], &incomplete_char); |
266 if (t != unibrow::Utf8::kIncomplete) { | 270 if (t == kUtf8Bom && current_.pos.chars == 0) { |
271 // BOM detected at beginning of the stream. Don't copy it. | |
vogelheim
2017/01/30 18:59:32
copy -> count. This loop doesn't copy anything; it
| |
272 } else if (t != unibrow::Utf8::kIncomplete) { | |
267 chars++; | 273 chars++; |
268 if (t > unibrow::Utf16::kMaxNonSurrogateCharCode) chars++; | 274 if (t > unibrow::Utf16::kMaxNonSurrogateCharCode) chars++; |
269 } | 275 } |
270 it++; | 276 it++; |
271 } | 277 } |
272 | 278 |
273 current_.pos.bytes += it; | 279 current_.pos.bytes += it; |
274 current_.pos.chars = chars; | 280 current_.pos.chars = chars; |
275 current_.pos.incomplete_char = incomplete_char; | 281 current_.pos.incomplete_char = incomplete_char; |
276 current_.chunk_no += (it == chunk.length); | 282 current_.chunk_no += (it == chunk.length); |
(...skipping 20 matching lines...) Expand all Loading... | |
297 unibrow::Utf8::ValueOfIncrementalFinish(&current_.pos.incomplete_char); | 303 unibrow::Utf8::ValueOfIncrementalFinish(&current_.pos.incomplete_char); |
298 if (t != unibrow::Utf8::kBufferEmpty) { | 304 if (t != unibrow::Utf8::kBufferEmpty) { |
299 DCHECK(t < unibrow::Utf16::kMaxNonSurrogateCharCode); | 305 DCHECK(t < unibrow::Utf16::kMaxNonSurrogateCharCode); |
300 *cursor = static_cast<uc16>(t); | 306 *cursor = static_cast<uc16>(t); |
301 buffer_end_++; | 307 buffer_end_++; |
302 current_.pos.chars++; | 308 current_.pos.chars++; |
303 } | 309 } |
304 return; | 310 return; |
305 } | 311 } |
306 | 312 |
307 static const unibrow::uchar kUtf8Bom = 0xfeff; | |
308 | |
309 unibrow::Utf8::Utf8IncrementalBuffer incomplete_char = | 313 unibrow::Utf8::Utf8IncrementalBuffer incomplete_char = |
310 current_.pos.incomplete_char; | 314 current_.pos.incomplete_char; |
311 size_t it; | 315 size_t it; |
312 for (it = current_.pos.bytes - chunk.start.bytes; | 316 for (it = current_.pos.bytes - chunk.start.bytes; |
313 it < chunk.length && cursor + 1 < buffer_start_ + kBufferSize; it++) { | 317 it < chunk.length && cursor + 1 < buffer_start_ + kBufferSize; it++) { |
314 unibrow::uchar t = | 318 unibrow::uchar t = |
315 unibrow::Utf8::ValueOfIncremental(chunk.data[it], &incomplete_char); | 319 unibrow::Utf8::ValueOfIncremental(chunk.data[it], &incomplete_char); |
316 if (t == unibrow::Utf8::kIncomplete) continue; | 320 if (t == unibrow::Utf8::kIncomplete) continue; |
317 if (V8_LIKELY(t < kUtf8Bom)) { | 321 if (V8_LIKELY(t < kUtf8Bom)) { |
318 *(cursor++) = static_cast<uc16>(t); // By far the most frequent case. | 322 *(cursor++) = static_cast<uc16>(t); // By far the most frequent case. |
(...skipping 532 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
851 return new OneByteExternalStreamingStream(source_stream, stats); | 855 return new OneByteExternalStreamingStream(source_stream, stats); |
852 case v8::ScriptCompiler::StreamedSource::UTF8: | 856 case v8::ScriptCompiler::StreamedSource::UTF8: |
853 return new Utf8ExternalStreamingStream(source_stream, stats); | 857 return new Utf8ExternalStreamingStream(source_stream, stats); |
854 } | 858 } |
855 UNREACHABLE(); | 859 UNREACHABLE(); |
856 return nullptr; | 860 return nullptr; |
857 } | 861 } |
858 | 862 |
859 } // namespace internal | 863 } // namespace internal |
860 } // namespace v8 | 864 } // namespace v8 |
OLD | NEW |