| OLD | NEW |
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "src/parsing/scanner-character-streams.h" | 5 #include "src/parsing/scanner-character-streams.h" |
| 6 | 6 |
| 7 #include "include/v8.h" | 7 #include "include/v8.h" |
| 8 #include "src/globals.h" | 8 #include "src/globals.h" |
| 9 #include "src/handles.h" | 9 #include "src/handles.h" |
| 10 #include "src/objects-inl.h" | 10 #include "src/objects-inl.h" |
| 11 #include "src/parsing/scanner.h" | 11 #include "src/parsing/scanner.h" |
| 12 #include "src/unicode-inl.h" | 12 #include "src/unicode-inl.h" |
| 13 | 13 |
| 14 namespace v8 { | 14 namespace v8 { |
| 15 namespace internal { | 15 namespace internal { |
| 16 | 16 |
| 17 namespace { |
| 18 const unibrow::uchar kUtf8Bom = 0xfeff; |
| 19 } // namespace |
| 20 |
| 17 // ---------------------------------------------------------------------------- | 21 // ---------------------------------------------------------------------------- |
| 18 // BufferedUtf16CharacterStreams | 22 // BufferedUtf16CharacterStreams |
| 19 // | 23 // |
| 20 // A buffered character stream based on a random access character | 24 // A buffered character stream based on a random access character |
| 21 // source (ReadBlock can be called with pos() pointing to any position, | 25 // source (ReadBlock can be called with pos() pointing to any position, |
| 22 // even positions before the current). | 26 // even positions before the current). |
| 23 class BufferedUtf16CharacterStream : public Utf16CharacterStream { | 27 class BufferedUtf16CharacterStream : public Utf16CharacterStream { |
| 24 public: | 28 public: |
| 25 BufferedUtf16CharacterStream(); | 29 BufferedUtf16CharacterStream(); |
| 26 | 30 |
| (...skipping 225 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 252 const Chunk& chunk = chunks_[current_.chunk_no]; | 256 const Chunk& chunk = chunks_[current_.chunk_no]; |
| 253 DCHECK(current_.pos.bytes >= chunk.start.bytes); | 257 DCHECK(current_.pos.bytes >= chunk.start.bytes); |
| 254 | 258 |
| 255 unibrow::Utf8::Utf8IncrementalBuffer incomplete_char = | 259 unibrow::Utf8::Utf8IncrementalBuffer incomplete_char = |
| 256 chunk.start.incomplete_char; | 260 chunk.start.incomplete_char; |
| 257 size_t it = current_.pos.bytes - chunk.start.bytes; | 261 size_t it = current_.pos.bytes - chunk.start.bytes; |
| 258 size_t chars = chunk.start.chars; | 262 size_t chars = chunk.start.chars; |
| 259 while (it < chunk.length && chars < position) { | 263 while (it < chunk.length && chars < position) { |
| 260 unibrow::uchar t = | 264 unibrow::uchar t = |
| 261 unibrow::Utf8::ValueOfIncremental(chunk.data[it], &incomplete_char); | 265 unibrow::Utf8::ValueOfIncremental(chunk.data[it], &incomplete_char); |
| 262 if (t != unibrow::Utf8::kIncomplete) { | 266 if (t == kUtf8Bom && current_.pos.chars == 0) { |
| 267 // BOM detected at beginning of the stream. Don't copy it. |
| 268 } else if (t != unibrow::Utf8::kIncomplete) { |
| 263 chars++; | 269 chars++; |
| 264 if (t > unibrow::Utf16::kMaxNonSurrogateCharCode) chars++; | 270 if (t > unibrow::Utf16::kMaxNonSurrogateCharCode) chars++; |
| 265 } | 271 } |
| 266 it++; | 272 it++; |
| 267 } | 273 } |
| 268 | 274 |
| 269 current_.pos.bytes += it; | 275 current_.pos.bytes += it; |
| 270 current_.pos.chars = chars; | 276 current_.pos.chars = chars; |
| 271 current_.pos.incomplete_char = incomplete_char; | 277 current_.pos.incomplete_char = incomplete_char; |
| 272 current_.chunk_no += (it == chunk.length); | 278 current_.chunk_no += (it == chunk.length); |
| (...skipping 20 matching lines...) Expand all Loading... |
| 293 unibrow::Utf8::ValueOfIncrementalFinish(&current_.pos.incomplete_char); | 299 unibrow::Utf8::ValueOfIncrementalFinish(&current_.pos.incomplete_char); |
| 294 if (t != unibrow::Utf8::kBufferEmpty) { | 300 if (t != unibrow::Utf8::kBufferEmpty) { |
| 295 DCHECK(t < unibrow::Utf16::kMaxNonSurrogateCharCode); | 301 DCHECK(t < unibrow::Utf16::kMaxNonSurrogateCharCode); |
| 296 *cursor = static_cast<uc16>(t); | 302 *cursor = static_cast<uc16>(t); |
| 297 buffer_end_++; | 303 buffer_end_++; |
| 298 current_.pos.chars++; | 304 current_.pos.chars++; |
| 299 } | 305 } |
| 300 return; | 306 return; |
| 301 } | 307 } |
| 302 | 308 |
| 303 static const unibrow::uchar kUtf8Bom = 0xfeff; | |
| 304 | |
| 305 unibrow::Utf8::Utf8IncrementalBuffer incomplete_char = | 309 unibrow::Utf8::Utf8IncrementalBuffer incomplete_char = |
| 306 current_.pos.incomplete_char; | 310 current_.pos.incomplete_char; |
| 307 size_t it; | 311 size_t it; |
| 308 for (it = current_.pos.bytes - chunk.start.bytes; | 312 for (it = current_.pos.bytes - chunk.start.bytes; |
| 309 it < chunk.length && cursor + 1 < buffer_start_ + kBufferSize; it++) { | 313 it < chunk.length && cursor + 1 < buffer_start_ + kBufferSize; it++) { |
| 310 unibrow::uchar t = | 314 unibrow::uchar t = |
| 311 unibrow::Utf8::ValueOfIncremental(chunk.data[it], &incomplete_char); | 315 unibrow::Utf8::ValueOfIncremental(chunk.data[it], &incomplete_char); |
| 312 if (t == unibrow::Utf8::kIncomplete) continue; | 316 if (t == unibrow::Utf8::kIncomplete) continue; |
| 313 if (V8_LIKELY(t < kUtf8Bom)) { | 317 if (V8_LIKELY(t < kUtf8Bom)) { |
| 314 *(cursor++) = static_cast<uc16>(t); // By far the most frequent case. | 318 *(cursor++) = static_cast<uc16>(t); // By far the most frequent case. |
| (...skipping 521 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 836 return new OneByteExternalStreamingStream(source_stream); | 840 return new OneByteExternalStreamingStream(source_stream); |
| 837 case v8::ScriptCompiler::StreamedSource::UTF8: | 841 case v8::ScriptCompiler::StreamedSource::UTF8: |
| 838 return new Utf8ExternalStreamingStream(source_stream); | 842 return new Utf8ExternalStreamingStream(source_stream); |
| 839 } | 843 } |
| 840 UNREACHABLE(); | 844 UNREACHABLE(); |
| 841 return nullptr; | 845 return nullptr; |
| 842 } | 846 } |
| 843 | 847 |
| 844 } // namespace internal | 848 } // namespace internal |
| 845 } // namespace v8 | 849 } // namespace v8 |
| OLD | NEW |