OLD | NEW |
---|---|
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/parsing/scanner-character-streams.h" | 5 #include "src/parsing/scanner-character-streams.h" |
6 | 6 |
7 #include "include/v8.h" | 7 #include "include/v8.h" |
8 #include "src/globals.h" | 8 #include "src/globals.h" |
9 #include "src/handles.h" | 9 #include "src/handles.h" |
10 #include "src/objects-inl.h" | 10 #include "src/objects-inl.h" |
(...skipping 268 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
279 DCHECK_EQ(buffer_start_, buffer_cursor_); | 279 DCHECK_EQ(buffer_start_, buffer_cursor_); |
280 DCHECK_LT(buffer_end_ + 1, buffer_start_ + kBufferSize); | 280 DCHECK_LT(buffer_end_ + 1, buffer_start_ + kBufferSize); |
281 | 281 |
282 const Chunk& chunk = chunks_[current_.chunk_no]; | 282 const Chunk& chunk = chunks_[current_.chunk_no]; |
283 | 283 |
284 // The buffer_ is writable, but buffer_*_ members are const. So we get a | 284 // The buffer_ is writable, but buffer_*_ members are const. So we get a |
285 // non-const pointer into buffer that points to the same char as buffer_end_. | 285 // non-const pointer into buffer that points to the same char as buffer_end_. |
286 uint16_t* cursor = buffer_ + (buffer_end_ - buffer_start_); | 286 uint16_t* cursor = buffer_ + (buffer_end_ - buffer_start_); |
287 DCHECK_EQ(cursor, buffer_end_); | 287 DCHECK_EQ(cursor, buffer_end_); |
288 | 288 |
289 static const unibrow::uchar kUtf8Bom = 0xfeff; | |
290 | |
289 unibrow::Utf8::Utf8IncrementalBuffer incomplete_char = | 291 unibrow::Utf8::Utf8IncrementalBuffer incomplete_char = |
290 current_.pos.incomplete_char; | 292 current_.pos.incomplete_char; |
291 size_t it; | 293 size_t it; |
292 for (it = current_.pos.bytes - chunk.start.bytes; | 294 for (it = current_.pos.bytes - chunk.start.bytes; |
293 it < chunk.length && cursor + 1 < buffer_start_ + kBufferSize; it++) { | 295 it < chunk.length && cursor + 1 < buffer_start_ + kBufferSize; it++) { |
294 unibrow::uchar t = | 296 unibrow::uchar t = |
295 unibrow::Utf8::ValueOfIncremental(chunk.data[it], &incomplete_char); | 297 unibrow::Utf8::ValueOfIncremental(chunk.data[it], &incomplete_char); |
296 if (t == unibrow::Utf8::kIncomplete) continue; | 298 if (t == unibrow::Utf8::kIncomplete) continue; |
297 if (t <= unibrow::Utf16::kMaxNonSurrogateCharCode) { | 299 if (V8_LIKELY(t < kUtf8Bom)) { |
vogelheim
2016/09/20 16:17:58
1st + 3rd branch could be merged, but I wanted to
marja
2016/09/20 18:52:30
I think this version is pretty readable too.
| |
300 *(cursor++) = static_cast<uc16>(t); // By far the most frequent case. | |
301 } else if (t == kUtf8Bom && current_.pos.bytes + it == 2) { | |
302 // BOM detected at beginning of the stream. Don't copy it. | |
303 } else if (t <= unibrow::Utf16::kMaxNonSurrogateCharCode) { | |
298 *(cursor++) = static_cast<uc16>(t); | 304 *(cursor++) = static_cast<uc16>(t); |
299 } else { | 305 } else { |
300 *(cursor++) = unibrow::Utf16::LeadSurrogate(t); | 306 *(cursor++) = unibrow::Utf16::LeadSurrogate(t); |
301 *(cursor++) = unibrow::Utf16::TrailSurrogate(t); | 307 *(cursor++) = unibrow::Utf16::TrailSurrogate(t); |
302 } | 308 } |
303 } | 309 } |
304 | 310 |
305 current_.pos.bytes = chunk.start.bytes + it; | 311 current_.pos.bytes = chunk.start.bytes + it; |
306 current_.pos.chars += (cursor - buffer_end_); | 312 current_.pos.chars += (cursor - buffer_end_); |
307 current_.pos.incomplete_char = incomplete_char; | 313 current_.pos.incomplete_char = incomplete_char; |
(...skipping 346 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
654 return new OneByteExternalStreamingStream(source_stream); | 660 return new OneByteExternalStreamingStream(source_stream); |
655 case v8::ScriptCompiler::StreamedSource::UTF8: | 661 case v8::ScriptCompiler::StreamedSource::UTF8: |
656 return new Utf8ExternalStreamingStream(source_stream); | 662 return new Utf8ExternalStreamingStream(source_stream); |
657 } | 663 } |
658 UNREACHABLE(); | 664 UNREACHABLE(); |
659 return nullptr; | 665 return nullptr; |
660 } | 666 } |
661 | 667 |
662 } // namespace internal | 668 } // namespace internal |
663 } // namespace v8 | 669 } // namespace v8 |
OLD | NEW |