| OLD | NEW |
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "src/v8.h" | 5 #include "src/v8.h" |
| 6 | 6 |
| 7 #include "src/scanner-character-streams.h" | 7 #include "src/scanner-character-streams.h" |
| 8 | 8 |
| 9 #include "src/handles.h" | 9 #include "src/handles.h" |
| 10 #include "src/unicode-inl.h" | 10 #include "src/unicode-inl.h" |
| (...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 48 // We NULL the pushback_limit_ if pushing all the way back to the | 48 // We NULL the pushback_limit_ if pushing all the way back to the |
| 49 // start of the buffer. | 49 // start of the buffer. |
| 50 | 50 |
| 51 if (pushback_limit_ == NULL) { | 51 if (pushback_limit_ == NULL) { |
| 52 // Enter pushback mode. | 52 // Enter pushback mode. |
| 53 pushback_limit_ = buffer_end_; | 53 pushback_limit_ = buffer_end_; |
| 54 buffer_end_ = buffer_ + kBufferSize; | 54 buffer_end_ = buffer_ + kBufferSize; |
| 55 buffer_cursor_ = buffer_end_; | 55 buffer_cursor_ = buffer_end_; |
| 56 } | 56 } |
| 57 // Ensure that there is room for at least one pushback. | 57 // Ensure that there is room for at least one pushback. |
| 58 ASSERT(buffer_cursor_ > buffer_); | 58 DCHECK(buffer_cursor_ > buffer_); |
| 59 ASSERT(pos_ > 0); | 59 DCHECK(pos_ > 0); |
| 60 buffer_[--buffer_cursor_ - buffer_] = character; | 60 buffer_[--buffer_cursor_ - buffer_] = character; |
| 61 if (buffer_cursor_ == buffer_) { | 61 if (buffer_cursor_ == buffer_) { |
| 62 pushback_limit_ = NULL; | 62 pushback_limit_ = NULL; |
| 63 } else if (buffer_cursor_ < pushback_limit_) { | 63 } else if (buffer_cursor_ < pushback_limit_) { |
| 64 pushback_limit_ = buffer_cursor_; | 64 pushback_limit_ = buffer_cursor_; |
| 65 } | 65 } |
| 66 pos_--; | 66 pos_--; |
| 67 } | 67 } |
| 68 | 68 |
| 69 | 69 |
| (...skipping 25 matching lines...) Expand all Loading... |
| 95 // ---------------------------------------------------------------------------- | 95 // ---------------------------------------------------------------------------- |
| 96 // GenericStringUtf16CharacterStream | 96 // GenericStringUtf16CharacterStream |
| 97 | 97 |
| 98 | 98 |
| 99 GenericStringUtf16CharacterStream::GenericStringUtf16CharacterStream( | 99 GenericStringUtf16CharacterStream::GenericStringUtf16CharacterStream( |
| 100 Handle<String> data, | 100 Handle<String> data, |
| 101 unsigned start_position, | 101 unsigned start_position, |
| 102 unsigned end_position) | 102 unsigned end_position) |
| 103 : string_(data), | 103 : string_(data), |
| 104 length_(end_position) { | 104 length_(end_position) { |
| 105 ASSERT(end_position >= start_position); | 105 DCHECK(end_position >= start_position); |
| 106 pos_ = start_position; | 106 pos_ = start_position; |
| 107 } | 107 } |
| 108 | 108 |
| 109 | 109 |
| 110 GenericStringUtf16CharacterStream::~GenericStringUtf16CharacterStream() { } | 110 GenericStringUtf16CharacterStream::~GenericStringUtf16CharacterStream() { } |
| 111 | 111 |
| 112 | 112 |
| 113 unsigned GenericStringUtf16CharacterStream::BufferSeekForward(unsigned delta) { | 113 unsigned GenericStringUtf16CharacterStream::BufferSeekForward(unsigned delta) { |
| 114 unsigned old_pos = pos_; | 114 unsigned old_pos = pos_; |
| 115 pos_ = Min(pos_ + delta, length_); | 115 pos_ = Min(pos_ + delta, length_); |
| (...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 201 static bool IsUtf8MultiCharacterFollower(byte later_byte) { | 201 static bool IsUtf8MultiCharacterFollower(byte later_byte) { |
| 202 return (later_byte & kUtf8MultiByteMask) == kUtf8MultiByteCharFollower; | 202 return (later_byte & kUtf8MultiByteMask) == kUtf8MultiByteCharFollower; |
| 203 } | 203 } |
| 204 | 204 |
| 205 | 205 |
| 206 // Move the cursor back to point at the preceding UTF-8 character start | 206 // Move the cursor back to point at the preceding UTF-8 character start |
| 207 // in the buffer. | 207 // in the buffer. |
| 208 static inline void Utf8CharacterBack(const byte* buffer, unsigned* cursor) { | 208 static inline void Utf8CharacterBack(const byte* buffer, unsigned* cursor) { |
| 209 byte character = buffer[--*cursor]; | 209 byte character = buffer[--*cursor]; |
| 210 if (character > unibrow::Utf8::kMaxOneByteChar) { | 210 if (character > unibrow::Utf8::kMaxOneByteChar) { |
| 211 ASSERT(IsUtf8MultiCharacterFollower(character)); | 211 DCHECK(IsUtf8MultiCharacterFollower(character)); |
| 212 // Last byte of a multi-byte character encoding. Step backwards until | 212 // Last byte of a multi-byte character encoding. Step backwards until |
| 213 // pointing to the first byte of the encoding, recognized by having the | 213 // pointing to the first byte of the encoding, recognized by having the |
| 214 // top two bits set. | 214 // top two bits set. |
| 215 while (IsUtf8MultiCharacterFollower(buffer[--*cursor])) { } | 215 while (IsUtf8MultiCharacterFollower(buffer[--*cursor])) { } |
| 216 ASSERT(IsUtf8MultiCharacterStart(buffer[*cursor])); | 216 DCHECK(IsUtf8MultiCharacterStart(buffer[*cursor])); |
| 217 } | 217 } |
| 218 } | 218 } |
| 219 | 219 |
| 220 | 220 |
| 221 // Move the cursor forward to point at the next following UTF-8 character start | 221 // Move the cursor forward to point at the next following UTF-8 character start |
| 222 // in the buffer. | 222 // in the buffer. |
| 223 static inline void Utf8CharacterForward(const byte* buffer, unsigned* cursor) { | 223 static inline void Utf8CharacterForward(const byte* buffer, unsigned* cursor) { |
| 224 byte character = buffer[(*cursor)++]; | 224 byte character = buffer[(*cursor)++]; |
| 225 if (character > unibrow::Utf8::kMaxOneByteChar) { | 225 if (character > unibrow::Utf8::kMaxOneByteChar) { |
| 226 // First character of a multi-byte character encoding. | 226 // First character of a multi-byte character encoding. |
| 227 // The number of most-significant one-bits determines the length of the | 227 // The number of most-significant one-bits determines the length of the |
| 228 // encoding: | 228 // encoding: |
| 229 // 110..... - (0xCx, 0xDx) one additional byte (minimum). | 229 // 110..... - (0xCx, 0xDx) one additional byte (minimum). |
| 230 // 1110.... - (0xEx) two additional bytes. | 230 // 1110.... - (0xEx) two additional bytes. |
| 231 // 11110... - (0xFx) three additional bytes (maximum). | 231 // 11110... - (0xFx) three additional bytes (maximum). |
| 232 ASSERT(IsUtf8MultiCharacterStart(character)); | 232 DCHECK(IsUtf8MultiCharacterStart(character)); |
| 233 // Additional bytes is: | 233 // Additional bytes is: |
| 234 // 1 if value in range 0xC0 .. 0xDF. | 234 // 1 if value in range 0xC0 .. 0xDF. |
| 235 // 2 if value in range 0xE0 .. 0xEF. | 235 // 2 if value in range 0xE0 .. 0xEF. |
| 236 // 3 if value in range 0xF0 .. 0xF7. | 236 // 3 if value in range 0xF0 .. 0xF7. |
| 237 // Encode that in a single value. | 237 // Encode that in a single value. |
| 238 unsigned additional_bytes = | 238 unsigned additional_bytes = |
| 239 ((0x3211u) >> (((character - 0xC0) >> 2) & 0xC)) & 0x03; | 239 ((0x3211u) >> (((character - 0xC0) >> 2) & 0xC)) & 0x03; |
| 240 *cursor += additional_bytes; | 240 *cursor += additional_bytes; |
| 241 ASSERT(!IsUtf8MultiCharacterFollower(buffer[1 + additional_bytes])); | 241 DCHECK(!IsUtf8MultiCharacterFollower(buffer[1 + additional_bytes])); |
| 242 } | 242 } |
| 243 } | 243 } |
| 244 | 244 |
| 245 | 245 |
| 246 // This can't set a raw position between two surrogate pairs, since there | 246 // This can't set a raw position between two surrogate pairs, since there |
| 247 // is no position in the UTF8 stream that corresponds to that. This assumes | 247 // is no position in the UTF8 stream that corresponds to that. This assumes |
| 248 // that the surrogate pair is correctly coded as a 4 byte UTF-8 sequence. If | 248 // that the surrogate pair is correctly coded as a 4 byte UTF-8 sequence. If |
| 249 // it is illegally coded as two 3 byte sequences then there is no problem here. | 249 // it is illegally coded as two 3 byte sequences then there is no problem here. |
| 250 void Utf8ToUtf16CharacterStream::SetRawPosition(unsigned target_position) { | 250 void Utf8ToUtf16CharacterStream::SetRawPosition(unsigned target_position) { |
| 251 if (raw_character_position_ > target_position) { | 251 if (raw_character_position_ > target_position) { |
| 252 // Spool backwards in utf8 buffer. | 252 // Spool backwards in utf8 buffer. |
| 253 do { | 253 do { |
| 254 int old_pos = raw_data_pos_; | 254 int old_pos = raw_data_pos_; |
| 255 Utf8CharacterBack(raw_data_, &raw_data_pos_); | 255 Utf8CharacterBack(raw_data_, &raw_data_pos_); |
| 256 raw_character_position_--; | 256 raw_character_position_--; |
| 257 ASSERT(old_pos - raw_data_pos_ <= 4); | 257 DCHECK(old_pos - raw_data_pos_ <= 4); |
| 258 // Step back over both code units for surrogate pairs. | 258 // Step back over both code units for surrogate pairs. |
| 259 if (old_pos - raw_data_pos_ == 4) raw_character_position_--; | 259 if (old_pos - raw_data_pos_ == 4) raw_character_position_--; |
| 260 } while (raw_character_position_ > target_position); | 260 } while (raw_character_position_ > target_position); |
| 261 // No surrogate pair splitting. | 261 // No surrogate pair splitting. |
| 262 ASSERT(raw_character_position_ == target_position); | 262 DCHECK(raw_character_position_ == target_position); |
| 263 return; | 263 return; |
| 264 } | 264 } |
| 265 // Spool forwards in the utf8 buffer. | 265 // Spool forwards in the utf8 buffer. |
| 266 while (raw_character_position_ < target_position) { | 266 while (raw_character_position_ < target_position) { |
| 267 if (raw_data_pos_ == raw_data_length_) return; | 267 if (raw_data_pos_ == raw_data_length_) return; |
| 268 int old_pos = raw_data_pos_; | 268 int old_pos = raw_data_pos_; |
| 269 Utf8CharacterForward(raw_data_, &raw_data_pos_); | 269 Utf8CharacterForward(raw_data_, &raw_data_pos_); |
| 270 raw_character_position_++; | 270 raw_character_position_++; |
| 271 ASSERT(raw_data_pos_ - old_pos <= 4); | 271 DCHECK(raw_data_pos_ - old_pos <= 4); |
| 272 if (raw_data_pos_ - old_pos == 4) raw_character_position_++; | 272 if (raw_data_pos_ - old_pos == 4) raw_character_position_++; |
| 273 } | 273 } |
| 274 // No surrogate pair splitting. | 274 // No surrogate pair splitting. |
| 275 ASSERT(raw_character_position_ == target_position); | 275 DCHECK(raw_character_position_ == target_position); |
| 276 } | 276 } |
| 277 | 277 |
| 278 | 278 |
| 279 // ---------------------------------------------------------------------------- | 279 // ---------------------------------------------------------------------------- |
| 280 // ExternalTwoByteStringUtf16CharacterStream | 280 // ExternalTwoByteStringUtf16CharacterStream |
| 281 | 281 |
| 282 ExternalTwoByteStringUtf16CharacterStream:: | 282 ExternalTwoByteStringUtf16CharacterStream:: |
| 283 ~ExternalTwoByteStringUtf16CharacterStream() { } | 283 ~ExternalTwoByteStringUtf16CharacterStream() { } |
| 284 | 284 |
| 285 | 285 |
| 286 ExternalTwoByteStringUtf16CharacterStream | 286 ExternalTwoByteStringUtf16CharacterStream |
| 287 ::ExternalTwoByteStringUtf16CharacterStream( | 287 ::ExternalTwoByteStringUtf16CharacterStream( |
| 288 Handle<ExternalTwoByteString> data, | 288 Handle<ExternalTwoByteString> data, |
| 289 int start_position, | 289 int start_position, |
| 290 int end_position) | 290 int end_position) |
| 291 : Utf16CharacterStream(), | 291 : Utf16CharacterStream(), |
| 292 source_(data), | 292 source_(data), |
| 293 raw_data_(data->GetTwoByteData(start_position)) { | 293 raw_data_(data->GetTwoByteData(start_position)) { |
| 294 buffer_cursor_ = raw_data_, | 294 buffer_cursor_ = raw_data_, |
| 295 buffer_end_ = raw_data_ + (end_position - start_position); | 295 buffer_end_ = raw_data_ + (end_position - start_position); |
| 296 pos_ = start_position; | 296 pos_ = start_position; |
| 297 } | 297 } |
| 298 | 298 |
| 299 } } // namespace v8::internal | 299 } } // namespace v8::internal |
| OLD | NEW |