OLD | NEW |
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/v8.h" | 5 #include "src/v8.h" |
6 | 6 |
7 #include "src/scanner-character-streams.h" | 7 #include "src/scanner-character-streams.h" |
8 | 8 |
9 #include "src/handles.h" | 9 #include "src/handles.h" |
10 #include "src/unicode-inl.h" | 10 #include "src/unicode-inl.h" |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
48 // We NULL the pushback_limit_ if pushing all the way back to the | 48 // We NULL the pushback_limit_ if pushing all the way back to the |
49 // start of the buffer. | 49 // start of the buffer. |
50 | 50 |
51 if (pushback_limit_ == NULL) { | 51 if (pushback_limit_ == NULL) { |
52 // Enter pushback mode. | 52 // Enter pushback mode. |
53 pushback_limit_ = buffer_end_; | 53 pushback_limit_ = buffer_end_; |
54 buffer_end_ = buffer_ + kBufferSize; | 54 buffer_end_ = buffer_ + kBufferSize; |
55 buffer_cursor_ = buffer_end_; | 55 buffer_cursor_ = buffer_end_; |
56 } | 56 } |
57 // Ensure that there is room for at least one pushback. | 57 // Ensure that there is room for at least one pushback. |
58 ASSERT(buffer_cursor_ > buffer_); | 58 DCHECK(buffer_cursor_ > buffer_); |
59 ASSERT(pos_ > 0); | 59 DCHECK(pos_ > 0); |
60 buffer_[--buffer_cursor_ - buffer_] = character; | 60 buffer_[--buffer_cursor_ - buffer_] = character; |
61 if (buffer_cursor_ == buffer_) { | 61 if (buffer_cursor_ == buffer_) { |
62 pushback_limit_ = NULL; | 62 pushback_limit_ = NULL; |
63 } else if (buffer_cursor_ < pushback_limit_) { | 63 } else if (buffer_cursor_ < pushback_limit_) { |
64 pushback_limit_ = buffer_cursor_; | 64 pushback_limit_ = buffer_cursor_; |
65 } | 65 } |
66 pos_--; | 66 pos_--; |
67 } | 67 } |
68 | 68 |
69 | 69 |
(...skipping 25 matching lines...) Expand all Loading... |
95 // ---------------------------------------------------------------------------- | 95 // ---------------------------------------------------------------------------- |
96 // GenericStringUtf16CharacterStream | 96 // GenericStringUtf16CharacterStream |
97 | 97 |
98 | 98 |
99 GenericStringUtf16CharacterStream::GenericStringUtf16CharacterStream( | 99 GenericStringUtf16CharacterStream::GenericStringUtf16CharacterStream( |
100 Handle<String> data, | 100 Handle<String> data, |
101 unsigned start_position, | 101 unsigned start_position, |
102 unsigned end_position) | 102 unsigned end_position) |
103 : string_(data), | 103 : string_(data), |
104 length_(end_position) { | 104 length_(end_position) { |
105 ASSERT(end_position >= start_position); | 105 DCHECK(end_position >= start_position); |
106 pos_ = start_position; | 106 pos_ = start_position; |
107 } | 107 } |
108 | 108 |
109 | 109 |
110 GenericStringUtf16CharacterStream::~GenericStringUtf16CharacterStream() { } | 110 GenericStringUtf16CharacterStream::~GenericStringUtf16CharacterStream() { } |
111 | 111 |
112 | 112 |
113 unsigned GenericStringUtf16CharacterStream::BufferSeekForward(unsigned delta) { | 113 unsigned GenericStringUtf16CharacterStream::BufferSeekForward(unsigned delta) { |
114 unsigned old_pos = pos_; | 114 unsigned old_pos = pos_; |
115 pos_ = Min(pos_ + delta, length_); | 115 pos_ = Min(pos_ + delta, length_); |
(...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
201 static bool IsUtf8MultiCharacterFollower(byte later_byte) { | 201 static bool IsUtf8MultiCharacterFollower(byte later_byte) { |
202 return (later_byte & kUtf8MultiByteMask) == kUtf8MultiByteCharFollower; | 202 return (later_byte & kUtf8MultiByteMask) == kUtf8MultiByteCharFollower; |
203 } | 203 } |
204 | 204 |
205 | 205 |
206 // Move the cursor back to point at the preceding UTF-8 character start | 206 // Move the cursor back to point at the preceding UTF-8 character start |
207 // in the buffer. | 207 // in the buffer. |
208 static inline void Utf8CharacterBack(const byte* buffer, unsigned* cursor) { | 208 static inline void Utf8CharacterBack(const byte* buffer, unsigned* cursor) { |
209 byte character = buffer[--*cursor]; | 209 byte character = buffer[--*cursor]; |
210 if (character > unibrow::Utf8::kMaxOneByteChar) { | 210 if (character > unibrow::Utf8::kMaxOneByteChar) { |
211 ASSERT(IsUtf8MultiCharacterFollower(character)); | 211 DCHECK(IsUtf8MultiCharacterFollower(character)); |
212 // Last byte of a multi-byte character encoding. Step backwards until | 212 // Last byte of a multi-byte character encoding. Step backwards until |
213 // pointing to the first byte of the encoding, recognized by having the | 213 // pointing to the first byte of the encoding, recognized by having the |
214 // top two bits set. | 214 // top two bits set. |
215 while (IsUtf8MultiCharacterFollower(buffer[--*cursor])) { } | 215 while (IsUtf8MultiCharacterFollower(buffer[--*cursor])) { } |
216 ASSERT(IsUtf8MultiCharacterStart(buffer[*cursor])); | 216 DCHECK(IsUtf8MultiCharacterStart(buffer[*cursor])); |
217 } | 217 } |
218 } | 218 } |
219 | 219 |
220 | 220 |
221 // Move the cursor forward to point at the next following UTF-8 character start | 221 // Move the cursor forward to point at the next following UTF-8 character start |
222 // in the buffer. | 222 // in the buffer. |
223 static inline void Utf8CharacterForward(const byte* buffer, unsigned* cursor) { | 223 static inline void Utf8CharacterForward(const byte* buffer, unsigned* cursor) { |
224 byte character = buffer[(*cursor)++]; | 224 byte character = buffer[(*cursor)++]; |
225 if (character > unibrow::Utf8::kMaxOneByteChar) { | 225 if (character > unibrow::Utf8::kMaxOneByteChar) { |
226 // First character of a multi-byte character encoding. | 226 // First character of a multi-byte character encoding. |
227 // The number of most-significant one-bits determines the length of the | 227 // The number of most-significant one-bits determines the length of the |
228 // encoding: | 228 // encoding: |
229 // 110..... - (0xCx, 0xDx) one additional byte (minimum). | 229 // 110..... - (0xCx, 0xDx) one additional byte (minimum). |
230 // 1110.... - (0xEx) two additional bytes. | 230 // 1110.... - (0xEx) two additional bytes. |
231 // 11110... - (0xFx) three additional bytes (maximum). | 231 // 11110... - (0xFx) three additional bytes (maximum). |
232 ASSERT(IsUtf8MultiCharacterStart(character)); | 232 DCHECK(IsUtf8MultiCharacterStart(character)); |
233 // Additional bytes is: | 233 // Additional bytes is: |
234 // 1 if value in range 0xC0 .. 0xDF. | 234 // 1 if value in range 0xC0 .. 0xDF. |
235 // 2 if value in range 0xE0 .. 0xEF. | 235 // 2 if value in range 0xE0 .. 0xEF. |
236 // 3 if value in range 0xF0 .. 0xF7. | 236 // 3 if value in range 0xF0 .. 0xF7. |
237 // Encode that in a single value. | 237 // Encode that in a single value. |
238 unsigned additional_bytes = | 238 unsigned additional_bytes = |
239 ((0x3211u) >> (((character - 0xC0) >> 2) & 0xC)) & 0x03; | 239 ((0x3211u) >> (((character - 0xC0) >> 2) & 0xC)) & 0x03; |
240 *cursor += additional_bytes; | 240 *cursor += additional_bytes; |
241 ASSERT(!IsUtf8MultiCharacterFollower(buffer[1 + additional_bytes])); | 241 DCHECK(!IsUtf8MultiCharacterFollower(buffer[1 + additional_bytes])); |
242 } | 242 } |
243 } | 243 } |
244 | 244 |
245 | 245 |
246 // This can't set a raw position between two surrogate pairs, since there | 246 // This can't set a raw position between two surrogate pairs, since there |
247 // is no position in the UTF8 stream that corresponds to that. This assumes | 247 // is no position in the UTF8 stream that corresponds to that. This assumes |
248 // that the surrogate pair is correctly coded as a 4 byte UTF-8 sequence. If | 248 // that the surrogate pair is correctly coded as a 4 byte UTF-8 sequence. If |
249 // it is illegally coded as two 3 byte sequences then there is no problem here. | 249 // it is illegally coded as two 3 byte sequences then there is no problem here. |
250 void Utf8ToUtf16CharacterStream::SetRawPosition(unsigned target_position) { | 250 void Utf8ToUtf16CharacterStream::SetRawPosition(unsigned target_position) { |
251 if (raw_character_position_ > target_position) { | 251 if (raw_character_position_ > target_position) { |
252 // Spool backwards in utf8 buffer. | 252 // Spool backwards in utf8 buffer. |
253 do { | 253 do { |
254 int old_pos = raw_data_pos_; | 254 int old_pos = raw_data_pos_; |
255 Utf8CharacterBack(raw_data_, &raw_data_pos_); | 255 Utf8CharacterBack(raw_data_, &raw_data_pos_); |
256 raw_character_position_--; | 256 raw_character_position_--; |
257 ASSERT(old_pos - raw_data_pos_ <= 4); | 257 DCHECK(old_pos - raw_data_pos_ <= 4); |
258 // Step back over both code units for surrogate pairs. | 258 // Step back over both code units for surrogate pairs. |
259 if (old_pos - raw_data_pos_ == 4) raw_character_position_--; | 259 if (old_pos - raw_data_pos_ == 4) raw_character_position_--; |
260 } while (raw_character_position_ > target_position); | 260 } while (raw_character_position_ > target_position); |
261 // No surrogate pair splitting. | 261 // No surrogate pair splitting. |
262 ASSERT(raw_character_position_ == target_position); | 262 DCHECK(raw_character_position_ == target_position); |
263 return; | 263 return; |
264 } | 264 } |
265 // Spool forwards in the utf8 buffer. | 265 // Spool forwards in the utf8 buffer. |
266 while (raw_character_position_ < target_position) { | 266 while (raw_character_position_ < target_position) { |
267 if (raw_data_pos_ == raw_data_length_) return; | 267 if (raw_data_pos_ == raw_data_length_) return; |
268 int old_pos = raw_data_pos_; | 268 int old_pos = raw_data_pos_; |
269 Utf8CharacterForward(raw_data_, &raw_data_pos_); | 269 Utf8CharacterForward(raw_data_, &raw_data_pos_); |
270 raw_character_position_++; | 270 raw_character_position_++; |
271 ASSERT(raw_data_pos_ - old_pos <= 4); | 271 DCHECK(raw_data_pos_ - old_pos <= 4); |
272 if (raw_data_pos_ - old_pos == 4) raw_character_position_++; | 272 if (raw_data_pos_ - old_pos == 4) raw_character_position_++; |
273 } | 273 } |
274 // No surrogate pair splitting. | 274 // No surrogate pair splitting. |
275 ASSERT(raw_character_position_ == target_position); | 275 DCHECK(raw_character_position_ == target_position); |
276 } | 276 } |
277 | 277 |
278 | 278 |
279 // ---------------------------------------------------------------------------- | 279 // ---------------------------------------------------------------------------- |
280 // ExternalTwoByteStringUtf16CharacterStream | 280 // ExternalTwoByteStringUtf16CharacterStream |
281 | 281 |
282 ExternalTwoByteStringUtf16CharacterStream:: | 282 ExternalTwoByteStringUtf16CharacterStream:: |
283 ~ExternalTwoByteStringUtf16CharacterStream() { } | 283 ~ExternalTwoByteStringUtf16CharacterStream() { } |
284 | 284 |
285 | 285 |
286 ExternalTwoByteStringUtf16CharacterStream | 286 ExternalTwoByteStringUtf16CharacterStream |
287 ::ExternalTwoByteStringUtf16CharacterStream( | 287 ::ExternalTwoByteStringUtf16CharacterStream( |
288 Handle<ExternalTwoByteString> data, | 288 Handle<ExternalTwoByteString> data, |
289 int start_position, | 289 int start_position, |
290 int end_position) | 290 int end_position) |
291 : Utf16CharacterStream(), | 291 : Utf16CharacterStream(), |
292 source_(data), | 292 source_(data), |
293 raw_data_(data->GetTwoByteData(start_position)) { | 293 raw_data_(data->GetTwoByteData(start_position)) { |
294 buffer_cursor_ = raw_data_, | 294 buffer_cursor_ = raw_data_, |
295 buffer_end_ = raw_data_ + (end_position - start_position); | 295 buffer_end_ = raw_data_ + (end_position - start_position); |
296 pos_ = start_position; | 296 pos_ = start_position; |
297 } | 297 } |
298 | 298 |
299 } } // namespace v8::internal | 299 } } // namespace v8::internal |
OLD | NEW |