OLD | NEW |
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/v8.h" | 5 #include "src/v8.h" |
6 | 6 |
7 #include "src/scanner-character-streams.h" | 7 #include "src/scanner-character-streams.h" |
8 | 8 |
| 9 #include "include/v8.h" |
9 #include "src/handles.h" | 10 #include "src/handles.h" |
10 #include "src/unicode-inl.h" | 11 #include "src/unicode-inl.h" |
11 | 12 |
12 namespace v8 { | 13 namespace v8 { |
13 namespace internal { | 14 namespace internal { |
14 | 15 |
| 16 namespace { |
| 17 |
| 18 unsigned CopyCharsHelper(uint16_t* dest, unsigned length, const char* src, |
| 19 unsigned* src_pos, unsigned src_length, |
| 20 ExternalSourceStream::Encoding encoding) { |
| 21 if (encoding == ExternalSourceStream::UTF8) { |
| 22 return v8::internal::Utf8ToUtf16CharacterStream::CopyChars( |
| 23 dest, length, reinterpret_cast<const uint8_t*>(src), src_pos, |
| 24 src_length); |
| 25 } |
| 26 |
| 27 unsigned to_fill = length; |
| 28 if (to_fill > src_length - *src_pos) to_fill = src_length - *src_pos; |
| 29 |
| 30 if (encoding == ExternalSourceStream::ONE_BYTE) { |
| 31 v8::internal::CopyChars<uint8_t, uint16_t>( |
| 32 dest, reinterpret_cast<const uint8_t*>(src + *src_pos), to_fill); |
| 33 } else { |
| 34 DCHECK(encoding == ExternalSourceStream::TWO_BYTE); |
| 35 v8::internal::CopyChars<uint16_t, uint16_t>( |
| 36 dest, reinterpret_cast<const uint16_t*>(src + *src_pos), to_fill); |
| 37 } |
| 38 *src_pos += to_fill; |
| 39 return to_fill; |
| 40 } |
| 41 |
| 42 } // namespace |
| 43 |
| 44 |
15 // ---------------------------------------------------------------------------- | 45 // ---------------------------------------------------------------------------- |
16 // BufferedUtf16CharacterStreams | 46 // BufferedUtf16CharacterStreams |
17 | 47 |
18 BufferedUtf16CharacterStream::BufferedUtf16CharacterStream() | 48 BufferedUtf16CharacterStream::BufferedUtf16CharacterStream() |
19 : Utf16CharacterStream(), | 49 : Utf16CharacterStream(), |
20 pushback_limit_(NULL) { | 50 pushback_limit_(NULL) { |
21 // Initialize buffer as being empty. First read will fill the buffer. | 51 // Initialize buffer as being empty. First read will fill the buffer. |
22 buffer_cursor_ = buffer_; | 52 buffer_cursor_ = buffer_; |
23 buffer_end_ = buffer_; | 53 buffer_end_ = buffer_; |
24 } | 54 } |
(...skipping 113 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
138 raw_data_length_(length), | 168 raw_data_length_(length), |
139 raw_data_pos_(0), | 169 raw_data_pos_(0), |
140 raw_character_position_(0) { | 170 raw_character_position_(0) { |
141 ReadBlock(); | 171 ReadBlock(); |
142 } | 172 } |
143 | 173 |
144 | 174 |
145 Utf8ToUtf16CharacterStream::~Utf8ToUtf16CharacterStream() { } | 175 Utf8ToUtf16CharacterStream::~Utf8ToUtf16CharacterStream() { } |
146 | 176 |
147 | 177 |
| 178 unsigned Utf8ToUtf16CharacterStream::CopyChars(uint16_t* dest, unsigned length, |
| 179 const byte* src, |
| 180 unsigned* src_pos, |
| 181 unsigned src_length) { |
| 182 static const unibrow::uchar kMaxUtf16Character = 0xffff; |
| 183 unsigned i = 0; |
| 184 // Because of the UTF-16 lead and trail surrogates, we stop filling the buffer |
| 185 // one character early (in the normal case), because we need to have at least |
| 186 // two free spaces in the buffer to be sure that the next character will fit. |
| 187 while (i < length - 1) { |
| 188 if (*src_pos == src_length) break; |
| 189 unibrow::uchar c = src[*src_pos]; |
| 190 if (c <= unibrow::Utf8::kMaxOneByteChar) { |
| 191 *src_pos = *src_pos + 1; |
| 192 } else { |
| 193 c = unibrow::Utf8::CalculateValue(src + *src_pos, src_length - *src_pos, |
| 194 src_pos); |
| 195 } |
| 196 if (c > kMaxUtf16Character) { |
| 197 dest[i++] = unibrow::Utf16::LeadSurrogate(c); |
| 198 dest[i++] = unibrow::Utf16::TrailSurrogate(c); |
| 199 } else { |
| 200 dest[i++] = static_cast<uc16>(c); |
| 201 } |
| 202 } |
| 203 return i; |
| 204 } |
| 205 |
| 206 |
148 unsigned Utf8ToUtf16CharacterStream::BufferSeekForward(unsigned delta) { | 207 unsigned Utf8ToUtf16CharacterStream::BufferSeekForward(unsigned delta) { |
149 unsigned old_pos = pos_; | 208 unsigned old_pos = pos_; |
150 unsigned target_pos = pos_ + delta; | 209 unsigned target_pos = pos_ + delta; |
151 SetRawPosition(target_pos); | 210 SetRawPosition(target_pos); |
152 pos_ = raw_character_position_; | 211 pos_ = raw_character_position_; |
153 ReadBlock(); | 212 ReadBlock(); |
154 return pos_ - old_pos; | 213 return pos_ - old_pos; |
155 } | 214 } |
156 | 215 |
157 | 216 |
158 unsigned Utf8ToUtf16CharacterStream::FillBuffer(unsigned char_position) { | 217 unsigned Utf8ToUtf16CharacterStream::FillBuffer(unsigned char_position) { |
159 static const unibrow::uchar kMaxUtf16Character = 0xffff; | |
160 SetRawPosition(char_position); | 218 SetRawPosition(char_position); |
161 if (raw_character_position_ != char_position) { | 219 if (raw_character_position_ != char_position) { |
162 // char_position was not a valid position in the stream (hit the end | 220 // char_position was not a valid position in the stream (hit the end |
163 // while spooling to it). | 221 // while spooling to it). |
164 return 0u; | 222 return 0u; |
165 } | 223 } |
166 unsigned i = 0; | 224 unsigned i = CopyChars(buffer_, kBufferSize, raw_data_, &raw_data_pos_, |
167 while (i < kBufferSize - 1) { | 225 raw_data_length_); |
168 if (raw_data_pos_ == raw_data_length_) break; | |
169 unibrow::uchar c = raw_data_[raw_data_pos_]; | |
170 if (c <= unibrow::Utf8::kMaxOneByteChar) { | |
171 raw_data_pos_++; | |
172 } else { | |
173 c = unibrow::Utf8::CalculateValue(raw_data_ + raw_data_pos_, | |
174 raw_data_length_ - raw_data_pos_, | |
175 &raw_data_pos_); | |
176 } | |
177 if (c > kMaxUtf16Character) { | |
178 buffer_[i++] = unibrow::Utf16::LeadSurrogate(c); | |
179 buffer_[i++] = unibrow::Utf16::TrailSurrogate(c); | |
180 } else { | |
181 buffer_[i++] = static_cast<uc16>(c); | |
182 } | |
183 } | |
184 raw_character_position_ = char_position + i; | 226 raw_character_position_ = char_position + i; |
185 return i; | 227 return i; |
186 } | 228 } |
187 | 229 |
188 | 230 |
// Bit patterns for classifying UTF-8 bytes. In UTF-8 a continuation byte has
// the form 10xxxxxx, i.e. (byte & 0xC0) == 0x80.
// NOTE(review): the code that uses these constants is outside this view;
// presumably it tests (byte & kUtf8MultiByteMask) against
// kUtf8MultiByteCharFollower -- confirm at the use sites.
static const byte kUtf8MultiByteMask = 0xC0;
static const byte kUtf8MultiByteCharFollower = 0x80;
191 | 233 |
192 | 234 |
193 #ifdef DEBUG | 235 #ifdef DEBUG |
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
269 Utf8CharacterForward(raw_data_, &raw_data_pos_); | 311 Utf8CharacterForward(raw_data_, &raw_data_pos_); |
270 raw_character_position_++; | 312 raw_character_position_++; |
271 DCHECK(raw_data_pos_ - old_pos <= 4); | 313 DCHECK(raw_data_pos_ - old_pos <= 4); |
272 if (raw_data_pos_ - old_pos == 4) raw_character_position_++; | 314 if (raw_data_pos_ - old_pos == 4) raw_character_position_++; |
273 } | 315 } |
274 // No surrogate pair splitting. | 316 // No surrogate pair splitting. |
275 DCHECK(raw_character_position_ == target_position); | 317 DCHECK(raw_character_position_ == target_position); |
276 } | 318 } |
277 | 319 |
278 | 320 |
| 321 unsigned ExternalStreamingStream::FillBuffer(unsigned position) { |
| 322 // Ignore "position" which is the position in the decoded data. Instead, |
| 323 // ExternalStringStream keeps track of the position in the raw data. |
| 324 unsigned data_in_buffer = 0; |
| 325 // Note that the UTF-8 decoder might not be able to fill the buffer |
| 326 // completely; it will typically leave the last character empty (see |
| 327 // Utf8ToUtf16CharacterStream::CopyChars). |
| 328 while (data_in_buffer < kBufferSize - 1) { |
| 329 if (current_data_ == NULL) { |
| 330 // GetSomeData will wait until the embedder has enough data. |
| 331 current_data_length_ = |
| 332 source_stream_->GetSomeData(¤t_data_, raw_position_); |
| 333 raw_position_ += current_data_length_; |
| 334 current_data_offset_ = 0; |
| 335 // Did the data stream end? |
| 336 if (current_data_length_ == 0) { |
| 337 return data_in_buffer; |
| 338 } |
| 339 } |
| 340 // Fill the buffer from current_data_. |
| 341 unsigned new_offset = 0; |
| 342 unsigned new_chars_in_buffer = CopyCharsHelper( |
| 343 buffer_ + data_in_buffer, kBufferSize - data_in_buffer, |
| 344 current_data_ + current_data_offset_, &new_offset, |
| 345 current_data_length_ - current_data_offset_, source_stream_->encoding); |
| 346 data_in_buffer += new_chars_in_buffer; |
| 347 current_data_offset_ += new_offset; |
| 348 DCHECK(data_in_buffer <= kBufferSize); |
| 349 // Did we use all the data? |
| 350 if (current_data_offset_ == current_data_length_) { |
| 351 delete[] current_data_; |
| 352 current_data_ = NULL; |
| 353 current_data_length_ = 0; |
| 354 current_data_offset_ = 0; |
| 355 } |
| 356 } |
| 357 return data_in_buffer; |
| 358 } |
| 359 |
| 360 |
279 // ---------------------------------------------------------------------------- | 361 // ---------------------------------------------------------------------------- |
280 // ExternalTwoByteStringUtf16CharacterStream | 362 // ExternalTwoByteStringUtf16CharacterStream |
281 | 363 |
282 ExternalTwoByteStringUtf16CharacterStream:: | 364 ExternalTwoByteStringUtf16CharacterStream:: |
283 ~ExternalTwoByteStringUtf16CharacterStream() { } | 365 ~ExternalTwoByteStringUtf16CharacterStream() { } |
284 | 366 |
285 | 367 |
286 ExternalTwoByteStringUtf16CharacterStream | 368 ExternalTwoByteStringUtf16CharacterStream |
287 ::ExternalTwoByteStringUtf16CharacterStream( | 369 ::ExternalTwoByteStringUtf16CharacterStream( |
288 Handle<ExternalTwoByteString> data, | 370 Handle<ExternalTwoByteString> data, |
289 int start_position, | 371 int start_position, |
290 int end_position) | 372 int end_position) |
291 : Utf16CharacterStream(), | 373 : Utf16CharacterStream(), |
292 source_(data), | 374 source_(data), |
293 raw_data_(data->GetTwoByteData(start_position)) { | 375 raw_data_(data->GetTwoByteData(start_position)) { |
294 buffer_cursor_ = raw_data_, | 376 buffer_cursor_ = raw_data_, |
295 buffer_end_ = raw_data_ + (end_position - start_position); | 377 buffer_end_ = raw_data_ + (end_position - start_position); |
296 pos_ = start_position; | 378 pos_ = start_position; |
297 } | 379 } |
298 | 380 |
299 } } // namespace v8::internal | 381 } } // namespace v8::internal |
OLD | NEW |