OLD | NEW |
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/v8.h" | 5 #include "src/v8.h" |
6 | 6 |
7 #include "src/scanner-character-streams.h" | 7 #include "src/scanner-character-streams.h" |
8 | 8 |
| 9 #include "include/v8.h" |
9 #include "src/handles.h" | 10 #include "src/handles.h" |
10 #include "src/unicode-inl.h" | 11 #include "src/unicode-inl.h" |
11 | 12 |
12 namespace v8 { | 13 namespace v8 { |
13 namespace internal { | 14 namespace internal { |
14 | 15 |
| 16 namespace { |
| 17 |
| 18 unsigned CopyCharsHelper( |
| 19 uint16_t* dest, unsigned length, const char* src, unsigned* src_pos, |
| 20 unsigned src_length, |
| 21 ScriptCompiler::ExternalSourceStream::Encoding encoding) { |
| 22 if (encoding == ScriptCompiler::ExternalSourceStream::UTF8) { |
| 23 return v8::internal::Utf8ToUtf16CharacterStream::CopyChars( |
| 24 dest, length, reinterpret_cast<const uint8_t*>(src), src_pos, |
| 25 src_length); |
| 26 } |
| 27 |
| 28 unsigned to_fill = length; |
| 29 if (to_fill > src_length - *src_pos) to_fill = src_length - *src_pos; |
| 30 |
| 31 if (encoding == ScriptCompiler::ExternalSourceStream::ONE_BYTE) { |
| 32 v8::internal::CopyChars<uint8_t, uint16_t>( |
| 33 dest, reinterpret_cast<const uint8_t*>(src + *src_pos), to_fill); |
| 34 } else { |
| 35 DCHECK(encoding == ScriptCompiler::ExternalSourceStream::TWO_BYTE); |
| 36 v8::internal::CopyChars<uint16_t, uint16_t>( |
| 37 dest, reinterpret_cast<const uint16_t*>(src + *src_pos), to_fill); |
| 38 } |
| 39 *src_pos += to_fill; |
| 40 return to_fill; |
| 41 } |
| 42 |
| 43 } // namespace |
| 44 |
| 45 |
15 // ---------------------------------------------------------------------------- | 46 // ---------------------------------------------------------------------------- |
16 // BufferedUtf16CharacterStreams | 47 // BufferedUtf16CharacterStreams |
17 | 48 |
18 BufferedUtf16CharacterStream::BufferedUtf16CharacterStream() | 49 BufferedUtf16CharacterStream::BufferedUtf16CharacterStream() |
19 : Utf16CharacterStream(), | 50 : Utf16CharacterStream(), |
20 pushback_limit_(NULL) { | 51 pushback_limit_(NULL) { |
21 // Initialize buffer as being empty. First read will fill the buffer. | 52 // Initialize buffer as being empty. First read will fill the buffer. |
22 buffer_cursor_ = buffer_; | 53 buffer_cursor_ = buffer_; |
23 buffer_end_ = buffer_; | 54 buffer_end_ = buffer_; |
24 } | 55 } |
(...skipping 113 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
138 raw_data_length_(length), | 169 raw_data_length_(length), |
139 raw_data_pos_(0), | 170 raw_data_pos_(0), |
140 raw_character_position_(0) { | 171 raw_character_position_(0) { |
141 ReadBlock(); | 172 ReadBlock(); |
142 } | 173 } |
143 | 174 |
144 | 175 |
// Destructor. The body is empty: no cleanup is performed here.
Utf8ToUtf16CharacterStream::~Utf8ToUtf16CharacterStream() { }
146 | 177 |
147 | 178 |
| 179 unsigned Utf8ToUtf16CharacterStream::CopyChars(uint16_t* dest, unsigned length, |
| 180 const byte* src, |
| 181 unsigned* src_pos, |
| 182 unsigned src_length) { |
| 183 static const unibrow::uchar kMaxUtf16Character = 0xffff; |
| 184 unsigned i = 0; |
| 185 // Because of the UTF-16 lead and trail surrogates, we stop filling the buffer |
| 186 // one character early (in the normal case), because we need to have at least |
| 187 // two free spaces in the buffer to be sure that the next character will fit. |
| 188 while (i < length - 1) { |
| 189 if (*src_pos == src_length) break; |
| 190 unibrow::uchar c = src[*src_pos]; |
| 191 if (c <= unibrow::Utf8::kMaxOneByteChar) { |
| 192 *src_pos = *src_pos + 1; |
| 193 } else { |
| 194 c = unibrow::Utf8::CalculateValue(src + *src_pos, src_length - *src_pos, |
| 195 src_pos); |
| 196 } |
| 197 if (c > kMaxUtf16Character) { |
| 198 dest[i++] = unibrow::Utf16::LeadSurrogate(c); |
| 199 dest[i++] = unibrow::Utf16::TrailSurrogate(c); |
| 200 } else { |
| 201 dest[i++] = static_cast<uc16>(c); |
| 202 } |
| 203 } |
| 204 return i; |
| 205 } |
| 206 |
| 207 |
148 unsigned Utf8ToUtf16CharacterStream::BufferSeekForward(unsigned delta) { | 208 unsigned Utf8ToUtf16CharacterStream::BufferSeekForward(unsigned delta) { |
149 unsigned old_pos = pos_; | 209 unsigned old_pos = pos_; |
150 unsigned target_pos = pos_ + delta; | 210 unsigned target_pos = pos_ + delta; |
151 SetRawPosition(target_pos); | 211 SetRawPosition(target_pos); |
152 pos_ = raw_character_position_; | 212 pos_ = raw_character_position_; |
153 ReadBlock(); | 213 ReadBlock(); |
154 return pos_ - old_pos; | 214 return pos_ - old_pos; |
155 } | 215 } |
156 | 216 |
157 | 217 |
158 unsigned Utf8ToUtf16CharacterStream::FillBuffer(unsigned char_position) { | 218 unsigned Utf8ToUtf16CharacterStream::FillBuffer(unsigned char_position) { |
159 static const unibrow::uchar kMaxUtf16Character = 0xffff; | |
160 SetRawPosition(char_position); | 219 SetRawPosition(char_position); |
161 if (raw_character_position_ != char_position) { | 220 if (raw_character_position_ != char_position) { |
162 // char_position was not a valid position in the stream (hit the end | 221 // char_position was not a valid position in the stream (hit the end |
163 // while spooling to it). | 222 // while spooling to it). |
164 return 0u; | 223 return 0u; |
165 } | 224 } |
166 unsigned i = 0; | 225 unsigned i = CopyChars(buffer_, kBufferSize, raw_data_, &raw_data_pos_, |
167 while (i < kBufferSize - 1) { | 226 raw_data_length_); |
168 if (raw_data_pos_ == raw_data_length_) break; | |
169 unibrow::uchar c = raw_data_[raw_data_pos_]; | |
170 if (c <= unibrow::Utf8::kMaxOneByteChar) { | |
171 raw_data_pos_++; | |
172 } else { | |
173 c = unibrow::Utf8::CalculateValue(raw_data_ + raw_data_pos_, | |
174 raw_data_length_ - raw_data_pos_, | |
175 &raw_data_pos_); | |
176 } | |
177 if (c > kMaxUtf16Character) { | |
178 buffer_[i++] = unibrow::Utf16::LeadSurrogate(c); | |
179 buffer_[i++] = unibrow::Utf16::TrailSurrogate(c); | |
180 } else { | |
181 buffer_[i++] = static_cast<uc16>(c); | |
182 } | |
183 } | |
184 raw_character_position_ = char_position + i; | 227 raw_character_position_ = char_position + i; |
185 return i; | 228 return i; |
186 } | 229 } |
187 | 230 |
188 | 231 |
// Bit patterns for classifying UTF-8 bytes.
// NOTE(review): presumably a byte b is a continuation ("follower") byte when
// (b & kUtf8MultiByteMask) == kUtf8MultiByteCharFollower, i.e. its top two
// bits are 10 -- the code using these constants is not visible here; confirm.
static const byte kUtf8MultiByteMask = 0xC0;
static const byte kUtf8MultiByteCharFollower = 0x80;
191 | 234 |
192 | 235 |
193 #ifdef DEBUG | 236 #ifdef DEBUG |
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
269 Utf8CharacterForward(raw_data_, &raw_data_pos_); | 312 Utf8CharacterForward(raw_data_, &raw_data_pos_); |
270 raw_character_position_++; | 313 raw_character_position_++; |
271 DCHECK(raw_data_pos_ - old_pos <= 4); | 314 DCHECK(raw_data_pos_ - old_pos <= 4); |
272 if (raw_data_pos_ - old_pos == 4) raw_character_position_++; | 315 if (raw_data_pos_ - old_pos == 4) raw_character_position_++; |
273 } | 316 } |
274 // No surrogate pair splitting. | 317 // No surrogate pair splitting. |
275 DCHECK(raw_character_position_ == target_position); | 318 DCHECK(raw_character_position_ == target_position); |
276 } | 319 } |
277 | 320 |
278 | 321 |
| 322 unsigned ExternalStreamingStream::FillBuffer(unsigned position) { |
| 323 // Ignore "position" which is the position in the decoded data. Instead, |
| 324 // ExternalStreamingStream keeps track of the position in the raw data. |
| 325 unsigned data_in_buffer = 0; |
| 326 // Note that the UTF-8 decoder might not be able to fill the buffer |
| 327 // completely; it will typically leave the last character empty (see |
| 328 // Utf8ToUtf16CharacterStream::CopyChars). |
| 329 while (data_in_buffer < kBufferSize - 1) { |
| 330 if (current_data_ == NULL) { |
| 331 // GetSomeData will wait until the embedder has enough data. |
| 332 current_data_length_ = source_stream_->GetMoreData(¤t_data_); |
| 333 current_data_offset_ = 0; |
| 334 // Did the data stream end? |
| 335 if (current_data_length_ == 0) { |
| 336 return data_in_buffer; |
| 337 } |
| 338 } |
| 339 // Fill the buffer from current_data_. FIXME: this doesn't work if the data |
| 340 // chunk ends in the middle of an UTF-8 character. |
| 341 unsigned new_offset = 0; |
| 342 unsigned new_chars_in_buffer = CopyCharsHelper( |
| 343 buffer_ + data_in_buffer, kBufferSize - data_in_buffer, |
| 344 current_data_ + current_data_offset_, &new_offset, |
| 345 current_data_length_ - current_data_offset_, source_stream_->encoding); |
| 346 data_in_buffer += new_chars_in_buffer; |
| 347 current_data_offset_ += new_offset; |
| 348 DCHECK(data_in_buffer <= kBufferSize); |
| 349 // Did we use all the data? |
| 350 if (current_data_offset_ == current_data_length_) { |
| 351 delete[] current_data_; |
| 352 current_data_ = NULL; |
| 353 current_data_length_ = 0; |
| 354 current_data_offset_ = 0; |
| 355 } |
| 356 } |
| 357 return data_in_buffer; |
| 358 } |
| 359 |
| 360 |
279 // ---------------------------------------------------------------------------- | 361 // ---------------------------------------------------------------------------- |
280 // ExternalTwoByteStringUtf16CharacterStream | 362 // ExternalTwoByteStringUtf16CharacterStream |
281 | 363 |
// Destructor. The body is empty: no cleanup is performed here.
ExternalTwoByteStringUtf16CharacterStream::
    ~ExternalTwoByteStringUtf16CharacterStream() { }
284 | 366 |
285 | 367 |
286 ExternalTwoByteStringUtf16CharacterStream | 368 ExternalTwoByteStringUtf16CharacterStream |
287 ::ExternalTwoByteStringUtf16CharacterStream( | 369 ::ExternalTwoByteStringUtf16CharacterStream( |
288 Handle<ExternalTwoByteString> data, | 370 Handle<ExternalTwoByteString> data, |
289 int start_position, | 371 int start_position, |
290 int end_position) | 372 int end_position) |
291 : Utf16CharacterStream(), | 373 : Utf16CharacterStream(), |
292 source_(data), | 374 source_(data), |
293 raw_data_(data->GetTwoByteData(start_position)) { | 375 raw_data_(data->GetTwoByteData(start_position)) { |
294 buffer_cursor_ = raw_data_, | 376 buffer_cursor_ = raw_data_, |
295 buffer_end_ = raw_data_ + (end_position - start_position); | 377 buffer_end_ = raw_data_ + (end_position - start_position); |
296 pos_ = start_position; | 378 pos_ = start_position; |
297 } | 379 } |
298 | 380 |
299 } } // namespace v8::internal | 381 } } // namespace v8::internal |
OLD | NEW |