Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(215)

Side by Side Diff: src/scanner-character-streams.cc

Issue 366153002: Add script streaming API (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: rebased again? Created 6 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/scanner-character-streams.h ('k') | test/cctest/test-api.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/v8.h" 5 #include "src/v8.h"
6 6
7 #include "src/scanner-character-streams.h" 7 #include "src/scanner-character-streams.h"
8 8
9 #include "include/v8.h"
9 #include "src/handles.h" 10 #include "src/handles.h"
10 #include "src/unicode-inl.h" 11 #include "src/unicode-inl.h"
11 12
12 namespace v8 { 13 namespace v8 {
13 namespace internal { 14 namespace internal {
14 15
16 namespace {
17
18 unsigned CopyCharsHelper(uint16_t* dest, unsigned length, const uint8_t* src,
19 unsigned* src_pos, unsigned src_length,
20 ScriptCompiler::StreamedSource::Encoding encoding) {
21 if (encoding == ScriptCompiler::StreamedSource::UTF8) {
22 return v8::internal::Utf8ToUtf16CharacterStream::CopyChars(
23 dest, length, src, src_pos, src_length);
24 }
25
26 unsigned to_fill = length;
27 if (to_fill > src_length - *src_pos) to_fill = src_length - *src_pos;
28
29 if (encoding == ScriptCompiler::StreamedSource::ONE_BYTE) {
30 v8::internal::CopyChars<uint8_t, uint16_t>(dest, src + *src_pos, to_fill);
31 } else {
32 DCHECK(encoding == ScriptCompiler::StreamedSource::TWO_BYTE);
33 v8::internal::CopyChars<uint16_t, uint16_t>(
34 dest, reinterpret_cast<const uint16_t*>(src + *src_pos), to_fill);
35 }
36 *src_pos += to_fill;
37 return to_fill;
38 }
39
40 } // namespace
41
42
15 // ---------------------------------------------------------------------------- 43 // ----------------------------------------------------------------------------
16 // BufferedUtf16CharacterStreams 44 // BufferedUtf16CharacterStreams
17 45
18 BufferedUtf16CharacterStream::BufferedUtf16CharacterStream() 46 BufferedUtf16CharacterStream::BufferedUtf16CharacterStream()
19 : Utf16CharacterStream(), 47 : Utf16CharacterStream(),
20 pushback_limit_(NULL) { 48 pushback_limit_(NULL) {
21 // Initialize buffer as being empty. First read will fill the buffer. 49 // Initialize buffer as being empty. First read will fill the buffer.
22 buffer_cursor_ = buffer_; 50 buffer_cursor_ = buffer_;
23 buffer_end_ = buffer_; 51 buffer_end_ = buffer_;
24 } 52 }
(...skipping 113 matching lines...) Expand 10 before | Expand all | Expand 10 after
138 raw_data_length_(length), 166 raw_data_length_(length),
139 raw_data_pos_(0), 167 raw_data_pos_(0),
140 raw_character_position_(0) { 168 raw_character_position_(0) {
141 ReadBlock(); 169 ReadBlock();
142 } 170 }
143 171
144 172
145 Utf8ToUtf16CharacterStream::~Utf8ToUtf16CharacterStream() { } 173 Utf8ToUtf16CharacterStream::~Utf8ToUtf16CharacterStream() { }
146 174
147 175
176 unsigned Utf8ToUtf16CharacterStream::CopyChars(uint16_t* dest, unsigned length,
177 const byte* src,
178 unsigned* src_pos,
179 unsigned src_length) {
180 static const unibrow::uchar kMaxUtf16Character = 0xffff;
181 unsigned i = 0;
182 // Because of the UTF-16 lead and trail surrogates, we stop filling the buffer
183 // one character early (in the normal case), because we need to have at least
184 // two free spaces in the buffer to be sure that the next character will fit.
185 while (i < length - 1) {
186 if (*src_pos == src_length) break;
187 unibrow::uchar c = src[*src_pos];
188 if (c <= unibrow::Utf8::kMaxOneByteChar) {
189 *src_pos = *src_pos + 1;
190 } else {
191 c = unibrow::Utf8::CalculateValue(src + *src_pos, src_length - *src_pos,
192 src_pos);
193 }
194 if (c > kMaxUtf16Character) {
195 dest[i++] = unibrow::Utf16::LeadSurrogate(c);
196 dest[i++] = unibrow::Utf16::TrailSurrogate(c);
197 } else {
198 dest[i++] = static_cast<uc16>(c);
199 }
200 }
201 return i;
202 }
203
204
148 unsigned Utf8ToUtf16CharacterStream::BufferSeekForward(unsigned delta) { 205 unsigned Utf8ToUtf16CharacterStream::BufferSeekForward(unsigned delta) {
149 unsigned old_pos = pos_; 206 unsigned old_pos = pos_;
150 unsigned target_pos = pos_ + delta; 207 unsigned target_pos = pos_ + delta;
151 SetRawPosition(target_pos); 208 SetRawPosition(target_pos);
152 pos_ = raw_character_position_; 209 pos_ = raw_character_position_;
153 ReadBlock(); 210 ReadBlock();
154 return pos_ - old_pos; 211 return pos_ - old_pos;
155 } 212 }
156 213
157 214
158 unsigned Utf8ToUtf16CharacterStream::FillBuffer(unsigned char_position) { 215 unsigned Utf8ToUtf16CharacterStream::FillBuffer(unsigned char_position) {
159 static const unibrow::uchar kMaxUtf16Character = 0xffff;
160 SetRawPosition(char_position); 216 SetRawPosition(char_position);
161 if (raw_character_position_ != char_position) { 217 if (raw_character_position_ != char_position) {
162 // char_position was not a valid position in the stream (hit the end 218 // char_position was not a valid position in the stream (hit the end
163 // while spooling to it). 219 // while spooling to it).
164 return 0u; 220 return 0u;
165 } 221 }
166 unsigned i = 0; 222 unsigned i = CopyChars(buffer_, kBufferSize, raw_data_, &raw_data_pos_,
167 while (i < kBufferSize - 1) { 223 raw_data_length_);
168 if (raw_data_pos_ == raw_data_length_) break;
169 unibrow::uchar c = raw_data_[raw_data_pos_];
170 if (c <= unibrow::Utf8::kMaxOneByteChar) {
171 raw_data_pos_++;
172 } else {
173 c = unibrow::Utf8::CalculateValue(raw_data_ + raw_data_pos_,
174 raw_data_length_ - raw_data_pos_,
175 &raw_data_pos_);
176 }
177 if (c > kMaxUtf16Character) {
178 buffer_[i++] = unibrow::Utf16::LeadSurrogate(c);
179 buffer_[i++] = unibrow::Utf16::TrailSurrogate(c);
180 } else {
181 buffer_[i++] = static_cast<uc16>(c);
182 }
183 }
184 raw_character_position_ = char_position + i; 224 raw_character_position_ = char_position + i;
185 return i; 225 return i;
186 } 226 }
187 227
188 228
189 static const byte kUtf8MultiByteMask = 0xC0; 229 static const byte kUtf8MultiByteMask = 0xC0;
190 static const byte kUtf8MultiByteCharFollower = 0x80; 230 static const byte kUtf8MultiByteCharFollower = 0x80;
191 231
192 232
193 #ifdef DEBUG 233 #ifdef DEBUG
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after
269 Utf8CharacterForward(raw_data_, &raw_data_pos_); 309 Utf8CharacterForward(raw_data_, &raw_data_pos_);
270 raw_character_position_++; 310 raw_character_position_++;
271 DCHECK(raw_data_pos_ - old_pos <= 4); 311 DCHECK(raw_data_pos_ - old_pos <= 4);
272 if (raw_data_pos_ - old_pos == 4) raw_character_position_++; 312 if (raw_data_pos_ - old_pos == 4) raw_character_position_++;
273 } 313 }
274 // No surrogate pair splitting. 314 // No surrogate pair splitting.
275 DCHECK(raw_character_position_ == target_position); 315 DCHECK(raw_character_position_ == target_position);
276 } 316 }
277 317
278 318
319 unsigned ExternalStreamingStream::FillBuffer(unsigned position) {
320 // Ignore "position" which is the position in the decoded data. Instead,
321 // ExternalStreamingStream keeps track of the position in the raw data.
322 unsigned data_in_buffer = 0;
323 // Note that the UTF-8 decoder might not be able to fill the buffer
324 // completely; it will typically leave the last character empty (see
325 // Utf8ToUtf16CharacterStream::CopyChars).
326 while (data_in_buffer < kBufferSize - 1) {
327 if (current_data_ == NULL) {
328 // GetSomeData will wait until the embedder has enough data.
329 current_data_length_ = source_stream_->GetMoreData(&current_data_);
330 current_data_offset_ = 0;
331 bool data_ends = current_data_length_ == 0;
332
333 // A caveat: a data chunk might end with bytes from an incomplete UTF-8
334 // character (the rest of the bytes will be in the next chunk).
335 if (encoding_ == ScriptCompiler::StreamedSource::UTF8) {
336 HandleUtf8SplitCharacters(&data_in_buffer);
337 if (!data_ends && current_data_offset_ == current_data_length_) {
338 // The data stream didn't end, but we used all the data in the
339 // chunk. This will only happen when the chunk was really small. We
340 // don't handle the case where a UTF-8 character is split over several
341 // chunks; in that case V8 won't crash, but it will be a parse error.
342 delete[] current_data_;
343 current_data_ = NULL;
344 current_data_length_ = 0;
345 current_data_offset_ = 0;
346 continue; // Request a new chunk.
347 }
348 }
349
350 // Did the data stream end?
351 if (data_ends) {
352 DCHECK(utf8_split_char_buffer_length_ == 0);
353 return data_in_buffer;
354 }
355 }
356
357 // Fill the buffer from current_data_.
358 unsigned new_offset = 0;
359 unsigned new_chars_in_buffer =
360 CopyCharsHelper(buffer_ + data_in_buffer, kBufferSize - data_in_buffer,
361 current_data_ + current_data_offset_, &new_offset,
362 current_data_length_ - current_data_offset_, encoding_);
363 data_in_buffer += new_chars_in_buffer;
364 current_data_offset_ += new_offset;
365 DCHECK(data_in_buffer <= kBufferSize);
366
367 // Did we use all the data in the data chunk?
368 if (current_data_offset_ == current_data_length_) {
369 delete[] current_data_;
370 current_data_ = NULL;
371 current_data_length_ = 0;
372 current_data_offset_ = 0;
373 }
374 }
375 return data_in_buffer;
376 }
377
378 void ExternalStreamingStream::HandleUtf8SplitCharacters(
379 unsigned* data_in_buffer) {
380 // First check if we have leftover data from the last chunk.
381 unibrow::uchar c;
382 if (utf8_split_char_buffer_length_ > 0) {
383 // Move the bytes which are part of the split character (which started in
384 // the previous chunk) into utf8_split_char_buffer_.
385 while (current_data_offset_ < current_data_length_ &&
386 utf8_split_char_buffer_length_ < 4 &&
387 (c = current_data_[current_data_offset_]) >
388 unibrow::Utf8::kMaxOneByteChar) {
389 utf8_split_char_buffer_[utf8_split_char_buffer_length_] = c;
390 ++utf8_split_char_buffer_length_;
391 ++current_data_offset_;
392 }
393
394 // Convert the data in utf8_split_char_buffer_.
395 unsigned new_offset = 0;
396 unsigned new_chars_in_buffer =
397 CopyCharsHelper(buffer_ + *data_in_buffer,
398 kBufferSize - *data_in_buffer, utf8_split_char_buffer_,
399 &new_offset, utf8_split_char_buffer_length_, encoding_);
400 *data_in_buffer += new_chars_in_buffer;
401 // Make sure we used all the data.
402 DCHECK(new_offset == utf8_split_char_buffer_length_);
403 DCHECK(*data_in_buffer <= kBufferSize);
404
405 utf8_split_char_buffer_length_ = 0;
406 }
407
408 // Move bytes which are part of an incomplete character from the end of the
409 // current chunk to utf8_split_char_buffer_. They will be converted when the
410 // next data chunk arrives.
411 while (current_data_length_ > current_data_offset_ &&
412 (c = current_data_[current_data_length_ - 1]) >
413 unibrow::Utf8::kMaxOneByteChar) {
414 --current_data_length_;
415 ++utf8_split_char_buffer_length_;
416 }
417 for (unsigned i = 0; i < utf8_split_char_buffer_length_; ++i) {
418 utf8_split_char_buffer_[i] = current_data_[current_data_length_ + i];
419 }
420 }
421
422
279 // ---------------------------------------------------------------------------- 423 // ----------------------------------------------------------------------------
280 // ExternalTwoByteStringUtf16CharacterStream 424 // ExternalTwoByteStringUtf16CharacterStream
281 425
282 ExternalTwoByteStringUtf16CharacterStream:: 426 ExternalTwoByteStringUtf16CharacterStream::
283 ~ExternalTwoByteStringUtf16CharacterStream() { } 427 ~ExternalTwoByteStringUtf16CharacterStream() { }
284 428
285 429
286 ExternalTwoByteStringUtf16CharacterStream 430 ExternalTwoByteStringUtf16CharacterStream
287 ::ExternalTwoByteStringUtf16CharacterStream( 431 ::ExternalTwoByteStringUtf16CharacterStream(
288 Handle<ExternalTwoByteString> data, 432 Handle<ExternalTwoByteString> data,
289 int start_position, 433 int start_position,
290 int end_position) 434 int end_position)
291 : Utf16CharacterStream(), 435 : Utf16CharacterStream(),
292 source_(data), 436 source_(data),
293 raw_data_(data->GetTwoByteData(start_position)) { 437 raw_data_(data->GetTwoByteData(start_position)) {
294 buffer_cursor_ = raw_data_, 438 buffer_cursor_ = raw_data_,
295 buffer_end_ = raw_data_ + (end_position - start_position); 439 buffer_end_ = raw_data_ + (end_position - start_position);
296 pos_ = start_position; 440 pos_ = start_position;
297 } 441 }
298 442
299 } } // namespace v8::internal 443 } } // namespace v8::internal
OLDNEW
« no previous file with comments | « src/scanner-character-streams.h ('k') | test/cctest/test-api.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698