Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(109)

Side by Side Diff: src/scanner-character-streams.cc

Issue 366153002: Add script streaming API (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: cleanup Created 6 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/v8.h" 5 #include "src/v8.h"
6 6
7 #include "src/scanner-character-streams.h" 7 #include "src/scanner-character-streams.h"
8 8
9 #include "include/v8.h"
9 #include "src/handles.h" 10 #include "src/handles.h"
10 #include "src/unicode-inl.h" 11 #include "src/unicode-inl.h"
11 12
12 namespace v8 { 13 namespace v8 {
13 namespace internal { 14 namespace internal {
14 15
16 namespace {
17
18 unsigned CopyCharsHelper(
19 uint16_t* dest, unsigned length, const uint8_t* src, unsigned* src_pos,
20 unsigned src_length,
21 ScriptCompiler::ExternalSourceStream::Encoding encoding) {
22 if (encoding == ScriptCompiler::ExternalSourceStream::UTF8) {
23 return v8::internal::Utf8ToUtf16CharacterStream::CopyChars(
24 dest, length, src, src_pos, src_length);
25 }
26
27 unsigned to_fill = length;
28 if (to_fill > src_length - *src_pos) to_fill = src_length - *src_pos;
29
30 if (encoding == ScriptCompiler::ExternalSourceStream::ONE_BYTE) {
31 v8::internal::CopyChars<uint8_t, uint16_t>(dest, src + *src_pos, to_fill);
32 } else {
33 DCHECK(encoding == ScriptCompiler::ExternalSourceStream::TWO_BYTE);
34 v8::internal::CopyChars<uint16_t, uint16_t>(
35 dest, reinterpret_cast<const uint16_t*>(src + *src_pos), to_fill);
36 }
37 *src_pos += to_fill;
38 return to_fill;
39 }
40
41 } // namespace
42
43
15 // ---------------------------------------------------------------------------- 44 // ----------------------------------------------------------------------------
16 // BufferedUtf16CharacterStreams 45 // BufferedUtf16CharacterStreams
17 46
18 BufferedUtf16CharacterStream::BufferedUtf16CharacterStream() 47 BufferedUtf16CharacterStream::BufferedUtf16CharacterStream()
19 : Utf16CharacterStream(), 48 : Utf16CharacterStream(),
20 pushback_limit_(NULL) { 49 pushback_limit_(NULL) {
21 // Initialize buffer as being empty. First read will fill the buffer. 50 // Initialize buffer as being empty. First read will fill the buffer.
22 buffer_cursor_ = buffer_; 51 buffer_cursor_ = buffer_;
23 buffer_end_ = buffer_; 52 buffer_end_ = buffer_;
24 } 53 }
(...skipping 113 matching lines...) Expand 10 before | Expand all | Expand 10 after
138 raw_data_length_(length), 167 raw_data_length_(length),
139 raw_data_pos_(0), 168 raw_data_pos_(0),
140 raw_character_position_(0) { 169 raw_character_position_(0) {
141 ReadBlock(); 170 ReadBlock();
142 } 171 }
143 172
144 173
145 Utf8ToUtf16CharacterStream::~Utf8ToUtf16CharacterStream() { } 174 Utf8ToUtf16CharacterStream::~Utf8ToUtf16CharacterStream() { }
146 175
147 176
177 unsigned Utf8ToUtf16CharacterStream::CopyChars(uint16_t* dest, unsigned length,
178 const byte* src,
179 unsigned* src_pos,
180 unsigned src_length) {
181 static const unibrow::uchar kMaxUtf16Character = 0xffff;
182 unsigned i = 0;
183 // A character above 0xffff is written as a UTF-16 lead/trail surrogate pair,
184 // so we stop filling the buffer one character early (in the normal case): we
185 // need at least two free slots to be sure that the next character will fit.
186 while (i < length - 1) {
187 if (*src_pos == src_length) break;
188 unibrow::uchar c = src[*src_pos];
189 if (c <= unibrow::Utf8::kMaxOneByteChar) {
190 *src_pos = *src_pos + 1;
191 } else {
192 c = unibrow::Utf8::CalculateValue(src + *src_pos, src_length - *src_pos,
193 src_pos);
194 }
195 if (c > kMaxUtf16Character) {
196 dest[i++] = unibrow::Utf16::LeadSurrogate(c);
197 dest[i++] = unibrow::Utf16::TrailSurrogate(c);
198 } else {
199 dest[i++] = static_cast<uc16>(c);
200 }
201 }
202 return i;
203 }
204
205
148 unsigned Utf8ToUtf16CharacterStream::BufferSeekForward(unsigned delta) { 206 unsigned Utf8ToUtf16CharacterStream::BufferSeekForward(unsigned delta) {
149 unsigned old_pos = pos_; 207 unsigned old_pos = pos_;
150 unsigned target_pos = pos_ + delta; 208 unsigned target_pos = pos_ + delta;
151 SetRawPosition(target_pos); 209 SetRawPosition(target_pos);
152 pos_ = raw_character_position_; 210 pos_ = raw_character_position_;
153 ReadBlock(); 211 ReadBlock();
154 return pos_ - old_pos; 212 return pos_ - old_pos;
155 } 213 }
156 214
157 215
158 unsigned Utf8ToUtf16CharacterStream::FillBuffer(unsigned char_position) { 216 unsigned Utf8ToUtf16CharacterStream::FillBuffer(unsigned char_position) {
159 static const unibrow::uchar kMaxUtf16Character = 0xffff;
160 SetRawPosition(char_position); 217 SetRawPosition(char_position);
161 if (raw_character_position_ != char_position) { 218 if (raw_character_position_ != char_position) {
162 // char_position was not a valid position in the stream (hit the end 219 // char_position was not a valid position in the stream (hit the end
163 // while spooling to it). 220 // while spooling to it).
164 return 0u; 221 return 0u;
165 } 222 }
166 unsigned i = 0; 223 unsigned i = CopyChars(buffer_, kBufferSize, raw_data_, &raw_data_pos_,
167 while (i < kBufferSize - 1) { 224 raw_data_length_);
168 if (raw_data_pos_ == raw_data_length_) break;
169 unibrow::uchar c = raw_data_[raw_data_pos_];
170 if (c <= unibrow::Utf8::kMaxOneByteChar) {
171 raw_data_pos_++;
172 } else {
173 c = unibrow::Utf8::CalculateValue(raw_data_ + raw_data_pos_,
174 raw_data_length_ - raw_data_pos_,
175 &raw_data_pos_);
176 }
177 if (c > kMaxUtf16Character) {
178 buffer_[i++] = unibrow::Utf16::LeadSurrogate(c);
179 buffer_[i++] = unibrow::Utf16::TrailSurrogate(c);
180 } else {
181 buffer_[i++] = static_cast<uc16>(c);
182 }
183 }
184 raw_character_position_ = char_position + i; 225 raw_character_position_ = char_position + i;
185 return i; 226 return i;
186 } 227 }
187 228
188 229
189 static const byte kUtf8MultiByteMask = 0xC0; 230 static const byte kUtf8MultiByteMask = 0xC0;
190 static const byte kUtf8MultiByteCharFollower = 0x80; 231 static const byte kUtf8MultiByteCharFollower = 0x80;
191 232
192 233
193 #ifdef DEBUG 234 #ifdef DEBUG
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after
269 Utf8CharacterForward(raw_data_, &raw_data_pos_); 310 Utf8CharacterForward(raw_data_, &raw_data_pos_);
270 raw_character_position_++; 311 raw_character_position_++;
271 DCHECK(raw_data_pos_ - old_pos <= 4); 312 DCHECK(raw_data_pos_ - old_pos <= 4);
272 if (raw_data_pos_ - old_pos == 4) raw_character_position_++; 313 if (raw_data_pos_ - old_pos == 4) raw_character_position_++;
273 } 314 }
274 // No surrogate pair splitting. 315 // No surrogate pair splitting.
275 DCHECK(raw_character_position_ == target_position); 316 DCHECK(raw_character_position_ == target_position);
276 } 317 }
277 318
278 319
320 unsigned ExternalStreamingStream::FillBuffer(unsigned position) {
321 // Ignore "position", which is the position in the decoded data. Instead,
322 // ExternalStreamingStream keeps track of the position in the raw data.
323 unsigned data_in_buffer = 0;
324 // Note that the UTF-8 decoder might not be able to fill the buffer
325 // completely; it will typically leave the last slot of the buffer unused
326 // (see Utf8ToUtf16CharacterStream::CopyChars).
327 while (data_in_buffer < kBufferSize - 1) {
328 if (current_data_ == NULL) {
329 // GetMoreData will wait until the embedder has enough data.
330 current_data_length_ = source_stream_->GetMoreData(&current_data_);
331 current_data_offset_ = 0;
332
333 // A caveat: a data chunk might end with bytes from an incomplete UTF-8
334 // character (the rest of the bytes will be in the next chunk).
335 if (source_stream_->encoding ==
336 ScriptCompiler::ExternalSourceStream::UTF8) {
337 bool data_ends = current_data_length_ == 0;
338 HandleUtf8SplitCharacters(&data_in_buffer);
339 // Did we use all the data in the data chunk? Note that this would mean
340 // the chunk was really small. We don't handle the case where a UTF-8
341 // character is split over more than two chunks; in that case V8 won't
342 // crash, but it will be a parse error.
343 if (!data_ends && current_data_offset_ == current_data_length_) {
344 delete[] current_data_;
345 current_data_ = NULL;
346 current_data_length_ = 0;
347 current_data_offset_ = 0;
348 continue;
349 }
350 }
351
352 // Did the data stream end?
353 if (current_data_length_ == 0 && utf8_split_char_buffer_length_ == 0) {
354 return data_in_buffer;
355 }
356 }
357
358 // Fill the buffer from current_data_.
359 unsigned new_offset = 0;
360 unsigned new_chars_in_buffer = CopyCharsHelper(
361 buffer_ + data_in_buffer, kBufferSize - data_in_buffer,
362 current_data_ + current_data_offset_, &new_offset,
363 current_data_length_ - current_data_offset_, source_stream_->encoding);
364 data_in_buffer += new_chars_in_buffer;
365 current_data_offset_ += new_offset;
366 DCHECK(data_in_buffer <= kBufferSize);
367
368 // Did we use all the data in the data chunk?
369 if (current_data_offset_ == current_data_length_) {
370 delete[] current_data_;
371 current_data_ = NULL;
372 current_data_length_ = 0;
373 current_data_offset_ = 0;
374 }
375 }
376 return data_in_buffer;
377 }
378
379 void ExternalStreamingStream::HandleUtf8SplitCharacters(
380 unsigned* data_in_buffer) {
381 // First check if we have leftover data from the last chunk.
382 unibrow::uchar c;
383 if (utf8_split_char_buffer_length_ > 0) {
384 // Move the bytes which are part of the split character (which started in
385 // the previous chunk) into utf8_split_char_buffer_.
386 while (current_data_offset_ < current_data_length_ &&
387 utf8_split_char_buffer_length_ < 4 &&
388 (c = current_data_[current_data_offset_]) >
389 unibrow::Utf8::kMaxOneByteChar) {
390 utf8_split_char_buffer_[utf8_split_char_buffer_length_] = c;
391 ++utf8_split_char_buffer_length_;
392 ++current_data_offset_;
393 }
394
395 // Convert the data in utf8_split_char_buffer_.
396 unsigned new_offset = 0;
397 unsigned new_chars_in_buffer = CopyCharsHelper(
398 buffer_ + *data_in_buffer, kBufferSize - *data_in_buffer,
399 utf8_split_char_buffer_, &new_offset, utf8_split_char_buffer_length_,
400 source_stream_->encoding);
401 *data_in_buffer += new_chars_in_buffer;
402 // Make sure we used all the data.
403 DCHECK(new_offset == utf8_split_char_buffer_length_);
404 DCHECK(*data_in_buffer <= kBufferSize);
405
406 utf8_split_char_buffer_length_ = 0;
407 }
408
409 // Move bytes which are part of an incomplete character from the end of the
410 // current chunk to utf8_split_char_buffer_. They will be converted when the
411 // next data chunk arrives.
412 while (current_data_length_ > current_data_offset_ &&
413 (c = current_data_[current_data_length_ - 1]) >
414 unibrow::Utf8::kMaxOneByteChar) {
415 --current_data_length_;
416 ++utf8_split_char_buffer_length_;
417 }
418 for (unsigned i = 0; i < utf8_split_char_buffer_length_; ++i) {
419 utf8_split_char_buffer_[i] = current_data_[current_data_length_ + i];
420 }
421 }
422
423
279 // ---------------------------------------------------------------------------- 424 // ----------------------------------------------------------------------------
280 // ExternalTwoByteStringUtf16CharacterStream 425 // ExternalTwoByteStringUtf16CharacterStream
281 426
282 ExternalTwoByteStringUtf16CharacterStream:: 427 ExternalTwoByteStringUtf16CharacterStream::
283 ~ExternalTwoByteStringUtf16CharacterStream() { } 428 ~ExternalTwoByteStringUtf16CharacterStream() { }
284 429
285 430
286 ExternalTwoByteStringUtf16CharacterStream 431 ExternalTwoByteStringUtf16CharacterStream
287 ::ExternalTwoByteStringUtf16CharacterStream( 432 ::ExternalTwoByteStringUtf16CharacterStream(
288 Handle<ExternalTwoByteString> data, 433 Handle<ExternalTwoByteString> data,
289 int start_position, 434 int start_position,
290 int end_position) 435 int end_position)
291 : Utf16CharacterStream(), 436 : Utf16CharacterStream(),
292 source_(data), 437 source_(data),
293 raw_data_(data->GetTwoByteData(start_position)) { 438 raw_data_(data->GetTwoByteData(start_position)) {
294 buffer_cursor_ = raw_data_, 439 buffer_cursor_ = raw_data_,
295 buffer_end_ = raw_data_ + (end_position - start_position); 440 buffer_end_ = raw_data_ + (end_position - start_position);
296 pos_ = start_position; 441 pos_ = start_position;
297 } 442 }
298 443
299 } } // namespace v8::internal 444 } } // namespace v8::internal
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698