Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Side by Side Diff: src/scanner-character-streams.cc

Issue 566553002: Add script streaming API. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: rebased Created 6 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/scanner-character-streams.h ('k') | test/cctest/test-api.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/v8.h" 5 #include "src/v8.h"
6 6
7 #include "src/scanner-character-streams.h" 7 #include "src/scanner-character-streams.h"
8 8
9 #include "include/v8.h"
9 #include "src/handles.h" 10 #include "src/handles.h"
10 #include "src/unicode-inl.h" 11 #include "src/unicode-inl.h"
11 12
12 namespace v8 { 13 namespace v8 {
13 namespace internal { 14 namespace internal {
14 15
16 namespace {
17
18 unsigned CopyCharsHelper(uint16_t* dest, unsigned length, const uint8_t* src,
19 unsigned* src_pos, unsigned src_length,
20 ScriptCompiler::StreamedSource::Encoding encoding) {
21 if (encoding == ScriptCompiler::StreamedSource::UTF8) {
22 return v8::internal::Utf8ToUtf16CharacterStream::CopyChars(
23 dest, length, src, src_pos, src_length);
24 }
25
26 unsigned to_fill = length;
27 if (to_fill > src_length - *src_pos) to_fill = src_length - *src_pos;
28
29 if (encoding == ScriptCompiler::StreamedSource::ONE_BYTE) {
30 v8::internal::CopyChars<uint8_t, uint16_t>(dest, src + *src_pos, to_fill);
31 } else {
32 DCHECK(encoding == ScriptCompiler::StreamedSource::TWO_BYTE);
33 v8::internal::CopyChars<uint16_t, uint16_t>(
34 dest, reinterpret_cast<const uint16_t*>(src + *src_pos), to_fill);
35 }
36 *src_pos += to_fill;
37 return to_fill;
38 }
39
40 } // namespace
41
42
15 // ---------------------------------------------------------------------------- 43 // ----------------------------------------------------------------------------
16 // BufferedUtf16CharacterStreams 44 // BufferedUtf16CharacterStreams
17 45
18 BufferedUtf16CharacterStream::BufferedUtf16CharacterStream() 46 BufferedUtf16CharacterStream::BufferedUtf16CharacterStream()
19 : Utf16CharacterStream(), 47 : Utf16CharacterStream(),
20 pushback_limit_(NULL) { 48 pushback_limit_(NULL) {
21 // Initialize buffer as being empty. First read will fill the buffer. 49 // Initialize buffer as being empty. First read will fill the buffer.
22 buffer_cursor_ = buffer_; 50 buffer_cursor_ = buffer_;
23 buffer_end_ = buffer_; 51 buffer_end_ = buffer_;
24 } 52 }
(...skipping 113 matching lines...) Expand 10 before | Expand all | Expand 10 after
138 raw_data_length_(length), 166 raw_data_length_(length),
139 raw_data_pos_(0), 167 raw_data_pos_(0),
140 raw_character_position_(0) { 168 raw_character_position_(0) {
141 ReadBlock(); 169 ReadBlock();
142 } 170 }
143 171
144 172
145 Utf8ToUtf16CharacterStream::~Utf8ToUtf16CharacterStream() { } 173 Utf8ToUtf16CharacterStream::~Utf8ToUtf16CharacterStream() { }
146 174
147 175
176 unsigned Utf8ToUtf16CharacterStream::CopyChars(uint16_t* dest, unsigned length,
177 const byte* src,
178 unsigned* src_pos,
179 unsigned src_length) {
180 static const unibrow::uchar kMaxUtf16Character = 0xffff;
181 unsigned i = 0;
182 // Because of the UTF-16 lead and trail surrogates, we stop filling the buffer
183 // one character early (in the normal case), because we need to have at least
184 // two free spaces in the buffer to be sure that the next character will fit.
185 while (i < length - 1) {
186 if (*src_pos == src_length) break;
187 unibrow::uchar c = src[*src_pos];
188 if (c <= unibrow::Utf8::kMaxOneByteChar) {
189 *src_pos = *src_pos + 1;
190 } else {
191 c = unibrow::Utf8::CalculateValue(src + *src_pos, src_length - *src_pos,
192 src_pos);
193 }
194 if (c > kMaxUtf16Character) {
195 dest[i++] = unibrow::Utf16::LeadSurrogate(c);
196 dest[i++] = unibrow::Utf16::TrailSurrogate(c);
197 } else {
198 dest[i++] = static_cast<uc16>(c);
199 }
200 }
201 return i;
202 }
203
204
148 unsigned Utf8ToUtf16CharacterStream::BufferSeekForward(unsigned delta) { 205 unsigned Utf8ToUtf16CharacterStream::BufferSeekForward(unsigned delta) {
149 unsigned old_pos = pos_; 206 unsigned old_pos = pos_;
150 unsigned target_pos = pos_ + delta; 207 unsigned target_pos = pos_ + delta;
151 SetRawPosition(target_pos); 208 SetRawPosition(target_pos);
152 pos_ = raw_character_position_; 209 pos_ = raw_character_position_;
153 ReadBlock(); 210 ReadBlock();
154 return pos_ - old_pos; 211 return pos_ - old_pos;
155 } 212 }
156 213
157 214
158 unsigned Utf8ToUtf16CharacterStream::FillBuffer(unsigned char_position) { 215 unsigned Utf8ToUtf16CharacterStream::FillBuffer(unsigned char_position) {
159 static const unibrow::uchar kMaxUtf16Character = 0xffff;
160 SetRawPosition(char_position); 216 SetRawPosition(char_position);
161 if (raw_character_position_ != char_position) { 217 if (raw_character_position_ != char_position) {
162 // char_position was not a valid position in the stream (hit the end 218 // char_position was not a valid position in the stream (hit the end
163 // while spooling to it). 219 // while spooling to it).
164 return 0u; 220 return 0u;
165 } 221 }
166 unsigned i = 0; 222 unsigned i = CopyChars(buffer_, kBufferSize, raw_data_, &raw_data_pos_,
167 while (i < kBufferSize - 1) { 223 raw_data_length_);
168 if (raw_data_pos_ == raw_data_length_) break;
169 unibrow::uchar c = raw_data_[raw_data_pos_];
170 if (c <= unibrow::Utf8::kMaxOneByteChar) {
171 raw_data_pos_++;
172 } else {
173 c = unibrow::Utf8::CalculateValue(raw_data_ + raw_data_pos_,
174 raw_data_length_ - raw_data_pos_,
175 &raw_data_pos_);
176 }
177 if (c > kMaxUtf16Character) {
178 buffer_[i++] = unibrow::Utf16::LeadSurrogate(c);
179 buffer_[i++] = unibrow::Utf16::TrailSurrogate(c);
180 } else {
181 buffer_[i++] = static_cast<uc16>(c);
182 }
183 }
184 raw_character_position_ = char_position + i; 224 raw_character_position_ = char_position + i;
185 return i; 225 return i;
186 } 226 }
187 227
188 228
189 static const byte kUtf8MultiByteMask = 0xC0; 229 static const byte kUtf8MultiByteMask = 0xC0;
190 static const byte kUtf8MultiByteCharFollower = 0x80; 230 static const byte kUtf8MultiByteCharFollower = 0x80;
191 231
192 232
193 #ifdef DEBUG 233 #ifdef DEBUG
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after
269 Utf8CharacterForward(raw_data_, &raw_data_pos_); 309 Utf8CharacterForward(raw_data_, &raw_data_pos_);
270 raw_character_position_++; 310 raw_character_position_++;
271 DCHECK(raw_data_pos_ - old_pos <= 4); 311 DCHECK(raw_data_pos_ - old_pos <= 4);
272 if (raw_data_pos_ - old_pos == 4) raw_character_position_++; 312 if (raw_data_pos_ - old_pos == 4) raw_character_position_++;
273 } 313 }
274 // No surrogate pair splitting. 314 // No surrogate pair splitting.
275 DCHECK(raw_character_position_ == target_position); 315 DCHECK(raw_character_position_ == target_position);
276 } 316 }
277 317
278 318
319 unsigned ExternalStreamingStream::FillBuffer(unsigned position) {
320 // Ignore "position" which is the position in the decoded data. Instead,
321 // ExternalStreamingStream keeps track of the position in the raw data.
322 unsigned data_in_buffer = 0;
323 // Note that the UTF-8 decoder might not be able to fill the buffer
324 // completely; it will typically leave the last character empty (see
325 // Utf8ToUtf16CharacterStream::CopyChars).
326 while (data_in_buffer < kBufferSize - 1) {
327 if (current_data_ == NULL) {
328 // GetSomeData will wait until the embedder has enough data. Here's an
329 // interface between the API which uses size_t (which is the correct type
330 // here) and the internal parts which use unsigned. TODO(marja): make the
331 // internal parts use size_t too.
332 current_data_length_ =
333 static_cast<unsigned>(source_stream_->GetMoreData(&current_data_));
334 current_data_offset_ = 0;
335 bool data_ends = current_data_length_ == 0;
336
337 // A caveat: a data chunk might end with bytes from an incomplete UTF-8
338 // character (the rest of the bytes will be in the next chunk).
339 if (encoding_ == ScriptCompiler::StreamedSource::UTF8) {
340 HandleUtf8SplitCharacters(&data_in_buffer);
341 if (!data_ends && current_data_offset_ == current_data_length_) {
342 // The data stream didn't end, but we used all the data in the
343 // chunk. This will only happen when the chunk was really small. We
344 // don't handle the case where a UTF-8 character is split over several
345 // chunks; in that case V8 won't crash, but it will be a parse error.
346 delete[] current_data_;
347 current_data_ = NULL;
348 current_data_length_ = 0;
349 current_data_offset_ = 0;
350 continue; // Request a new chunk.
351 }
352 }
353
354 // Did the data stream end?
355 if (data_ends) {
356 DCHECK(utf8_split_char_buffer_length_ == 0);
357 return data_in_buffer;
358 }
359 }
360
361 // Fill the buffer from current_data_.
362 unsigned new_offset = 0;
363 unsigned new_chars_in_buffer =
364 CopyCharsHelper(buffer_ + data_in_buffer, kBufferSize - data_in_buffer,
365 current_data_ + current_data_offset_, &new_offset,
366 current_data_length_ - current_data_offset_, encoding_);
367 data_in_buffer += new_chars_in_buffer;
368 current_data_offset_ += new_offset;
369 DCHECK(data_in_buffer <= kBufferSize);
370
371 // Did we use all the data in the data chunk?
372 if (current_data_offset_ == current_data_length_) {
373 delete[] current_data_;
374 current_data_ = NULL;
375 current_data_length_ = 0;
376 current_data_offset_ = 0;
377 }
378 }
379 return data_in_buffer;
380 }
381
382 void ExternalStreamingStream::HandleUtf8SplitCharacters(
383 unsigned* data_in_buffer) {
384 // First check if we have leftover data from the last chunk.
385 unibrow::uchar c;
386 if (utf8_split_char_buffer_length_ > 0) {
387 // Move the bytes which are part of the split character (which started in
388 // the previous chunk) into utf8_split_char_buffer_.
389 while (current_data_offset_ < current_data_length_ &&
390 utf8_split_char_buffer_length_ < 4 &&
391 (c = current_data_[current_data_offset_]) >
392 unibrow::Utf8::kMaxOneByteChar) {
393 utf8_split_char_buffer_[utf8_split_char_buffer_length_] = c;
394 ++utf8_split_char_buffer_length_;
395 ++current_data_offset_;
396 }
397
398 // Convert the data in utf8_split_char_buffer_.
399 unsigned new_offset = 0;
400 unsigned new_chars_in_buffer =
401 CopyCharsHelper(buffer_ + *data_in_buffer,
402 kBufferSize - *data_in_buffer, utf8_split_char_buffer_,
403 &new_offset, utf8_split_char_buffer_length_, encoding_);
404 *data_in_buffer += new_chars_in_buffer;
405 // Make sure we used all the data.
406 DCHECK(new_offset == utf8_split_char_buffer_length_);
407 DCHECK(*data_in_buffer <= kBufferSize);
408
409 utf8_split_char_buffer_length_ = 0;
410 }
411
412 // Move bytes which are part of an incomplete character from the end of the
413 // current chunk to utf8_split_char_buffer_. They will be converted when the
414 // next data chunk arrives.
415 while (current_data_length_ > current_data_offset_ &&
416 (c = current_data_[current_data_length_ - 1]) >
417 unibrow::Utf8::kMaxOneByteChar) {
418 --current_data_length_;
419 ++utf8_split_char_buffer_length_;
420 }
421 for (unsigned i = 0; i < utf8_split_char_buffer_length_; ++i) {
422 utf8_split_char_buffer_[i] = current_data_[current_data_length_ + i];
423 }
424 }
425
426
279 // ---------------------------------------------------------------------------- 427 // ----------------------------------------------------------------------------
280 // ExternalTwoByteStringUtf16CharacterStream 428 // ExternalTwoByteStringUtf16CharacterStream
281 429
282 ExternalTwoByteStringUtf16CharacterStream:: 430 ExternalTwoByteStringUtf16CharacterStream::
283 ~ExternalTwoByteStringUtf16CharacterStream() { } 431 ~ExternalTwoByteStringUtf16CharacterStream() { }
284 432
285 433
286 ExternalTwoByteStringUtf16CharacterStream 434 ExternalTwoByteStringUtf16CharacterStream
287 ::ExternalTwoByteStringUtf16CharacterStream( 435 ::ExternalTwoByteStringUtf16CharacterStream(
288 Handle<ExternalTwoByteString> data, 436 Handle<ExternalTwoByteString> data,
289 int start_position, 437 int start_position,
290 int end_position) 438 int end_position)
291 : Utf16CharacterStream(), 439 : Utf16CharacterStream(),
292 source_(data), 440 source_(data),
293 raw_data_(data->GetTwoByteData(start_position)) { 441 raw_data_(data->GetTwoByteData(start_position)) {
294 buffer_cursor_ = raw_data_, 442 buffer_cursor_ = raw_data_,
295 buffer_end_ = raw_data_ + (end_position - start_position); 443 buffer_end_ = raw_data_ + (end_position - start_position);
296 pos_ = start_position; 444 pos_ = start_position;
297 } 445 }
298 446
299 } } // namespace v8::internal 447 } } // namespace v8::internal
OLDNEW
« no previous file with comments | « src/scanner-character-streams.h ('k') | test/cctest/test-api.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698