src/scanner-character-streams.cc - Issue 366153002: Add script streaming API

Side by Side Diff: src/scanner-character-streams.cc

Issue 366153002: Add script streaming API (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge

Patch Set: rebased again? Created 6 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "src/v8.h"	5 #include "src/v8.h"

6	6

7 #include "src/scanner-character-streams.h"	7 #include "src/scanner-character-streams.h"

8	8

	9 #include "include/v8.h"

9 #include "src/handles.h"	10 #include "src/handles.h"

10 #include "src/unicode-inl.h"	11 #include "src/unicode-inl.h"

11	12

12 namespace v8 {	13 namespace v8 {

13 namespace internal {	14 namespace internal {

14	15

	16 namespace {

	17

	18 unsigned CopyCharsHelper(uint16_t* dest, unsigned length, const uint8_t* src,

	19 unsigned* src_pos, unsigned src_length,

	20 ScriptCompiler::StreamedSource::Encoding encoding) {

	21 if (encoding == ScriptCompiler::StreamedSource::UTF8) {

	22 return v8::internal::Utf8ToUtf16CharacterStream::CopyChars(

	23 dest, length, src, src_pos, src_length);

	24 }

	25

	26 unsigned to_fill = length;

	27 if (to_fill > src_length - *src_pos) to_fill = src_length - *src_pos;

	28

	29 if (encoding == ScriptCompiler::StreamedSource::ONE_BYTE) {

	30 v8::internal::CopyChars<uint8_t, uint16_t>(dest, src + *src_pos, to_fill);

	31 } else {

	32 DCHECK(encoding == ScriptCompiler::StreamedSource::TWO_BYTE);

	33 v8::internal::CopyChars<uint16_t, uint16_t>(

	34 dest, reinterpret_cast<const uint16_t*>(src + *src_pos), to_fill);

	35 }

	36 *src_pos += to_fill;

	37 return to_fill;

	38 }

	39

	40 } // namespace

	41

	42

15 // ----------------------------------------------------------------------------	43 // ----------------------------------------------------------------------------

16 // BufferedUtf16CharacterStreams	44 // BufferedUtf16CharacterStreams

17	45

18 BufferedUtf16CharacterStream::BufferedUtf16CharacterStream()	46 BufferedUtf16CharacterStream::BufferedUtf16CharacterStream()

19 : Utf16CharacterStream(),	47 : Utf16CharacterStream(),

20 pushback_limit_(NULL) {	48 pushback_limit_(NULL) {

21 // Initialize buffer as being empty. First read will fill the buffer.	49 // Initialize buffer as being empty. First read will fill the buffer.

22 buffer_cursor_ = buffer_;	50 buffer_cursor_ = buffer_;

23 buffer_end_ = buffer_;	51 buffer_end_ = buffer_;

24 }	52 }

(...skipping 113 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
138 raw_data_length_(length),	166 raw_data_length_(length),

139 raw_data_pos_(0),	167 raw_data_pos_(0),

140 raw_character_position_(0) {	168 raw_character_position_(0) {

141 ReadBlock();	169 ReadBlock();

142 }	170 }

143	171

144	172

145 Utf8ToUtf16CharacterStream::~Utf8ToUtf16CharacterStream() { }	173 Utf8ToUtf16CharacterStream::~Utf8ToUtf16CharacterStream() { }

146	174

147	175

	176 unsigned Utf8ToUtf16CharacterStream::CopyChars(uint16_t* dest, unsigned length,

	177 const byte* src,

	178 unsigned* src_pos,

	179 unsigned src_length) {

	180 static const unibrow::uchar kMaxUtf16Character = 0xffff;

	181 unsigned i = 0;

	182 // Because of the UTF-16 lead and trail surrogates, we stop filling the buffer

	183 // one character early (in the normal case), because we need to have at least

	184 // two free spaces in the buffer to be sure that the next character will fit.

	185 while (i < length - 1) {

	186 if (*src_pos == src_length) break;

	187 unibrow::uchar c = src[*src_pos];

	188 if (c <= unibrow::Utf8::kMaxOneByteChar) {

	189 *src_pos = *src_pos + 1;

	190 } else {

	191 c = unibrow::Utf8::CalculateValue(src + *src_pos, src_length - *src_pos,

	192 src_pos);

	193 }

	194 if (c > kMaxUtf16Character) {

	195 dest[i++] = unibrow::Utf16::LeadSurrogate(c);

	196 dest[i++] = unibrow::Utf16::TrailSurrogate(c);

	197 } else {

	198 dest[i++] = static_cast<uc16>(c);

	199 }

	200 }

	201 return i;

	202 }

	203

	204

148 unsigned Utf8ToUtf16CharacterStream::BufferSeekForward(unsigned delta) {	205 unsigned Utf8ToUtf16CharacterStream::BufferSeekForward(unsigned delta) {

149 unsigned old_pos = pos_;	206 unsigned old_pos = pos_;

150 unsigned target_pos = pos_ + delta;	207 unsigned target_pos = pos_ + delta;

151 SetRawPosition(target_pos);	208 SetRawPosition(target_pos);

152 pos_ = raw_character_position_;	209 pos_ = raw_character_position_;

153 ReadBlock();	210 ReadBlock();

154 return pos_ - old_pos;	211 return pos_ - old_pos;

155 }	212 }

156	213

157	214

158 unsigned Utf8ToUtf16CharacterStream::FillBuffer(unsigned char_position) {	215 unsigned Utf8ToUtf16CharacterStream::FillBuffer(unsigned char_position) {

159 static const unibrow::uchar kMaxUtf16Character = 0xffff;

160 SetRawPosition(char_position);	216 SetRawPosition(char_position);

161 if (raw_character_position_ != char_position) {	217 if (raw_character_position_ != char_position) {

162 // char_position was not a valid position in the stream (hit the end	218 // char_position was not a valid position in the stream (hit the end

163 // while spooling to it).	219 // while spooling to it).

164 return 0u;	220 return 0u;

165 }	221 }

166 unsigned i = 0;	222 unsigned i = CopyChars(buffer_, kBufferSize, raw_data_, &raw_data_pos_,

167 while (i < kBufferSize - 1) {	223 raw_data_length_);

168 if (raw_data_pos_ == raw_data_length_) break;

169 unibrow::uchar c = raw_data_[raw_data_pos_];

170 if (c <= unibrow::Utf8::kMaxOneByteChar) {

171 raw_data_pos_++;

172 } else {

173 c = unibrow::Utf8::CalculateValue(raw_data_ + raw_data_pos_,

174 raw_data_length_ - raw_data_pos_,

175 &raw_data_pos_);

176 }

177 if (c > kMaxUtf16Character) {

178 buffer_[i++] = unibrow::Utf16::LeadSurrogate(c);

179 buffer_[i++] = unibrow::Utf16::TrailSurrogate(c);

180 } else {

181 buffer_[i++] = static_cast<uc16>(c);

182 }

183 }

184 raw_character_position_ = char_position + i;	224 raw_character_position_ = char_position + i;

185 return i;	225 return i;

186 }	226 }

187	227

188	228

189 static const byte kUtf8MultiByteMask = 0xC0;	229 static const byte kUtf8MultiByteMask = 0xC0;

190 static const byte kUtf8MultiByteCharFollower = 0x80;	230 static const byte kUtf8MultiByteCharFollower = 0x80;

191	231

192	232

193 #ifdef DEBUG	233 #ifdef DEBUG

(...skipping 75 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
269 Utf8CharacterForward(raw_data_, &raw_data_pos_);	309 Utf8CharacterForward(raw_data_, &raw_data_pos_);

270 raw_character_position_++;	310 raw_character_position_++;

271 DCHECK(raw_data_pos_ - old_pos <= 4);	311 DCHECK(raw_data_pos_ - old_pos <= 4);

272 if (raw_data_pos_ - old_pos == 4) raw_character_position_++;	312 if (raw_data_pos_ - old_pos == 4) raw_character_position_++;

273 }	313 }

274 // No surrogate pair splitting.	314 // No surrogate pair splitting.

275 DCHECK(raw_character_position_ == target_position);	315 DCHECK(raw_character_position_ == target_position);

276 }	316 }

277	317

278	318

	319 unsigned ExternalStreamingStream::FillBuffer(unsigned position) {

	320 // Ignore "position" which is the position in the decoded data. Instead,

	321 // ExternalStreamingStream keeps track of the position in the raw data.

	322 unsigned data_in_buffer = 0;

	323 // Note that the UTF-8 decoder might not be able to fill the buffer

	324 // completely; it will typically leave the last character empty (see

	325 // Utf8ToUtf16CharacterStream::CopyChars).

	326 while (data_in_buffer < kBufferSize - 1) {

	327 if (current_data_ == NULL) {

	328 // GetMoreData will wait until the embedder has enough data.

	329 current_data_length_ = source_stream_->GetMoreData(&current_data_);

	330 current_data_offset_ = 0;

	331 bool data_ends = current_data_length_ == 0;

	332

	333 // A caveat: a data chunk might end with bytes from an incomplete UTF-8

	334 // character (the rest of the bytes will be in the next chunk).

	335 if (encoding_ == ScriptCompiler::StreamedSource::UTF8) {

	336 HandleUtf8SplitCharacters(&data_in_buffer);

	337 if (!data_ends && current_data_offset_ == current_data_length_) {

	338 // The data stream didn't end, but we used all the data in the

	339 // chunk. This will only happen when the chunk was really small. We

	340 // don't handle the case where a UTF-8 character is split over several

	341 // chunks; in that case V8 won't crash, but it will be a parse error.

	342 delete[] current_data_;

	343 current_data_ = NULL;

	344 current_data_length_ = 0;

	345 current_data_offset_ = 0;

	346 continue; // Request a new chunk.

	347 }

	348 }

	349

	350 // Did the data stream end?

	351 if (data_ends) {

	352 DCHECK(utf8_split_char_buffer_length_ == 0);

	353 return data_in_buffer;

	354 }

	355 }

	356

	357 // Fill the buffer from current_data_.

	358 unsigned new_offset = 0;

	359 unsigned new_chars_in_buffer =

	360 CopyCharsHelper(buffer_ + data_in_buffer, kBufferSize - data_in_buffer,

	361 current_data_ + current_data_offset_, &new_offset,

	362 current_data_length_ - current_data_offset_, encoding_);

	363 data_in_buffer += new_chars_in_buffer;

	364 current_data_offset_ += new_offset;

	365 DCHECK(data_in_buffer <= kBufferSize);

	366

	367 // Did we use all the data in the data chunk?

	368 if (current_data_offset_ == current_data_length_) {

	369 delete[] current_data_;

	370 current_data_ = NULL;

	371 current_data_length_ = 0;

	372 current_data_offset_ = 0;

	373 }

	374 }

	375 return data_in_buffer;

	376 }

	377

	378 void ExternalStreamingStream::HandleUtf8SplitCharacters(

	379 unsigned* data_in_buffer) {

	380 // First check if we have leftover data from the last chunk.

	381 unibrow::uchar c;

	382 if (utf8_split_char_buffer_length_ > 0) {

	383 // Move the bytes which are part of the split character (which started in

	384 // the previous chunk) into utf8_split_char_buffer_.

	385 while (current_data_offset_ < current_data_length_ &&

	386 utf8_split_char_buffer_length_ < 4 &&

	387 (c = current_data_[current_data_offset_]) >

	388 unibrow::Utf8::kMaxOneByteChar) {

	389 utf8_split_char_buffer_[utf8_split_char_buffer_length_] = c;

	390 ++utf8_split_char_buffer_length_;

	391 ++current_data_offset_;

	392 }

	393

	394 // Convert the data in utf8_split_char_buffer_.

	395 unsigned new_offset = 0;

	396 unsigned new_chars_in_buffer =

	397 CopyCharsHelper(buffer_ + *data_in_buffer,

	398 kBufferSize - *data_in_buffer, utf8_split_char_buffer_,

	399 &new_offset, utf8_split_char_buffer_length_, encoding_);

	400 *data_in_buffer += new_chars_in_buffer;

	401 // Make sure we used all the data.

	402 DCHECK(new_offset == utf8_split_char_buffer_length_);

	403 DCHECK(*data_in_buffer <= kBufferSize);

	404

	405 utf8_split_char_buffer_length_ = 0;

	406 }

	407

	408 // Move bytes which are part of an incomplete character from the end of the

	409 // current chunk to utf8_split_char_buffer_. They will be converted when the

	410 // next data chunk arrives.

	411 while (current_data_length_ > current_data_offset_ &&

	412 (c = current_data_[current_data_length_ - 1]) >

	413 unibrow::Utf8::kMaxOneByteChar) {

	414 --current_data_length_;

	415 ++utf8_split_char_buffer_length_;

	416 }

	417 for (unsigned i = 0; i < utf8_split_char_buffer_length_; ++i) {

	418 utf8_split_char_buffer_[i] = current_data_[current_data_length_ + i];

	419 }

	420 }

	421

	422

279 // ----------------------------------------------------------------------------	423 // ----------------------------------------------------------------------------

280 // ExternalTwoByteStringUtf16CharacterStream	424 // ExternalTwoByteStringUtf16CharacterStream

281	425

282 ExternalTwoByteStringUtf16CharacterStream::	426 ExternalTwoByteStringUtf16CharacterStream::

283 ~ExternalTwoByteStringUtf16CharacterStream() { }	427 ~ExternalTwoByteStringUtf16CharacterStream() { }

284	428

285	429

286 ExternalTwoByteStringUtf16CharacterStream	430 ExternalTwoByteStringUtf16CharacterStream

287 ::ExternalTwoByteStringUtf16CharacterStream(	431 ::ExternalTwoByteStringUtf16CharacterStream(

288 Handle<ExternalTwoByteString> data,	432 Handle<ExternalTwoByteString> data,

289 int start_position,	433 int start_position,

290 int end_position)	434 int end_position)

291 : Utf16CharacterStream(),	435 : Utf16CharacterStream(),

292 source_(data),	436 source_(data),

293 raw_data_(data->GetTwoByteData(start_position)) {	437 raw_data_(data->GetTwoByteData(start_position)) {

294 buffer_cursor_ = raw_data_,	438 buffer_cursor_ = raw_data_,

295 buffer_end_ = raw_data_ + (end_position - start_position);	439 buffer_end_ = raw_data_ + (end_position - start_position);

296 pos_ = start_position;	440 pos_ = start_position;

297 }	441 }

298	442

299 } } // namespace v8::internal	443 } } // namespace v8::internal

OLD	NEW

« no previous file with comments | « src/scanner-character-streams.h ('k') | test/cctest/test-api.cc » ('j') | no next file with comments »