src/scanner-character-streams.cc - Issue 566553002: Add script streaming API.

Side by Side Diff: src/scanner-character-streams.cc

Issue 566553002: Add script streaming API. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge

Patch Set: rebased Created 6 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "src/v8.h"	5 #include "src/v8.h"

6	6

7 #include "src/scanner-character-streams.h"	7 #include "src/scanner-character-streams.h"

8	8

	9 #include "include/v8.h"

9 #include "src/handles.h"	10 #include "src/handles.h"

10 #include "src/unicode-inl.h"	11 #include "src/unicode-inl.h"

11	12

12 namespace v8 {	13 namespace v8 {

13 namespace internal {	14 namespace internal {

14	15

	16 namespace {

	17

	18 unsigned CopyCharsHelper(uint16_t* dest, unsigned length, const uint8_t* src,

	19 unsigned* src_pos, unsigned src_length,

	20 ScriptCompiler::StreamedSource::Encoding encoding) {

	21 if (encoding == ScriptCompiler::StreamedSource::UTF8) {

	22 return v8::internal::Utf8ToUtf16CharacterStream::CopyChars(

	23 dest, length, src, src_pos, src_length);

	24 }

	25

	26 unsigned to_fill = length;

	27 if (to_fill > src_length - *src_pos) to_fill = src_length - *src_pos;

	28

	29 if (encoding == ScriptCompiler::StreamedSource::ONE_BYTE) {

	30 v8::internal::CopyChars<uint8_t, uint16_t>(dest, src + *src_pos, to_fill);

	31 } else {

	32 DCHECK(encoding == ScriptCompiler::StreamedSource::TWO_BYTE);

	33 v8::internal::CopyChars<uint16_t, uint16_t>(

	34 dest, reinterpret_cast<const uint16_t*>(src + *src_pos), to_fill);

	35 }

	36 *src_pos += to_fill;

	37 return to_fill;

	38 }

	39

	40 } // namespace

	41

	42

15 // ----------------------------------------------------------------------------	43 // ----------------------------------------------------------------------------

16 // BufferedUtf16CharacterStreams	44 // BufferedUtf16CharacterStreams

17	45

18 BufferedUtf16CharacterStream::BufferedUtf16CharacterStream()	46 BufferedUtf16CharacterStream::BufferedUtf16CharacterStream()

19 : Utf16CharacterStream(),	47 : Utf16CharacterStream(),

20 pushback_limit_(NULL) {	48 pushback_limit_(NULL) {

21 // Initialize buffer as being empty. First read will fill the buffer.	49 // Initialize buffer as being empty. First read will fill the buffer.

22 buffer_cursor_ = buffer_;	50 buffer_cursor_ = buffer_;

23 buffer_end_ = buffer_;	51 buffer_end_ = buffer_;

24 }	52 }

(...skipping 113 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
138 raw_data_length_(length),	166 raw_data_length_(length),

139 raw_data_pos_(0),	167 raw_data_pos_(0),

140 raw_character_position_(0) {	168 raw_character_position_(0) {

141 ReadBlock();	169 ReadBlock();

142 }	170 }

143	171

144	172

145 Utf8ToUtf16CharacterStream::~Utf8ToUtf16CharacterStream() { }	173 Utf8ToUtf16CharacterStream::~Utf8ToUtf16CharacterStream() { }

146	174

147	175

	176 unsigned Utf8ToUtf16CharacterStream::CopyChars(uint16_t* dest, unsigned length,

	177 const byte* src,

	178 unsigned* src_pos,

	179 unsigned src_length) {

	180 static const unibrow::uchar kMaxUtf16Character = 0xffff;

	181 unsigned i = 0;

	182 // Because of the UTF-16 lead and trail surrogates, we stop filling the buffer

	183 // one character early (in the normal case), because we need to have at least

	184 // two free spaces in the buffer to be sure that the next character will fit.

	185 while (i < length - 1) {

	186 if (*src_pos == src_length) break;

	187 unibrow::uchar c = src[*src_pos];

	188 if (c <= unibrow::Utf8::kMaxOneByteChar) {

	189 *src_pos = *src_pos + 1;

	190 } else {

	191 c = unibrow::Utf8::CalculateValue(src + *src_pos, src_length - *src_pos,

	192 src_pos);

	193 }

	194 if (c > kMaxUtf16Character) {

	195 dest[i++] = unibrow::Utf16::LeadSurrogate(c);

	196 dest[i++] = unibrow::Utf16::TrailSurrogate(c);

	197 } else {

	198 dest[i++] = static_cast<uc16>(c);

	199 }

	200 }

	201 return i;

	202 }

	203

	204

148 unsigned Utf8ToUtf16CharacterStream::BufferSeekForward(unsigned delta) {	205 unsigned Utf8ToUtf16CharacterStream::BufferSeekForward(unsigned delta) {

149 unsigned old_pos = pos_;	206 unsigned old_pos = pos_;

150 unsigned target_pos = pos_ + delta;	207 unsigned target_pos = pos_ + delta;

151 SetRawPosition(target_pos);	208 SetRawPosition(target_pos);

152 pos_ = raw_character_position_;	209 pos_ = raw_character_position_;

153 ReadBlock();	210 ReadBlock();

154 return pos_ - old_pos;	211 return pos_ - old_pos;

155 }	212 }

156	213

157	214

158 unsigned Utf8ToUtf16CharacterStream::FillBuffer(unsigned char_position) {	215 unsigned Utf8ToUtf16CharacterStream::FillBuffer(unsigned char_position) {

159 static const unibrow::uchar kMaxUtf16Character = 0xffff;

160 SetRawPosition(char_position);	216 SetRawPosition(char_position);

161 if (raw_character_position_ != char_position) {	217 if (raw_character_position_ != char_position) {

162 // char_position was not a valid position in the stream (hit the end	218 // char_position was not a valid position in the stream (hit the end

163 // while spooling to it).	219 // while spooling to it).

164 return 0u;	220 return 0u;

165 }	221 }

166 unsigned i = 0;	222 unsigned i = CopyChars(buffer_, kBufferSize, raw_data_, &raw_data_pos_,

167 while (i < kBufferSize - 1) {	223 raw_data_length_);

168 if (raw_data_pos_ == raw_data_length_) break;

169 unibrow::uchar c = raw_data_[raw_data_pos_];

170 if (c <= unibrow::Utf8::kMaxOneByteChar) {

171 raw_data_pos_++;

172 } else {

173 c = unibrow::Utf8::CalculateValue(raw_data_ + raw_data_pos_,

174 raw_data_length_ - raw_data_pos_,

175 &raw_data_pos_);

176 }

177 if (c > kMaxUtf16Character) {

178 buffer_[i++] = unibrow::Utf16::LeadSurrogate(c);

179 buffer_[i++] = unibrow::Utf16::TrailSurrogate(c);

180 } else {

181 buffer_[i++] = static_cast<uc16>(c);

182 }

183 }

184 raw_character_position_ = char_position + i;	224 raw_character_position_ = char_position + i;

185 return i;	225 return i;

186 }	226 }

187	227

188	228

189 static const byte kUtf8MultiByteMask = 0xC0;	229 static const byte kUtf8MultiByteMask = 0xC0;

190 static const byte kUtf8MultiByteCharFollower = 0x80;	230 static const byte kUtf8MultiByteCharFollower = 0x80;

191	231

192	232

193 #ifdef DEBUG	233 #ifdef DEBUG

(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
269 Utf8CharacterForward(raw_data_, &raw_data_pos_);	309 Utf8CharacterForward(raw_data_, &raw_data_pos_);

270 raw_character_position_++;	310 raw_character_position_++;

271 DCHECK(raw_data_pos_ - old_pos <= 4);	311 DCHECK(raw_data_pos_ - old_pos <= 4);

272 if (raw_data_pos_ - old_pos == 4) raw_character_position_++;	312 if (raw_data_pos_ - old_pos == 4) raw_character_position_++;

273 }	313 }

274 // No surrogate pair splitting.	314 // No surrogate pair splitting.

275 DCHECK(raw_character_position_ == target_position);	315 DCHECK(raw_character_position_ == target_position);

276 }	316 }

277	317

278	318

	319 unsigned ExternalStreamingStream::FillBuffer(unsigned position) {

	320 // Ignore "position" which is the position in the decoded data. Instead,

	321 // ExternalStreamingStream keeps track of the position in the raw data.

	322 unsigned data_in_buffer = 0;

	323 // Note that the UTF-8 decoder might not be able to fill the buffer

	324 // completely; it will typically leave the last character empty (see

	325 // Utf8ToUtf16CharacterStream::CopyChars).

	326 while (data_in_buffer < kBufferSize - 1) {

	327 if (current_data_ == NULL) {

	328 // GetSomeData will wait until the embedder has enough data. Here's an

	329 // interface between the API which uses size_t (which is the correct type

	330 // here) and the internal parts which use unsigned. TODO(marja): make the

	331 // internal parts use size_t too.

	332 current_data_length_ =

	333 static_cast<unsigned>(source_stream_->GetMoreData(&current_data_));

	334 current_data_offset_ = 0;

	335 bool data_ends = current_data_length_ == 0;

	336

	337 // A caveat: a data chunk might end with bytes from an incomplete UTF-8

	338 // character (the rest of the bytes will be in the next chunk).

	339 if (encoding_ == ScriptCompiler::StreamedSource::UTF8) {

	340 HandleUtf8SplitCharacters(&data_in_buffer);

	341 if (!data_ends && current_data_offset_ == current_data_length_) {

	342 // The data stream didn't end, but we used all the data in the

	343 // chunk. This will only happen when the chunk was really small. We

	344 // don't handle the case where a UTF-8 character is split over several

	345 // chunks; in that case V8 won't crash, but it will be a parse error.

	346 delete[] current_data_;

	347 current_data_ = NULL;

	348 current_data_length_ = 0;

	349 current_data_offset_ = 0;

	350 continue; // Request a new chunk.

	351 }

	352 }

	353

	354 // Did the data stream end?

	355 if (data_ends) {

	356 DCHECK(utf8_split_char_buffer_length_ == 0);

	357 return data_in_buffer;

	358 }

	359 }

	360

	361 // Fill the buffer from current_data_.

	362 unsigned new_offset = 0;

	363 unsigned new_chars_in_buffer =

	364 CopyCharsHelper(buffer_ + data_in_buffer, kBufferSize - data_in_buffer,

	365 current_data_ + current_data_offset_, &new_offset,

	366 current_data_length_ - current_data_offset_, encoding_);

	367 data_in_buffer += new_chars_in_buffer;

	368 current_data_offset_ += new_offset;

	369 DCHECK(data_in_buffer <= kBufferSize);

	370

	371 // Did we use all the data in the data chunk?

	372 if (current_data_offset_ == current_data_length_) {

	373 delete[] current_data_;

	374 current_data_ = NULL;

	375 current_data_length_ = 0;

	376 current_data_offset_ = 0;

	377 }

	378 }

	379 return data_in_buffer;

	380 }

	381

	382 void ExternalStreamingStream::HandleUtf8SplitCharacters(

	383 unsigned* data_in_buffer) {

	384 // First check if we have leftover data from the last chunk.

	385 unibrow::uchar c;

	386 if (utf8_split_char_buffer_length_ > 0) {

	387 // Move the bytes which are part of the split character (which started in

	388 // the previous chunk) into utf8_split_char_buffer_.

	389 while (current_data_offset_ < current_data_length_ &&

	390 utf8_split_char_buffer_length_ < 4 &&

	391 (c = current_data_[current_data_offset_]) >

	392 unibrow::Utf8::kMaxOneByteChar) {

	393 utf8_split_char_buffer_[utf8_split_char_buffer_length_] = c;

	394 ++utf8_split_char_buffer_length_;

	395 ++current_data_offset_;

	396 }

	397

	398 // Convert the data in utf8_split_char_buffer_.

	399 unsigned new_offset = 0;

	400 unsigned new_chars_in_buffer =

	401 CopyCharsHelper(buffer_ + *data_in_buffer,

	402 kBufferSize - *data_in_buffer, utf8_split_char_buffer_,

	403 &new_offset, utf8_split_char_buffer_length_, encoding_);

	404 *data_in_buffer += new_chars_in_buffer;

	405 // Make sure we used all the data.

	406 DCHECK(new_offset == utf8_split_char_buffer_length_);

	407 DCHECK(*data_in_buffer <= kBufferSize);

	408

	409 utf8_split_char_buffer_length_ = 0;

	410 }

	411

	412 // Move bytes which are part of an incomplete character from the end of the

	413 // current chunk to utf8_split_char_buffer_. They will be converted when the

	414 // next data chunk arrives.

	415 while (current_data_length_ > current_data_offset_ &&

	416 (c = current_data_[current_data_length_ - 1]) >

	417 unibrow::Utf8::kMaxOneByteChar) {

	418 --current_data_length_;

	419 ++utf8_split_char_buffer_length_;

	420 }

	421 for (unsigned i = 0; i < utf8_split_char_buffer_length_; ++i) {

	422 utf8_split_char_buffer_[i] = current_data_[current_data_length_ + i];

	423 }

	424 }

	425

	426

279 // ----------------------------------------------------------------------------	427 // ----------------------------------------------------------------------------

280 // ExternalTwoByteStringUtf16CharacterStream	428 // ExternalTwoByteStringUtf16CharacterStream

281	429

282 ExternalTwoByteStringUtf16CharacterStream::	430 ExternalTwoByteStringUtf16CharacterStream::

283 ~ExternalTwoByteStringUtf16CharacterStream() { }	431 ~ExternalTwoByteStringUtf16CharacterStream() { }

284	432

285	433

286 ExternalTwoByteStringUtf16CharacterStream	434 ExternalTwoByteStringUtf16CharacterStream

287 ::ExternalTwoByteStringUtf16CharacterStream(	435 ::ExternalTwoByteStringUtf16CharacterStream(

288 Handle<ExternalTwoByteString> data,	436 Handle<ExternalTwoByteString> data,

289 int start_position,	437 int start_position,

290 int end_position)	438 int end_position)

291 : Utf16CharacterStream(),	439 : Utf16CharacterStream(),

292 source_(data),	440 source_(data),

293 raw_data_(data->GetTwoByteData(start_position)) {	441 raw_data_(data->GetTwoByteData(start_position)) {

294 buffer_cursor_ = raw_data_,	442 buffer_cursor_ = raw_data_,

295 buffer_end_ = raw_data_ + (end_position - start_position);	443 buffer_end_ = raw_data_ + (end_position - start_position);

296 pos_ = start_position;	444 pos_ = start_position;

297 }	445 }

298	446

299 } } // namespace v8::internal	447 } } // namespace v8::internal

OLD	NEW

« no previous file with comments | « src/scanner-character-streams.h ('k') | test/cctest/test-api.cc » ('j') | no next file with comments »