src/scanner-character-streams.cc - Issue 366153002: Add script streaming API

Side by Side Diff: src/scanner-character-streams.cc

Issue 366153002: Add script streaming API (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge

Patch Set: cleanup Created 6 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "src/v8.h"	5 #include "src/v8.h"

6	6

7 #include "src/scanner-character-streams.h"	7 #include "src/scanner-character-streams.h"

8	8

	9 #include "include/v8.h"

9 #include "src/handles.h"	10 #include "src/handles.h"

10 #include "src/unicode-inl.h"	11 #include "src/unicode-inl.h"

11	12

12 namespace v8 {	13 namespace v8 {

13 namespace internal {	14 namespace internal {

14	15

	16 namespace {

	17

	18 unsigned CopyCharsHelper(

	19 uint16_t* dest, unsigned length, const uint8_t* src, unsigned* src_pos,

	20 unsigned src_length,

	21 ScriptCompiler::ExternalSourceStream::Encoding encoding) {

	22 if (encoding == ScriptCompiler::ExternalSourceStream::UTF8) {

	23 return v8::internal::Utf8ToUtf16CharacterStream::CopyChars(

	24 dest, length, src, src_pos, src_length);

	25 }

	26

	27 unsigned to_fill = length;

	28 if (to_fill > src_length - *src_pos) to_fill = src_length - *src_pos;

	29

	30 if (encoding == ScriptCompiler::ExternalSourceStream::ONE_BYTE) {

	31 v8::internal::CopyChars<uint8_t, uint16_t>(dest, src + *src_pos, to_fill);

	32 } else {

	33 DCHECK(encoding == ScriptCompiler::ExternalSourceStream::TWO_BYTE);

	34 v8::internal::CopyChars<uint16_t, uint16_t>(

	35 dest, reinterpret_cast<const uint16_t*>(src + *src_pos), to_fill);

	36 }

	37 *src_pos += to_fill;

	38 return to_fill;

	39 }

	40

	41 } // namespace

	42

	43

15 // ----------------------------------------------------------------------------	44 // ----------------------------------------------------------------------------

16 // BufferedUtf16CharacterStreams	45 // BufferedUtf16CharacterStreams

17	46

18 BufferedUtf16CharacterStream::BufferedUtf16CharacterStream()	47 BufferedUtf16CharacterStream::BufferedUtf16CharacterStream()

19 : Utf16CharacterStream(),	48 : Utf16CharacterStream(),

20 pushback_limit_(NULL) {	49 pushback_limit_(NULL) {

21 // Initialize buffer as being empty. First read will fill the buffer.	50 // Initialize buffer as being empty. First read will fill the buffer.

22 buffer_cursor_ = buffer_;	51 buffer_cursor_ = buffer_;

23 buffer_end_ = buffer_;	52 buffer_end_ = buffer_;

24 }	53 }

(...skipping 113 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
138 raw_data_length_(length),	167 raw_data_length_(length),

139 raw_data_pos_(0),	168 raw_data_pos_(0),

140 raw_character_position_(0) {	169 raw_character_position_(0) {

141 ReadBlock();	170 ReadBlock();

142 }	171 }

143	172

144	173

145 Utf8ToUtf16CharacterStream::~Utf8ToUtf16CharacterStream() { }	174 Utf8ToUtf16CharacterStream::~Utf8ToUtf16CharacterStream() { }

146	175

147	176

	177 unsigned Utf8ToUtf16CharacterStream::CopyChars(uint16_t* dest, unsigned length,

	178 const byte* src,

	179 unsigned* src_pos,

	180 unsigned src_length) {

	181 static const unibrow::uchar kMaxUtf16Character = 0xffff;

	182 unsigned i = 0;

	183 // Because of the UTF-16 lead and trail surrogates, we stop filling the buffer

	184 // one character early (in the normal case), because we need to have at least

	185 // two free spaces in the buffer to be sure that the next character will fit.

	186 while (i < length - 1) {

	187 if (*src_pos == src_length) break;

	188 unibrow::uchar c = src[*src_pos];

	189 if (c <= unibrow::Utf8::kMaxOneByteChar) {

	190 *src_pos = *src_pos + 1;

	191 } else {

	192 c = unibrow::Utf8::CalculateValue(src + *src_pos, src_length - *src_pos,

	193 src_pos);

	194 }

	195 if (c > kMaxUtf16Character) {

	196 dest[i++] = unibrow::Utf16::LeadSurrogate(c);

	197 dest[i++] = unibrow::Utf16::TrailSurrogate(c);

	198 } else {

	199 dest[i++] = static_cast<uc16>(c);

	200 }

	201 }

	202 return i;

	203 }

	204

	205

148 unsigned Utf8ToUtf16CharacterStream::BufferSeekForward(unsigned delta) {	206 unsigned Utf8ToUtf16CharacterStream::BufferSeekForward(unsigned delta) {

149 unsigned old_pos = pos_;	207 unsigned old_pos = pos_;

150 unsigned target_pos = pos_ + delta;	208 unsigned target_pos = pos_ + delta;

151 SetRawPosition(target_pos);	209 SetRawPosition(target_pos);

152 pos_ = raw_character_position_;	210 pos_ = raw_character_position_;

153 ReadBlock();	211 ReadBlock();

154 return pos_ - old_pos;	212 return pos_ - old_pos;

155 }	213 }

156	214

157	215

158 unsigned Utf8ToUtf16CharacterStream::FillBuffer(unsigned char_position) {	216 unsigned Utf8ToUtf16CharacterStream::FillBuffer(unsigned char_position) {

159 static const unibrow::uchar kMaxUtf16Character = 0xffff;

160 SetRawPosition(char_position);	217 SetRawPosition(char_position);

161 if (raw_character_position_ != char_position) {	218 if (raw_character_position_ != char_position) {

162 // char_position was not a valid position in the stream (hit the end	219 // char_position was not a valid position in the stream (hit the end

163 // while spooling to it).	220 // while spooling to it).

164 return 0u;	221 return 0u;

165 }	222 }

166 unsigned i = 0;	223 unsigned i = CopyChars(buffer_, kBufferSize, raw_data_, &raw_data_pos_,

167 while (i < kBufferSize - 1) {	224 raw_data_length_);

168 if (raw_data_pos_ == raw_data_length_) break;

169 unibrow::uchar c = raw_data_[raw_data_pos_];

170 if (c <= unibrow::Utf8::kMaxOneByteChar) {

171 raw_data_pos_++;

172 } else {

173 c = unibrow::Utf8::CalculateValue(raw_data_ + raw_data_pos_,

174 raw_data_length_ - raw_data_pos_,

175 &raw_data_pos_);

176 }

177 if (c > kMaxUtf16Character) {

178 buffer_[i++] = unibrow::Utf16::LeadSurrogate(c);

179 buffer_[i++] = unibrow::Utf16::TrailSurrogate(c);

180 } else {

181 buffer_[i++] = static_cast<uc16>(c);

182 }

183 }

184 raw_character_position_ = char_position + i;	225 raw_character_position_ = char_position + i;

185 return i;	226 return i;

186 }	227 }

187	228

188	229

189 static const byte kUtf8MultiByteMask = 0xC0;	230 static const byte kUtf8MultiByteMask = 0xC0;

190 static const byte kUtf8MultiByteCharFollower = 0x80;	231 static const byte kUtf8MultiByteCharFollower = 0x80;

191	232

192	233

193 #ifdef DEBUG	234 #ifdef DEBUG

(...skipping 75 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
269 Utf8CharacterForward(raw_data_, &raw_data_pos_);	310 Utf8CharacterForward(raw_data_, &raw_data_pos_);

270 raw_character_position_++;	311 raw_character_position_++;

271 DCHECK(raw_data_pos_ - old_pos <= 4);	312 DCHECK(raw_data_pos_ - old_pos <= 4);

272 if (raw_data_pos_ - old_pos == 4) raw_character_position_++;	313 if (raw_data_pos_ - old_pos == 4) raw_character_position_++;

273 }	314 }

274 // No surrogate pair splitting.	315 // No surrogate pair splitting.

275 DCHECK(raw_character_position_ == target_position);	316 DCHECK(raw_character_position_ == target_position);

276 }	317 }

277	318

278	319

	320 unsigned ExternalStreamingStream::FillBuffer(unsigned position) {

	321 // Ignore "position" which is the position in the decoded data. Instead,

	322 // ExternalStreamingStream keeps track of the position in the raw data.

	323 unsigned data_in_buffer = 0;

	324 // Note that the UTF-8 decoder might not be able to fill the buffer

	325 // completely; it will typically leave the last character empty (see

	326 // Utf8ToUtf16CharacterStream::CopyChars).

	327 while (data_in_buffer < kBufferSize - 1) {

	328 if (current_data_ == NULL) {

	329 // GetMoreData will wait until the embedder has enough data.

	330 current_data_length_ = source_stream_->GetMoreData(&current_data_);

	331 current_data_offset_ = 0;

	332

	333 // A caveat: a data chunk might end with bytes from an incomplete UTF-8

	334 // character (the rest of the bytes will be in the next chunk).

	335 if (source_stream_->encoding ==

	336 ScriptCompiler::ExternalSourceStream::UTF8) {

	337 bool data_ends = current_data_length_ == 0;

	338 HandleUtf8SplitCharacters(&data_in_buffer);

	339 // Did we use all the data in the data chunk? Note that this would mean

	340 // the chunk was really small. We don't handle the case where a UTF-8

	341 // character is split over several chunks; in that case V8 won't crash,

	342 // but it will be a parse error.

	343 if (!data_ends && current_data_offset_ == current_data_length_) {

	344 delete[] current_data_;

	345 current_data_ = NULL;

	346 current_data_length_ = 0;

	347 current_data_offset_ = 0;

	348 continue;

	349 }

	350 }

	351

	352 // Did the data stream end?

	353 if (current_data_length_ == 0 && utf8_split_char_buffer_length_ == 0) {

	354 return data_in_buffer;

	355 }

	356 }

	357

	358 // Fill the buffer from current_data_.

	359 unsigned new_offset = 0;

	360 unsigned new_chars_in_buffer = CopyCharsHelper(

	361 buffer_ + data_in_buffer, kBufferSize - data_in_buffer,

	362 current_data_ + current_data_offset_, &new_offset,

	363 current_data_length_ - current_data_offset_, source_stream_->encoding);

	364 data_in_buffer += new_chars_in_buffer;

	365 current_data_offset_ += new_offset;

	366 DCHECK(data_in_buffer <= kBufferSize);

	367

	368 // Did we use all the data in the data chunk?

	369 if (current_data_offset_ == current_data_length_) {

	370 delete[] current_data_;

	371 current_data_ = NULL;

	372 current_data_length_ = 0;

	373 current_data_offset_ = 0;

	374 }

	375 }

	376 return data_in_buffer;

	377 }

	378

	379 void ExternalStreamingStream::HandleUtf8SplitCharacters(

	380 unsigned* data_in_buffer) {

	381 // First check if we have leftover data from the last chunk.

	382 unibrow::uchar c;

	383 if (utf8_split_char_buffer_length_ > 0) {

	384 // Move the bytes which are part of the split character (which started in

	385 // the previous chunk) into utf8_split_char_buffer_.

	386 while (current_data_offset_ < current_data_length_ &&

	387 utf8_split_char_buffer_length_ < 4 &&

	388 (c = current_data_[current_data_offset_]) >

	389 unibrow::Utf8::kMaxOneByteChar) {

	390 utf8_split_char_buffer_[utf8_split_char_buffer_length_] = c;

	391 ++utf8_split_char_buffer_length_;

	392 ++current_data_offset_;

	393 }

	394

	395 // Convert the data in utf8_split_char_buffer_.

	396 unsigned new_offset = 0;

	397 unsigned new_chars_in_buffer = CopyCharsHelper(

	398 buffer_ + *data_in_buffer, kBufferSize - *data_in_buffer,

	399 utf8_split_char_buffer_, &new_offset, utf8_split_char_buffer_length_,

	400 source_stream_->encoding);

	401 *data_in_buffer += new_chars_in_buffer;

	402 // Make sure we used all the data.

	403 DCHECK(new_offset == utf8_split_char_buffer_length_);

	404 DCHECK(*data_in_buffer <= kBufferSize);

	405

	406 utf8_split_char_buffer_length_ = 0;

	407 }

	408

	409 // Move bytes which are part of an incomplete character from the end of the

	410 // current chunk to utf8_split_char_buffer_. They will be converted when the

	411 // next data chunk arrives.

	412 while (current_data_length_ > current_data_offset_ &&

	413 (c = current_data_[current_data_length_ - 1]) >

	414 unibrow::Utf8::kMaxOneByteChar) {

	415 --current_data_length_;

	416 ++utf8_split_char_buffer_length_;

	417 }

	418 for (unsigned i = 0; i < utf8_split_char_buffer_length_; ++i) {

	419 utf8_split_char_buffer_[i] = current_data_[current_data_length_ + i];

	420 }

	421 }

	422

	423

279 // ----------------------------------------------------------------------------	424 // ----------------------------------------------------------------------------

280 // ExternalTwoByteStringUtf16CharacterStream	425 // ExternalTwoByteStringUtf16CharacterStream

281	426

282 ExternalTwoByteStringUtf16CharacterStream::	427 ExternalTwoByteStringUtf16CharacterStream::

283 ~ExternalTwoByteStringUtf16CharacterStream() { }	428 ~ExternalTwoByteStringUtf16CharacterStream() { }

284	429

285	430

286 ExternalTwoByteStringUtf16CharacterStream	431 ExternalTwoByteStringUtf16CharacterStream

287 ::ExternalTwoByteStringUtf16CharacterStream(	432 ::ExternalTwoByteStringUtf16CharacterStream(

288 Handle<ExternalTwoByteString> data,	433 Handle<ExternalTwoByteString> data,

289 int start_position,	434 int start_position,

290 int end_position)	435 int end_position)

291 : Utf16CharacterStream(),	436 : Utf16CharacterStream(),

292 source_(data),	437 source_(data),

293 raw_data_(data->GetTwoByteData(start_position)) {	438 raw_data_(data->GetTwoByteData(start_position)) {

294 buffer_cursor_ = raw_data_,	439 buffer_cursor_ = raw_data_,

295 buffer_end_ = raw_data_ + (end_position - start_position);	440 buffer_end_ = raw_data_ + (end_position - start_position);

296 pos_ = start_position;	441 pos_ = start_position;

297 }	442 }

298	443

299 } } // namespace v8::internal	444 } } // namespace v8::internal

OLD	NEW

« src/background-parsing-task.h ('K') | « src/scanner-character-streams.h ('k') | test/cctest/test-api.cc » ('j') | no next file with comments »