Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2)

Side by Side Diff: src/scanner-character-streams.cc

Issue 366153002: Add script streaming API (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: added tests + fixed compilation flags (!) Created 6 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/v8.h" 5 #include "src/v8.h"
6 6
7 #include "src/scanner-character-streams.h" 7 #include "src/scanner-character-streams.h"
8 8
9 #include "include/v8.h"
9 #include "src/handles.h" 10 #include "src/handles.h"
10 #include "src/unicode-inl.h" 11 #include "src/unicode-inl.h"
11 12
12 namespace v8 { 13 namespace v8 {
13 namespace internal { 14 namespace internal {
14 15
16 namespace {
17
18 unsigned CopyCharsHelper(
19 uint16_t* dest, unsigned length, const char* src, unsigned* src_pos,
20 unsigned src_length,
21 ScriptCompiler::ExternalSourceStream::Encoding encoding) {
22 if (encoding == ScriptCompiler::ExternalSourceStream::UTF8) {
23 return v8::internal::Utf8ToUtf16CharacterStream::CopyChars(
24 dest, length, reinterpret_cast<const uint8_t*>(src), src_pos,
25 src_length);
26 }
27
28 unsigned to_fill = length;
29 if (to_fill > src_length - *src_pos) to_fill = src_length - *src_pos;
30
31 if (encoding == ScriptCompiler::ExternalSourceStream::ONE_BYTE) {
32 v8::internal::CopyChars<uint8_t, uint16_t>(
33 dest, reinterpret_cast<const uint8_t*>(src + *src_pos), to_fill);
34 } else {
35 DCHECK(encoding == ScriptCompiler::ExternalSourceStream::TWO_BYTE);
36 v8::internal::CopyChars<uint16_t, uint16_t>(
37 dest, reinterpret_cast<const uint16_t*>(src + *src_pos), to_fill);
38 }
39 *src_pos += to_fill;
40 return to_fill;
41 }
42
43 } // namespace
44
45
15 // ---------------------------------------------------------------------------- 46 // ----------------------------------------------------------------------------
16 // BufferedUtf16CharacterStreams 47 // BufferedUtf16CharacterStreams
17 48
18 BufferedUtf16CharacterStream::BufferedUtf16CharacterStream() 49 BufferedUtf16CharacterStream::BufferedUtf16CharacterStream()
19 : Utf16CharacterStream(), 50 : Utf16CharacterStream(),
20 pushback_limit_(NULL) { 51 pushback_limit_(NULL) {
21 // Initialize buffer as being empty. First read will fill the buffer. 52 // Initialize buffer as being empty. First read will fill the buffer.
22 buffer_cursor_ = buffer_; 53 buffer_cursor_ = buffer_;
23 buffer_end_ = buffer_; 54 buffer_end_ = buffer_;
24 } 55 }
(...skipping 113 matching lines...) Expand 10 before | Expand all | Expand 10 after
138 raw_data_length_(length), 169 raw_data_length_(length),
139 raw_data_pos_(0), 170 raw_data_pos_(0),
140 raw_character_position_(0) { 171 raw_character_position_(0) {
141 ReadBlock(); 172 ReadBlock();
142 } 173 }
143 174
144 175
145 Utf8ToUtf16CharacterStream::~Utf8ToUtf16CharacterStream() { } 176 Utf8ToUtf16CharacterStream::~Utf8ToUtf16CharacterStream() { }
146 177
147 178
179 unsigned Utf8ToUtf16CharacterStream::CopyChars(uint16_t* dest, unsigned length,
180 const byte* src,
181 unsigned* src_pos,
182 unsigned src_length) {
183 static const unibrow::uchar kMaxUtf16Character = 0xffff;
184 unsigned i = 0;
185 // Because of the UTF-16 lead and trail surrogates, we stop filling the buffer
186 // one character early (in the normal case), because we need to have at least
187 // two free spaces in the buffer to be sure that the next character will fit.
188 while (i < length - 1) {
189 if (*src_pos == src_length) break;
190 unibrow::uchar c = src[*src_pos];
191 if (c <= unibrow::Utf8::kMaxOneByteChar) {
192 *src_pos = *src_pos + 1;
193 } else {
194 c = unibrow::Utf8::CalculateValue(src + *src_pos, src_length - *src_pos,
195 src_pos);
196 }
197 if (c > kMaxUtf16Character) {
198 dest[i++] = unibrow::Utf16::LeadSurrogate(c);
199 dest[i++] = unibrow::Utf16::TrailSurrogate(c);
200 } else {
201 dest[i++] = static_cast<uc16>(c);
202 }
203 }
204 return i;
205 }
206
207
148 unsigned Utf8ToUtf16CharacterStream::BufferSeekForward(unsigned delta) { 208 unsigned Utf8ToUtf16CharacterStream::BufferSeekForward(unsigned delta) {
149 unsigned old_pos = pos_; 209 unsigned old_pos = pos_;
150 unsigned target_pos = pos_ + delta; 210 unsigned target_pos = pos_ + delta;
151 SetRawPosition(target_pos); 211 SetRawPosition(target_pos);
152 pos_ = raw_character_position_; 212 pos_ = raw_character_position_;
153 ReadBlock(); 213 ReadBlock();
154 return pos_ - old_pos; 214 return pos_ - old_pos;
155 } 215 }
156 216
157 217
158 unsigned Utf8ToUtf16CharacterStream::FillBuffer(unsigned char_position) { 218 unsigned Utf8ToUtf16CharacterStream::FillBuffer(unsigned char_position) {
159 static const unibrow::uchar kMaxUtf16Character = 0xffff;
160 SetRawPosition(char_position); 219 SetRawPosition(char_position);
161 if (raw_character_position_ != char_position) { 220 if (raw_character_position_ != char_position) {
162 // char_position was not a valid position in the stream (hit the end 221 // char_position was not a valid position in the stream (hit the end
163 // while spooling to it). 222 // while spooling to it).
164 return 0u; 223 return 0u;
165 } 224 }
166 unsigned i = 0; 225 unsigned i = CopyChars(buffer_, kBufferSize, raw_data_, &raw_data_pos_,
167 while (i < kBufferSize - 1) { 226 raw_data_length_);
168 if (raw_data_pos_ == raw_data_length_) break;
169 unibrow::uchar c = raw_data_[raw_data_pos_];
170 if (c <= unibrow::Utf8::kMaxOneByteChar) {
171 raw_data_pos_++;
172 } else {
173 c = unibrow::Utf8::CalculateValue(raw_data_ + raw_data_pos_,
174 raw_data_length_ - raw_data_pos_,
175 &raw_data_pos_);
176 }
177 if (c > kMaxUtf16Character) {
178 buffer_[i++] = unibrow::Utf16::LeadSurrogate(c);
179 buffer_[i++] = unibrow::Utf16::TrailSurrogate(c);
180 } else {
181 buffer_[i++] = static_cast<uc16>(c);
182 }
183 }
184 raw_character_position_ = char_position + i; 227 raw_character_position_ = char_position + i;
185 return i; 228 return i;
186 } 229 }
187 230
188 231
189 static const byte kUtf8MultiByteMask = 0xC0; 232 static const byte kUtf8MultiByteMask = 0xC0;
190 static const byte kUtf8MultiByteCharFollower = 0x80; 233 static const byte kUtf8MultiByteCharFollower = 0x80;
191 234
192 235
193 #ifdef DEBUG 236 #ifdef DEBUG
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after
269 Utf8CharacterForward(raw_data_, &raw_data_pos_); 312 Utf8CharacterForward(raw_data_, &raw_data_pos_);
270 raw_character_position_++; 313 raw_character_position_++;
271 DCHECK(raw_data_pos_ - old_pos <= 4); 314 DCHECK(raw_data_pos_ - old_pos <= 4);
272 if (raw_data_pos_ - old_pos == 4) raw_character_position_++; 315 if (raw_data_pos_ - old_pos == 4) raw_character_position_++;
273 } 316 }
274 // No surrogate pair splitting. 317 // No surrogate pair splitting.
275 DCHECK(raw_character_position_ == target_position); 318 DCHECK(raw_character_position_ == target_position);
276 } 319 }
277 320
278 321
322 unsigned ExternalStreamingStream::FillBuffer(unsigned position) {
323 // Ignore "position" which is the position in the decoded data. Instead,
324 // ExternalStreamingStream keeps track of the position in the raw data.
325 unsigned data_in_buffer = 0;
326 // Note that the UTF-8 decoder might not be able to fill the buffer
327 // completely; it will typically leave the last character empty (see
328 // Utf8ToUtf16CharacterStream::CopyChars).
329 while (data_in_buffer < kBufferSize - 1) {
330 if (current_data_ == NULL) {
331 // GetSomeData will wait until the embedder has enough data.
332 current_data_length_ = source_stream_->GetMoreData(&current_data_);
333 current_data_offset_ = 0;
334 // Did the data stream end?
335 if (current_data_length_ == 0) {
336 return data_in_buffer;
337 }
338 }
339 // Fill the buffer from current_data_. FIXME: this doesn't work if the data
340 // chunk ends in the middle of an UTF-8 character.
341 unsigned new_offset = 0;
342 unsigned new_chars_in_buffer = CopyCharsHelper(
343 buffer_ + data_in_buffer, kBufferSize - data_in_buffer,
344 current_data_ + current_data_offset_, &new_offset,
345 current_data_length_ - current_data_offset_, source_stream_->encoding);
346 data_in_buffer += new_chars_in_buffer;
347 current_data_offset_ += new_offset;
348 DCHECK(data_in_buffer <= kBufferSize);
349 // Did we use all the data?
350 if (current_data_offset_ == current_data_length_) {
351 delete[] current_data_;
352 current_data_ = NULL;
353 current_data_length_ = 0;
354 current_data_offset_ = 0;
355 }
356 }
357 return data_in_buffer;
358 }
359
360
279 // ---------------------------------------------------------------------------- 361 // ----------------------------------------------------------------------------
280 // ExternalTwoByteStringUtf16CharacterStream 362 // ExternalTwoByteStringUtf16CharacterStream
281 363
282 ExternalTwoByteStringUtf16CharacterStream:: 364 ExternalTwoByteStringUtf16CharacterStream::
283 ~ExternalTwoByteStringUtf16CharacterStream() { } 365 ~ExternalTwoByteStringUtf16CharacterStream() { }
284 366
285 367
286 ExternalTwoByteStringUtf16CharacterStream 368 ExternalTwoByteStringUtf16CharacterStream
287 ::ExternalTwoByteStringUtf16CharacterStream( 369 ::ExternalTwoByteStringUtf16CharacterStream(
288 Handle<ExternalTwoByteString> data, 370 Handle<ExternalTwoByteString> data,
289 int start_position, 371 int start_position,
290 int end_position) 372 int end_position)
291 : Utf16CharacterStream(), 373 : Utf16CharacterStream(),
292 source_(data), 374 source_(data),
293 raw_data_(data->GetTwoByteData(start_position)) { 375 raw_data_(data->GetTwoByteData(start_position)) {
294 buffer_cursor_ = raw_data_, 376 buffer_cursor_ = raw_data_,
295 buffer_end_ = raw_data_ + (end_position - start_position); 377 buffer_end_ = raw_data_ + (end_position - start_position);
296 pos_ = start_position; 378 pos_ = start_position;
297 } 379 }
298 380
299 } } // namespace v8::internal 381 } } // namespace v8::internal
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698