Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(179)

Side by Side Diff: src/scanner-character-streams.cc

Issue 366153002: Add script streaming API (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: special chars fix Created 6 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD | NEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/v8.h" 5 #include "src/v8.h"
6 6
7 #include "src/scanner-character-streams.h" 7 #include "src/scanner-character-streams.h"
8 8
9 #include "include/v8.h"
9 #include "src/handles.h" 10 #include "src/handles.h"
10 #include "src/unicode-inl.h" 11 #include "src/unicode-inl.h"
11 12
12 namespace v8 { 13 namespace v8 {
13 namespace internal { 14 namespace internal {
14 15
16 namespace {
17
18 unsigned CopyCharsHelper(uint16_t* dest, unsigned length, const char* src,
19 unsigned* src_pos, unsigned src_length,
20 ExternalSourceStream::Encoding encoding) {
21 if (encoding == ExternalSourceStream::UTF8) {
22 return v8::internal::Utf8ToUtf16CharacterStream::CopyChars(
23 dest, length, reinterpret_cast<const uint8_t*>(src), src_pos,
24 src_length);
25 }
26
27 unsigned to_fill = length;
28 if (to_fill > src_length - *src_pos) to_fill = src_length - *src_pos;
29
30 if (encoding == ExternalSourceStream::ONE_BYTE) {
31 v8::internal::CopyChars<uint8_t, uint16_t>(
32 dest, reinterpret_cast<const uint8_t*>(src + *src_pos), to_fill);
33 } else {
34 DCHECK(encoding == ExternalSourceStream::TWO_BYTE);
35 v8::internal::CopyChars<uint16_t, uint16_t>(
36 dest, reinterpret_cast<const uint16_t*>(src + *src_pos), to_fill);
37 }
38 *src_pos += to_fill;
39 return to_fill;
40 }
41
42 } // namespace
43
44
15 // ---------------------------------------------------------------------------- 45 // ----------------------------------------------------------------------------
16 // BufferedUtf16CharacterStreams 46 // BufferedUtf16CharacterStreams
17 47
18 BufferedUtf16CharacterStream::BufferedUtf16CharacterStream() 48 BufferedUtf16CharacterStream::BufferedUtf16CharacterStream()
19 : Utf16CharacterStream(), 49 : Utf16CharacterStream(),
20 pushback_limit_(NULL) { 50 pushback_limit_(NULL) {
21 // Initialize buffer as being empty. First read will fill the buffer. 51 // Initialize buffer as being empty. First read will fill the buffer.
22 buffer_cursor_ = buffer_; 52 buffer_cursor_ = buffer_;
23 buffer_end_ = buffer_; 53 buffer_end_ = buffer_;
24 } 54 }
(...skipping 113 matching lines...) Expand 10 before | Expand all | Expand 10 after
138 raw_data_length_(length), 168 raw_data_length_(length),
139 raw_data_pos_(0), 169 raw_data_pos_(0),
140 raw_character_position_(0) { 170 raw_character_position_(0) {
141 ReadBlock(); 171 ReadBlock();
142 } 172 }
143 173
144 174
145 Utf8ToUtf16CharacterStream::~Utf8ToUtf16CharacterStream() { } 175 Utf8ToUtf16CharacterStream::~Utf8ToUtf16CharacterStream() { }
146 176
147 177
178 unsigned Utf8ToUtf16CharacterStream::CopyChars(uint16_t* dest, unsigned length,
179 const byte* src,
180 unsigned* src_pos,
181 unsigned src_length) {
182 static const unibrow::uchar kMaxUtf16Character = 0xffff;
183 unsigned i = 0;
184 // Because of the UTF-16 lead and trail surrogates, we stop filling the buffer
185 // one character early (in the normal case), because we need to have at least
186 // two free spaces in the buffer to be sure that the next character will fit.
187 while (i < length - 1) {
188 if (*src_pos == src_length) break;
189 unibrow::uchar c = src[*src_pos];
190 if (c <= unibrow::Utf8::kMaxOneByteChar) {
191 *src_pos = *src_pos + 1;
192 } else {
193 c = unibrow::Utf8::CalculateValue(src + *src_pos, src_length - *src_pos,
194 src_pos);
195 }
196 if (c > kMaxUtf16Character) {
197 dest[i++] = unibrow::Utf16::LeadSurrogate(c);
198 dest[i++] = unibrow::Utf16::TrailSurrogate(c);
199 } else {
200 dest[i++] = static_cast<uc16>(c);
201 }
202 }
203 return i;
204 }
205
206
148 unsigned Utf8ToUtf16CharacterStream::BufferSeekForward(unsigned delta) { 207 unsigned Utf8ToUtf16CharacterStream::BufferSeekForward(unsigned delta) {
149 unsigned old_pos = pos_; 208 unsigned old_pos = pos_;
150 unsigned target_pos = pos_ + delta; 209 unsigned target_pos = pos_ + delta;
151 SetRawPosition(target_pos); 210 SetRawPosition(target_pos);
152 pos_ = raw_character_position_; 211 pos_ = raw_character_position_;
153 ReadBlock(); 212 ReadBlock();
154 return pos_ - old_pos; 213 return pos_ - old_pos;
155 } 214 }
156 215
157 216
158 unsigned Utf8ToUtf16CharacterStream::FillBuffer(unsigned char_position) { 217 unsigned Utf8ToUtf16CharacterStream::FillBuffer(unsigned char_position) {
159 static const unibrow::uchar kMaxUtf16Character = 0xffff;
160 SetRawPosition(char_position); 218 SetRawPosition(char_position);
161 if (raw_character_position_ != char_position) { 219 if (raw_character_position_ != char_position) {
162 // char_position was not a valid position in the stream (hit the end 220 // char_position was not a valid position in the stream (hit the end
163 // while spooling to it). 221 // while spooling to it).
164 return 0u; 222 return 0u;
165 } 223 }
166 unsigned i = 0; 224 unsigned i = CopyChars(buffer_, kBufferSize, raw_data_, &raw_data_pos_,
167 while (i < kBufferSize - 1) { 225 raw_data_length_);
168 if (raw_data_pos_ == raw_data_length_) break;
169 unibrow::uchar c = raw_data_[raw_data_pos_];
170 if (c <= unibrow::Utf8::kMaxOneByteChar) {
171 raw_data_pos_++;
172 } else {
173 c = unibrow::Utf8::CalculateValue(raw_data_ + raw_data_pos_,
174 raw_data_length_ - raw_data_pos_,
175 &raw_data_pos_);
176 }
177 if (c > kMaxUtf16Character) {
178 buffer_[i++] = unibrow::Utf16::LeadSurrogate(c);
179 buffer_[i++] = unibrow::Utf16::TrailSurrogate(c);
180 } else {
181 buffer_[i++] = static_cast<uc16>(c);
182 }
183 }
184 raw_character_position_ = char_position + i; 226 raw_character_position_ = char_position + i;
185 return i; 227 return i;
186 } 228 }
187 229
188 230
189 static const byte kUtf8MultiByteMask = 0xC0; 231 static const byte kUtf8MultiByteMask = 0xC0;
190 static const byte kUtf8MultiByteCharFollower = 0x80; 232 static const byte kUtf8MultiByteCharFollower = 0x80;
191 233
192 234
193 #ifdef DEBUG 235 #ifdef DEBUG
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after
269 Utf8CharacterForward(raw_data_, &raw_data_pos_); 311 Utf8CharacterForward(raw_data_, &raw_data_pos_);
270 raw_character_position_++; 312 raw_character_position_++;
271 DCHECK(raw_data_pos_ - old_pos <= 4); 313 DCHECK(raw_data_pos_ - old_pos <= 4);
272 if (raw_data_pos_ - old_pos == 4) raw_character_position_++; 314 if (raw_data_pos_ - old_pos == 4) raw_character_position_++;
273 } 315 }
274 // No surrogate pair splitting. 316 // No surrogate pair splitting.
275 DCHECK(raw_character_position_ == target_position); 317 DCHECK(raw_character_position_ == target_position);
276 } 318 }
277 319
278 320
321 unsigned ExternalStreamingStream::FillBuffer(unsigned position) {
322 // Ignore "position" which is the position in the decoded data. Instead,
323 // ExternalStreamingStream keeps track of the position in the raw data.
324 unsigned data_in_buffer = 0;
325 // Note that the UTF-8 decoder might not be able to fill the buffer
326 // completely; it will typically leave the last character empty (see
327 // Utf8ToUtf16CharacterStream::CopyChars).
328 while (data_in_buffer < kBufferSize - 1) {
329 if (current_data_ == NULL) {
330 // GetSomeData will wait until the embedder has enough data.
331 current_data_length_ =
332 source_stream_->GetSomeData(&current_data_, raw_position_);
333 raw_position_ += current_data_length_;
334 current_data_offset_ = 0;
335 // Did the data stream end?
336 if (current_data_length_ == 0) {
337 return data_in_buffer;
338 }
339 }
340 // Fill the buffer from current_data_.
341 unsigned new_offset = 0;
342 unsigned new_chars_in_buffer = CopyCharsHelper(
343 buffer_ + data_in_buffer, kBufferSize - data_in_buffer,
344 current_data_ + current_data_offset_, &new_offset,
345 current_data_length_ - current_data_offset_, source_stream_->encoding);
346 data_in_buffer += new_chars_in_buffer;
347 current_data_offset_ += new_offset;
348 DCHECK(data_in_buffer <= kBufferSize);
349 // Did we use all the data?
350 if (current_data_offset_ == current_data_length_) {
351 delete[] current_data_;
352 current_data_ = NULL;
353 current_data_length_ = 0;
354 current_data_offset_ = 0;
355 }
356 }
357 return data_in_buffer;
358 }
359
360
279 // ---------------------------------------------------------------------------- 361 // ----------------------------------------------------------------------------
280 // ExternalTwoByteStringUtf16CharacterStream 362 // ExternalTwoByteStringUtf16CharacterStream
281 363
282 ExternalTwoByteStringUtf16CharacterStream:: 364 ExternalTwoByteStringUtf16CharacterStream::
283 ~ExternalTwoByteStringUtf16CharacterStream() { } 365 ~ExternalTwoByteStringUtf16CharacterStream() { }
284 366
285 367
286 ExternalTwoByteStringUtf16CharacterStream 368 ExternalTwoByteStringUtf16CharacterStream
287 ::ExternalTwoByteStringUtf16CharacterStream( 369 ::ExternalTwoByteStringUtf16CharacterStream(
288 Handle<ExternalTwoByteString> data, 370 Handle<ExternalTwoByteString> data,
289 int start_position, 371 int start_position,
290 int end_position) 372 int end_position)
291 : Utf16CharacterStream(), 373 : Utf16CharacterStream(),
292 source_(data), 374 source_(data),
293 raw_data_(data->GetTwoByteData(start_position)) { 375 raw_data_(data->GetTwoByteData(start_position)) {
294 buffer_cursor_ = raw_data_, 376 buffer_cursor_ = raw_data_,
295 buffer_end_ = raw_data_ + (end_position - start_position); 377 buffer_end_ = raw_data_ + (end_position - start_position);
296 pos_ = start_position; 378 pos_ = start_position;
297 } 379 }
298 380
299 } } // namespace v8::internal 381 } } // namespace v8::internal
OLD | NEW
« include/v8.h ('K') | « src/scanner-character-streams.h ('k') | test/cctest/test-parsing.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698