Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(227)

Side by Side Diff: src/parsing/scanner-character-streams.cc

Issue 2256273002: Remove unused Utf8ToUtf16CharacterStream. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: Created 4 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/parsing/scanner-character-streams.h ('k') | test/cctest/test-parsing.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/parsing/scanner-character-streams.h" 5 #include "src/parsing/scanner-character-streams.h"
6 6
7 #include "include/v8.h" 7 #include "include/v8.h"
8 #include "src/globals.h" 8 #include "src/globals.h"
9 #include "src/handles.h" 9 #include "src/handles.h"
10 #include "src/list-inl.h" // TODO(mstarzinger): Temporary cycle breaker! 10 #include "src/list-inl.h" // TODO(mstarzinger): Temporary cycle breaker!
11 #include "src/objects-inl.h" 11 #include "src/objects-inl.h"
12 #include "src/unicode-inl.h" 12 #include "src/unicode-inl.h"
13 13
14 namespace v8 { 14 namespace v8 {
15 namespace internal { 15 namespace internal {
16 16
17 namespace { 17 namespace {
18 18
19 size_t CopyUtf8CharsToUtf16Chars(uint16_t* dest, size_t length, const byte* src,
20 size_t* src_pos, size_t src_length) {
21 static const unibrow::uchar kMaxUtf16Character =
22 unibrow::Utf16::kMaxNonSurrogateCharCode;
23 size_t i = 0;
24 // Because of the UTF-16 lead and trail surrogates, we stop filling the buffer
25 // one character early (in the normal case), because we need to have at least
26 // two free spaces in the buffer to be sure that the next character will fit.
27 while (i < length - 1) {
28 if (*src_pos == src_length) break;
29 unibrow::uchar c = src[*src_pos];
30 if (c <= unibrow::Utf8::kMaxOneByteChar) {
31 *src_pos = *src_pos + 1;
32 } else {
33 c = unibrow::Utf8::CalculateValue(src + *src_pos, src_length - *src_pos,
34 src_pos);
35 }
36 if (c > kMaxUtf16Character) {
37 dest[i++] = unibrow::Utf16::LeadSurrogate(c);
38 dest[i++] = unibrow::Utf16::TrailSurrogate(c);
39 } else {
40 dest[i++] = static_cast<uc16>(c);
41 }
42 }
43 return i;
44 }
45
19 size_t CopyCharsHelper(uint16_t* dest, size_t length, const uint8_t* src, 46 size_t CopyCharsHelper(uint16_t* dest, size_t length, const uint8_t* src,
20 size_t* src_pos, size_t src_length, 47 size_t* src_pos, size_t src_length,
21 ScriptCompiler::StreamedSource::Encoding encoding) { 48 ScriptCompiler::StreamedSource::Encoding encoding) {
22 // It's possible that this will be called with length 0, but don't assume that 49 // It's possible that this will be called with length 0, but don't assume that
23 // the functions this calls handle it gracefully. 50 // the functions this calls handle it gracefully.
24 if (length == 0) return 0; 51 if (length == 0) return 0;
25 52
26 if (encoding == ScriptCompiler::StreamedSource::UTF8) { 53 if (encoding == ScriptCompiler::StreamedSource::UTF8) {
27 return v8::internal::Utf8ToUtf16CharacterStream::CopyChars( 54 return CopyUtf8CharsToUtf16Chars(dest, length, src, src_pos, src_length);
28 dest, length, src, src_pos, src_length);
29 } 55 }
30 56
31 size_t to_fill = length; 57 size_t to_fill = length;
32 if (to_fill > src_length - *src_pos) to_fill = src_length - *src_pos; 58 if (to_fill > src_length - *src_pos) to_fill = src_length - *src_pos;
33 59
34 if (encoding == ScriptCompiler::StreamedSource::ONE_BYTE) { 60 if (encoding == ScriptCompiler::StreamedSource::ONE_BYTE) {
35 v8::internal::CopyChars<uint8_t, uint16_t>(dest, src + *src_pos, to_fill); 61 v8::internal::CopyChars<uint8_t, uint16_t>(dest, src + *src_pos, to_fill);
36 } else { 62 } else {
37 DCHECK(encoding == ScriptCompiler::StreamedSource::TWO_BYTE); 63 DCHECK(encoding == ScriptCompiler::StreamedSource::TWO_BYTE);
38 v8::internal::CopyChars<uint16_t, uint16_t>( 64 v8::internal::CopyChars<uint16_t, uint16_t>(
(...skipping 129 matching lines...) Expand 10 before | Expand all | Expand 10 after
168 if (from_pos + length > length_) { 194 if (from_pos + length > length_) {
169 length = length_ - from_pos; 195 length = length_ - from_pos;
170 } 196 }
171 String::WriteToFlat<uc16>(*string_, buffer_, static_cast<int>(from_pos), 197 String::WriteToFlat<uc16>(*string_, buffer_, static_cast<int>(from_pos),
172 static_cast<int>(from_pos + length)); 198 static_cast<int>(from_pos + length));
173 return length; 199 return length;
174 } 200 }
175 201
176 202
177 // ---------------------------------------------------------------------------- 203 // ----------------------------------------------------------------------------
178 // Utf8ToUtf16CharacterStream 204 // ExternalStreamingStream
179 Utf8ToUtf16CharacterStream::Utf8ToUtf16CharacterStream(const byte* data,
180 size_t length)
181 : BufferedUtf16CharacterStream(),
182 raw_data_(data),
183 raw_data_length_(length),
184 raw_data_pos_(0),
185 raw_character_position_(0) {
186 ReadBlock();
187 }
188
189
190 Utf8ToUtf16CharacterStream::~Utf8ToUtf16CharacterStream() { }
191
192
193 size_t Utf8ToUtf16CharacterStream::CopyChars(uint16_t* dest, size_t length,
194 const byte* src, size_t* src_pos,
195 size_t src_length) {
196 static const unibrow::uchar kMaxUtf16Character =
197 unibrow::Utf16::kMaxNonSurrogateCharCode;
198 size_t i = 0;
199 // Because of the UTF-16 lead and trail surrogates, we stop filling the buffer
200 // one character early (in the normal case), because we need to have at least
201 // two free spaces in the buffer to be sure that the next character will fit.
202 while (i < length - 1) {
203 if (*src_pos == src_length) break;
204 unibrow::uchar c = src[*src_pos];
205 if (c <= unibrow::Utf8::kMaxOneByteChar) {
206 *src_pos = *src_pos + 1;
207 } else {
208 c = unibrow::Utf8::CalculateValue(src + *src_pos, src_length - *src_pos,
209 src_pos);
210 }
211 if (c > kMaxUtf16Character) {
212 dest[i++] = unibrow::Utf16::LeadSurrogate(c);
213 dest[i++] = unibrow::Utf16::TrailSurrogate(c);
214 } else {
215 dest[i++] = static_cast<uc16>(c);
216 }
217 }
218 return i;
219 }
220
221
222 size_t Utf8ToUtf16CharacterStream::BufferSeekForward(size_t delta) {
223 size_t old_pos = pos_;
224 size_t target_pos = pos_ + delta;
225 SetRawPosition(target_pos);
226 pos_ = raw_character_position_;
227 ReadBlock();
228 return pos_ - old_pos;
229 }
230
231
232 size_t Utf8ToUtf16CharacterStream::FillBuffer(size_t char_position) {
233 SetRawPosition(char_position);
234 if (raw_character_position_ != char_position) {
235 // char_position was not a valid position in the stream (hit the end
236 // while spooling to it).
237 return 0u;
238 }
239 size_t i = CopyChars(buffer_, kBufferSize, raw_data_, &raw_data_pos_,
240 raw_data_length_);
241 raw_character_position_ = char_position + i;
242 return i;
243 }
244
245
246 static const byte kUtf8MultiByteMask = 0xC0;
247 static const byte kUtf8MultiByteCharFollower = 0x80;
248
249
250 #ifdef DEBUG
251 static const byte kUtf8MultiByteCharStart = 0xC0;
252 static bool IsUtf8MultiCharacterStart(byte first_byte) {
253 return (first_byte & kUtf8MultiByteMask) == kUtf8MultiByteCharStart;
254 }
255 #endif
256
257
258 static bool IsUtf8MultiCharacterFollower(byte later_byte) {
259 return (later_byte & kUtf8MultiByteMask) == kUtf8MultiByteCharFollower;
260 }
261
262
263 // Move the cursor back to point at the preceding UTF-8 character start
264 // in the buffer.
265 static inline void Utf8CharacterBack(const byte* buffer, size_t* cursor) {
266 byte character = buffer[--*cursor];
267 if (character > unibrow::Utf8::kMaxOneByteChar) {
268 DCHECK(IsUtf8MultiCharacterFollower(character));
269 // Last byte of a multi-byte character encoding. Step backwards until
270 // pointing to the first byte of the encoding, recognized by having the
271 // top two bits set.
272 while (IsUtf8MultiCharacterFollower(buffer[--*cursor])) { }
273 DCHECK(IsUtf8MultiCharacterStart(buffer[*cursor]));
274 }
275 }
276
277
278 // Move the cursor forward to point at the next following UTF-8 character start
279 // in the buffer.
280 static inline void Utf8CharacterForward(const byte* buffer, size_t* cursor) {
281 byte character = buffer[(*cursor)++];
282 if (character > unibrow::Utf8::kMaxOneByteChar) {
283 // First character of a multi-byte character encoding.
284 // The number of most-significant one-bits determines the length of the
285 // encoding:
286 // 110..... - (0xCx, 0xDx) one additional byte (minimum).
287 // 1110.... - (0xEx) two additional bytes.
288 // 11110... - (0xFx) three additional bytes (maximum).
289 DCHECK(IsUtf8MultiCharacterStart(character));
290 // Additional bytes is:
291 // 1 if value in range 0xC0 .. 0xDF.
292 // 2 if value in range 0xE0 .. 0xEF.
293 // 3 if value in range 0xF0 .. 0xF7.
294 // Encode that in a single value.
295 size_t additional_bytes =
296 ((0x3211u) >> (((character - 0xC0) >> 2) & 0xC)) & 0x03;
297 *cursor += additional_bytes;
298 DCHECK(!IsUtf8MultiCharacterFollower(buffer[1 + additional_bytes]));
299 }
300 }
301
302
303 // This can't set a raw position between the two halves of a surrogate pair,
304 // since there is no position in the UTF-8 stream that corresponds to that. This
305 // assumes that the surrogate pair is correctly coded as a 4-byte UTF-8 sequence.
306 // If it is illegally coded as two 3-byte sequences then there is no problem here.
307 void Utf8ToUtf16CharacterStream::SetRawPosition(size_t target_position) {
308 if (raw_character_position_ > target_position) {
309 // Spool backwards in utf8 buffer.
310 do {
311 size_t old_pos = raw_data_pos_;
312 Utf8CharacterBack(raw_data_, &raw_data_pos_);
313 raw_character_position_--;
314 DCHECK(old_pos - raw_data_pos_ <= 4);
315 // Step back over both code units for surrogate pairs.
316 if (old_pos - raw_data_pos_ == 4) raw_character_position_--;
317 } while (raw_character_position_ > target_position);
318 // No surrogate pair splitting.
319 DCHECK(raw_character_position_ == target_position);
320 return;
321 }
322 // Spool forwards in the utf8 buffer.
323 while (raw_character_position_ < target_position) {
324 if (raw_data_pos_ == raw_data_length_) return;
325 size_t old_pos = raw_data_pos_;
326 Utf8CharacterForward(raw_data_, &raw_data_pos_);
327 raw_character_position_++;
328 DCHECK(raw_data_pos_ - old_pos <= 4);
329 if (raw_data_pos_ - old_pos == 4) raw_character_position_++;
330 }
331 // No surrogate pair splitting.
332 DCHECK(raw_character_position_ == target_position);
333 }
334
335 205
336 size_t ExternalStreamingStream::FillBuffer(size_t position) { 206 size_t ExternalStreamingStream::FillBuffer(size_t position) {
337 // Ignore "position" which is the position in the decoded data. Instead, 207 // Ignore "position" which is the position in the decoded data. Instead,
338 // ExternalStreamingStream keeps track of the position in the raw data. 208 // ExternalStreamingStream keeps track of the position in the raw data.
339 size_t data_in_buffer = 0; 209 size_t data_in_buffer = 0;
340 // Note that the UTF-8 decoder might not be able to fill the buffer 210 // Note that the UTF-8 decoder might not be able to fill the buffer
341 // completely; it will typically leave the last character empty (see 211 // completely; it will typically leave the last character empty (see
342 // Utf8ToUtf16CharacterStream::CopyChars). 212 // CopyUtf8CharsToUtf16Chars).
343 while (data_in_buffer < kBufferSize - 1) { 213 while (data_in_buffer < kBufferSize - 1) {
344 if (current_data_ == NULL) { 214 if (current_data_ == NULL) {
(...skipping 247 matching lines...) Expand 10 before | Expand all | Expand 10 after
592 ExternalOneByteStringUtf16CharacterStream( 462 ExternalOneByteStringUtf16CharacterStream(
593 Handle<ExternalOneByteString> data, int start_position, 463 Handle<ExternalOneByteString> data, int start_position,
594 int end_position) 464 int end_position)
595 : raw_data_(data->GetChars()), 465 : raw_data_(data->GetChars()),
596 length_(end_position), 466 length_(end_position),
597 bookmark_(kNoBookmark) { 467 bookmark_(kNoBookmark) {
598 DCHECK(end_position >= start_position); 468 DCHECK(end_position >= start_position);
599 pos_ = start_position; 469 pos_ = start_position;
600 } 470 }
601 471
472 ExternalOneByteStringUtf16CharacterStream::
473 ExternalOneByteStringUtf16CharacterStream(const char* data, size_t length)
474 : raw_data_(reinterpret_cast<const uint8_t*>(data)),
475 length_(length),
476 bookmark_(kNoBookmark) {}
477
478 ExternalOneByteStringUtf16CharacterStream::
479 ExternalOneByteStringUtf16CharacterStream(const char* data)
480 : ExternalOneByteStringUtf16CharacterStream(data, strlen(data)) {}
481
602 bool ExternalOneByteStringUtf16CharacterStream::SetBookmark() { 482 bool ExternalOneByteStringUtf16CharacterStream::SetBookmark() {
603 bookmark_ = pos_; 483 bookmark_ = pos_;
604 return true; 484 return true;
605 } 485 }
606 486
607 void ExternalOneByteStringUtf16CharacterStream::ResetToBookmark() { 487 void ExternalOneByteStringUtf16CharacterStream::ResetToBookmark() {
608 DCHECK(bookmark_ != kNoBookmark); 488 DCHECK(bookmark_ != kNoBookmark);
609 pos_ = bookmark_; 489 pos_ = bookmark_;
610 buffer_cursor_ = buffer_; 490 buffer_cursor_ = buffer_;
611 buffer_end_ = buffer_ + FillBuffer(pos_); 491 buffer_end_ = buffer_ + FillBuffer(pos_);
(...skipping 11 matching lines...) Expand all
623 if (from_pos >= length_) return 0; 503 if (from_pos >= length_) return 0;
624 size_t length = Min(kBufferSize, length_ - from_pos); 504 size_t length = Min(kBufferSize, length_ - from_pos);
625 for (size_t i = 0; i < length; ++i) { 505 for (size_t i = 0; i < length; ++i) {
626 buffer_[i] = static_cast<uc16>(raw_data_[from_pos + i]); 506 buffer_[i] = static_cast<uc16>(raw_data_[from_pos + i]);
627 } 507 }
628 return length; 508 return length;
629 } 509 }
630 510
631 } // namespace internal 511 } // namespace internal
632 } // namespace v8 512 } // namespace v8
OLDNEW
« no previous file with comments | « src/parsing/scanner-character-streams.h ('k') | test/cctest/test-parsing.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698