src/scanner-character-streams.cc - Issue 864273005: Scanner / Unicode decoding: use size_t instead of unsigned.

Unified Diff: src/scanner-character-streams.cc

Issue 864273005: Scanner / Unicode decoding: use size_t instead of unsigned. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: tentative Created 5 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/scanner-character-streams.cc

diff --git a/src/scanner-character-streams.cc b/src/scanner-character-streams.cc

index 50c3955c1bdc888200f593cad0f45ec69900ae18..cc4a18b540f3555a2ea53ab9b9c66147f9979b99 100644

--- a/src/scanner-character-streams.cc

+++ b/src/scanner-character-streams.cc

@@ -15,9 +15,9 @@ namespace internal {

namespace {

-unsigned CopyCharsHelper(uint16_t* dest, unsigned length, const uint8_t* src,

- unsigned* src_pos, unsigned src_length,

- ScriptCompiler::StreamedSource::Encoding encoding) {

+size_t CopyCharsHelper(uint16_t* dest, size_t length, const uint8_t* src,

+ size_t* src_pos, size_t src_length,

+ ScriptCompiler::StreamedSource::Encoding encoding) {

// It's possible that this will be called with length 0, but don't assume that

// the functions this calls handle it gracefully.

if (length == 0) return 0;

@@ -27,7 +27,7 @@ unsigned CopyCharsHelper(uint16_t* dest, unsigned length, const uint8_t* src,

dest, length, src, src_pos, src_length);

}

- unsigned to_fill = length;

+ size_t to_fill = length;

if (to_fill > src_length - *src_pos) to_fill = src_length - *src_pos;

if (encoding == ScriptCompiler::StreamedSource::ONE_BYTE) {

@@ -110,13 +110,13 @@ bool BufferedUtf16CharacterStream::ReadBlock() {

if (buffer_cursor_ < buffer_end_) return true;

// Otherwise read a new block.

}

- unsigned length = FillBuffer(pos_);

+ size_t length = FillBuffer(pos_);

buffer_end_ = buffer_ + length;

return length > 0;

}

-unsigned BufferedUtf16CharacterStream::SlowSeekForward(unsigned delta) {

+size_t BufferedUtf16CharacterStream::SlowSeekForward(size_t delta) {

// Leave pushback mode (i.e., ignore that there might be valid data

// in the buffer before the pushback_limit_ point).

pushback_limit_ = NULL;

@@ -129,11 +129,8 @@ unsigned BufferedUtf16CharacterStream::SlowSeekForward(unsigned delta) {

GenericStringUtf16CharacterStream::GenericStringUtf16CharacterStream(

- Handle<String> data,

- unsigned start_position,

- unsigned end_position)

- : string_(data),

- length_(end_position) {

+ Handle<String> data, size_t start_position, size_t end_position)

+ : string_(data), length_(end_position) {

DCHECK(end_position >= start_position);

pos_ = start_position;

}

@@ -142,21 +139,22 @@ GenericStringUtf16CharacterStream::GenericStringUtf16CharacterStream(

GenericStringUtf16CharacterStream::~GenericStringUtf16CharacterStream() { }

-unsigned GenericStringUtf16CharacterStream::BufferSeekForward(unsigned delta) {

- unsigned old_pos = pos_;

+size_t GenericStringUtf16CharacterStream::BufferSeekForward(size_t delta) {

+ size_t old_pos = pos_;

pos_ = Min(pos_ + delta, length_);

ReadBlock();

return pos_ - old_pos;

}

-unsigned GenericStringUtf16CharacterStream::FillBuffer(unsigned from_pos) {

+size_t GenericStringUtf16CharacterStream::FillBuffer(size_t from_pos) {

if (from_pos >= length_) return 0;

- unsigned length = kBufferSize;

+ size_t length = kBufferSize;

if (from_pos + length > length_) {

length = length_ - from_pos;

}

- String::WriteToFlat<uc16>(*string_, buffer_, from_pos, from_pos + length);

+ String::WriteToFlat<uc16>(*string_, buffer_, static_cast<int>(from_pos),

+ static_cast<int>(from_pos + length));

return length;

}

@@ -164,7 +162,7 @@ unsigned GenericStringUtf16CharacterStream::FillBuffer(unsigned from_pos) {

// ----------------------------------------------------------------------------

// Utf8ToUtf16CharacterStream

Utf8ToUtf16CharacterStream::Utf8ToUtf16CharacterStream(const byte* data,

- unsigned length)

+ size_t length)

: BufferedUtf16CharacterStream(),

raw_data_(data),

raw_data_length_(length),

@@ -177,12 +175,11 @@ Utf8ToUtf16CharacterStream::Utf8ToUtf16CharacterStream(const byte* data,

Utf8ToUtf16CharacterStream::~Utf8ToUtf16CharacterStream() { }

-unsigned Utf8ToUtf16CharacterStream::CopyChars(uint16_t* dest, unsigned length,

- const byte* src,

- unsigned* src_pos,

- unsigned src_length) {

+size_t Utf8ToUtf16CharacterStream::CopyChars(uint16_t* dest, size_t length,

+ const byte* src, size_t* src_pos,

+ size_t src_length) {

static const unibrow::uchar kMaxUtf16Character = 0xffff;

- unsigned i = 0;

+ size_t i = 0;

// Because of the UTF-16 lead and trail surrogates, we stop filling the buffer

// one character early (in the normal case), because we need to have at least

// two free spaces in the buffer to be sure that the next character will fit.

@@ -206,9 +203,9 @@ unsigned Utf8ToUtf16CharacterStream::CopyChars(uint16_t* dest, unsigned length,

}

-unsigned Utf8ToUtf16CharacterStream::BufferSeekForward(unsigned delta) {

- unsigned old_pos = pos_;

- unsigned target_pos = pos_ + delta;

+size_t Utf8ToUtf16CharacterStream::BufferSeekForward(size_t delta) {

+ size_t old_pos = pos_;

+ size_t target_pos = pos_ + delta;

SetRawPosition(target_pos);

pos_ = raw_character_position_;

ReadBlock();

@@ -216,15 +213,15 @@ unsigned Utf8ToUtf16CharacterStream::BufferSeekForward(unsigned delta) {

}

-unsigned Utf8ToUtf16CharacterStream::FillBuffer(unsigned char_position) {

+size_t Utf8ToUtf16CharacterStream::FillBuffer(size_t char_position) {

SetRawPosition(char_position);

if (raw_character_position_ != char_position) {

// char_position was not a valid position in the stream (hit the end

// while spooling to it).

return 0u;

}

- unsigned i = CopyChars(buffer_, kBufferSize, raw_data_, &raw_data_pos_,

- raw_data_length_);

+ size_t i = CopyChars(buffer_, kBufferSize, raw_data_, &raw_data_pos_,

+ raw_data_length_);

raw_character_position_ = char_position + i;

return i;

}

@@ -249,7 +246,7 @@ static bool IsUtf8MultiCharacterFollower(byte later_byte) {

// Move the cursor back to point at the preceding UTF-8 character start

// in the buffer.

-static inline void Utf8CharacterBack(const byte* buffer, unsigned* cursor) {

+static inline void Utf8CharacterBack(const byte* buffer, size_t* cursor) {

byte character = buffer[--*cursor];

if (character > unibrow::Utf8::kMaxOneByteChar) {

DCHECK(IsUtf8MultiCharacterFollower(character));

@@ -264,7 +261,7 @@ static inline void Utf8CharacterBack(const byte* buffer, unsigned* cursor) {

// Move the cursor forward to point at the next following UTF-8 character start

// in the buffer.

-static inline void Utf8CharacterForward(const byte* buffer, unsigned* cursor) {

+static inline void Utf8CharacterForward(const byte* buffer, size_t* cursor) {

byte character = buffer[(*cursor)++];

if (character > unibrow::Utf8::kMaxOneByteChar) {

// First character of a multi-byte character encoding.

@@ -279,7 +276,7 @@ static inline void Utf8CharacterForward(const byte* buffer, unsigned* cursor) {

// 2 if value in range 0xE0 .. 0xEF.

// 3 if value in range 0xF0 .. 0xF7.

// Encode that in a single value.

- unsigned additional_bytes =

+ size_t additional_bytes =

((0x3211u) >> (((character - 0xC0) >> 2) & 0xC)) & 0x03;

*cursor += additional_bytes;

DCHECK(!IsUtf8MultiCharacterFollower(buffer[1 + additional_bytes]));

@@ -291,11 +288,11 @@ static inline void Utf8CharacterForward(const byte* buffer, unsigned* cursor) {

// is no position in the UTF8 stream that corresponds to that. This assumes

// that the surrogate pair is correctly coded as a 4 byte UTF-8 sequence. If

// it is illegally coded as two 3 byte sequences then there is no problem here.

-void Utf8ToUtf16CharacterStream::SetRawPosition(unsigned target_position) {

+void Utf8ToUtf16CharacterStream::SetRawPosition(size_t target_position) {

if (raw_character_position_ > target_position) {

// Spool backwards in utf8 buffer.

do {

- int old_pos = raw_data_pos_;

+ size_t old_pos = raw_data_pos_;

Utf8CharacterBack(raw_data_, &raw_data_pos_);

raw_character_position_--;

DCHECK(old_pos - raw_data_pos_ <= 4);

@@ -309,7 +306,7 @@ void Utf8ToUtf16CharacterStream::SetRawPosition(unsigned target_position) {

// Spool forwards in the utf8 buffer.

while (raw_character_position_ < target_position) {

if (raw_data_pos_ == raw_data_length_) return;

- int old_pos = raw_data_pos_;

+ size_t old_pos = raw_data_pos_;

Utf8CharacterForward(raw_data_, &raw_data_pos_);

raw_character_position_++;

DCHECK(raw_data_pos_ - old_pos <= 4);

@@ -320,10 +317,10 @@ void Utf8ToUtf16CharacterStream::SetRawPosition(unsigned target_position) {

}

-unsigned ExternalStreamingStream::FillBuffer(unsigned position) {

+size_t ExternalStreamingStream::FillBuffer(size_t position) {

// Ignore "position" which is the position in the decoded data. Instead,

// ExternalStreamingStream keeps track of the position in the raw data.

- unsigned data_in_buffer = 0;

+ size_t data_in_buffer = 0;

// Note that the UTF-8 decoder might not be able to fill the buffer

// completely; it will typically leave the last character empty (see

// Utf8ToUtf16CharacterStream::CopyChars).

@@ -331,10 +328,8 @@ unsigned ExternalStreamingStream::FillBuffer(unsigned position) {

if (current_data_ == NULL) {

// GetSomeData will wait until the embedder has enough data. Here's an

// interface between the API which uses size_t (which is the correct type

- // here) and the internal parts which use unsigned. TODO(marja): make the

- // internal parts use size_t too.

- current_data_length_ =

- static_cast<unsigned>(source_stream_->GetMoreData(&current_data_));

+ // here) and the internal parts which use size_t.

+ current_data_length_ = source_stream_->GetMoreData(&current_data_);

current_data_offset_ = 0;

bool data_ends = current_data_length_ == 0;

@@ -363,8 +358,8 @@ unsigned ExternalStreamingStream::FillBuffer(unsigned position) {

}

// Fill the buffer from current_data_.

- unsigned new_offset = 0;

- unsigned new_chars_in_buffer =

+ size_t new_offset = 0;

+ size_t new_chars_in_buffer =

CopyCharsHelper(buffer_ + data_in_buffer, kBufferSize - data_in_buffer,

current_data_ + current_data_offset_, &new_offset,

current_data_length_ - current_data_offset_, encoding_);

@@ -384,7 +379,7 @@ unsigned ExternalStreamingStream::FillBuffer(unsigned position) {

}

void ExternalStreamingStream::HandleUtf8SplitCharacters(

- unsigned* data_in_buffer) {

+ size_t* data_in_buffer) {

// Note the following property of UTF-8 which makes this function possible:

// Given any byte, we can always read its local environment (in both

// directions) to find out the (possibly multi-byte) character it belongs

@@ -407,8 +402,8 @@ void ExternalStreamingStream::HandleUtf8SplitCharacters(

}

// Convert the data in utf8_split_char_buffer_.

- unsigned new_offset = 0;

- unsigned new_chars_in_buffer =

+ size_t new_offset = 0;

+ size_t new_chars_in_buffer =

CopyCharsHelper(buffer_ + *data_in_buffer,

kBufferSize - *data_in_buffer, utf8_split_char_buffer_,

&new_offset, utf8_split_char_buffer_length_, encoding_);

@@ -439,7 +434,7 @@ void ExternalStreamingStream::HandleUtf8SplitCharacters(

}

CHECK(utf8_split_char_buffer_length_ <= 4);

- for (unsigned i = 0; i < utf8_split_char_buffer_length_; ++i) {

+ for (size_t i = 0; i < utf8_split_char_buffer_length_; ++i) {

utf8_split_char_buffer_[i] = current_data_[current_data_length_ + i];

}

« no previous file with comments | « src/scanner-character-streams.h ('k') | src/unicode.h » ('j') | no next file with comments »