src/parsing/scanner-character-streams.cc - Issue 2256273002: Remove unused Utf8ToUtf16CharacterStream.

Side by Side Diff: src/parsing/scanner-character-streams.cc

Issue 2256273002: Remove unused Utf8ToUtf16CharacterStream. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: Created 4 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "src/parsing/scanner-character-streams.h"	5 #include "src/parsing/scanner-character-streams.h"

6	6

7 #include "include/v8.h"	7 #include "include/v8.h"

8 #include "src/globals.h"	8 #include "src/globals.h"

9 #include "src/handles.h"	9 #include "src/handles.h"

10 #include "src/list-inl.h" // TODO(mstarzinger): Temporary cycle breaker!	10 #include "src/list-inl.h" // TODO(mstarzinger): Temporary cycle breaker!

11 #include "src/objects-inl.h"	11 #include "src/objects-inl.h"

12 #include "src/unicode-inl.h"	12 #include "src/unicode-inl.h"

13	13

14 namespace v8 {	14 namespace v8 {

15 namespace internal {	15 namespace internal {

16	16

17 namespace {	17 namespace {

18	18

	19 size_t CopyUtf8CharsToUtf16Chars(uint16_t* dest, size_t length, const byte* src,

	20 size_t* src_pos, size_t src_length) {

	21 static const unibrow::uchar kMaxUtf16Character =

	22 unibrow::Utf16::kMaxNonSurrogateCharCode;

	23 size_t i = 0;

	24 // Because of the UTF-16 lead and trail surrogates, we stop filling the buffer

	25 // one character early (in the normal case), because we need to have at least

	26 // two free spaces in the buffer to be sure that the next character will fit.

	27 while (i < length - 1) {

	28 if (*src_pos == src_length) break;

	29 unibrow::uchar c = src[*src_pos];

	30 if (c <= unibrow::Utf8::kMaxOneByteChar) {

	31 *src_pos = *src_pos + 1;

	32 } else {

	33 c = unibrow::Utf8::CalculateValue(src + *src_pos, src_length - *src_pos,

	34 src_pos);

	35 }

	36 if (c > kMaxUtf16Character) {

	37 dest[i++] = unibrow::Utf16::LeadSurrogate(c);

	38 dest[i++] = unibrow::Utf16::TrailSurrogate(c);

	39 } else {

	40 dest[i++] = static_cast<uc16>(c);

	41 }

	42 }

	43 return i;

	44 }

	45

19 size_t CopyCharsHelper(uint16_t* dest, size_t length, const uint8_t* src,	46 size_t CopyCharsHelper(uint16_t* dest, size_t length, const uint8_t* src,

20 size_t* src_pos, size_t src_length,	47 size_t* src_pos, size_t src_length,

21 ScriptCompiler::StreamedSource::Encoding encoding) {	48 ScriptCompiler::StreamedSource::Encoding encoding) {

22 // It's possible that this will be called with length 0, but don't assume that	49 // It's possible that this will be called with length 0, but don't assume that

23 // the functions this calls handle it gracefully.	50 // the functions this calls handle it gracefully.

24 if (length == 0) return 0;	51 if (length == 0) return 0;

25	52

26 if (encoding == ScriptCompiler::StreamedSource::UTF8) {	53 if (encoding == ScriptCompiler::StreamedSource::UTF8) {

27 return v8::internal::Utf8ToUtf16CharacterStream::CopyChars(	54 return CopyUtf8CharsToUtf16Chars(dest, length, src, src_pos, src_length);

28 dest, length, src, src_pos, src_length);

29 }	55 }

30	56

31 size_t to_fill = length;	57 size_t to_fill = length;

32 if (to_fill > src_length - *src_pos) to_fill = src_length - *src_pos;	58 if (to_fill > src_length - *src_pos) to_fill = src_length - *src_pos;

33	59

34 if (encoding == ScriptCompiler::StreamedSource::ONE_BYTE) {	60 if (encoding == ScriptCompiler::StreamedSource::ONE_BYTE) {

35 v8::internal::CopyChars<uint8_t, uint16_t>(dest, src + *src_pos, to_fill);	61 v8::internal::CopyChars<uint8_t, uint16_t>(dest, src + *src_pos, to_fill);

36 } else {	62 } else {

37 DCHECK(encoding == ScriptCompiler::StreamedSource::TWO_BYTE);	63 DCHECK(encoding == ScriptCompiler::StreamedSource::TWO_BYTE);

38 v8::internal::CopyChars<uint16_t, uint16_t>(	64 v8::internal::CopyChars<uint16_t, uint16_t>(

(...skipping 129 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
168 if (from_pos + length > length_) {	194 if (from_pos + length > length_) {

169 length = length_ - from_pos;	195 length = length_ - from_pos;

170 }	196 }

171 String::WriteToFlat<uc16>(*string_, buffer_, static_cast<int>(from_pos),	197 String::WriteToFlat<uc16>(*string_, buffer_, static_cast<int>(from_pos),

172 static_cast<int>(from_pos + length));	198 static_cast<int>(from_pos + length));

173 return length;	199 return length;

174 }	200 }

175	201

176	202

177 // ----------------------------------------------------------------------------	203 // ----------------------------------------------------------------------------

178 // Utf8ToUtf16CharacterStream	204 // ExternalStreamingStream

179 Utf8ToUtf16CharacterStream::Utf8ToUtf16CharacterStream(const byte* data,

180 size_t length)

181 : BufferedUtf16CharacterStream(),

182 raw_data_(data),

183 raw_data_length_(length),

184 raw_data_pos_(0),

185 raw_character_position_(0) {

186 ReadBlock();

187 }

188

189

190 Utf8ToUtf16CharacterStream::~Utf8ToUtf16CharacterStream() { }

191

192

193 size_t Utf8ToUtf16CharacterStream::CopyChars(uint16_t* dest, size_t length,

194 const byte* src, size_t* src_pos,

195 size_t src_length) {

196 static const unibrow::uchar kMaxUtf16Character =

197 unibrow::Utf16::kMaxNonSurrogateCharCode;

198 size_t i = 0;

199 // Because of the UTF-16 lead and trail surrogates, we stop filling the buffer

200 // one character early (in the normal case), because we need to have at least

201 // two free spaces in the buffer to be sure that the next character will fit.

202 while (i < length - 1) {

203 if (*src_pos == src_length) break;

204 unibrow::uchar c = src[*src_pos];

205 if (c <= unibrow::Utf8::kMaxOneByteChar) {

206 *src_pos = *src_pos + 1;

207 } else {

208 c = unibrow::Utf8::CalculateValue(src + *src_pos, src_length - *src_pos,

209 src_pos);

210 }

211 if (c > kMaxUtf16Character) {

212 dest[i++] = unibrow::Utf16::LeadSurrogate(c);

213 dest[i++] = unibrow::Utf16::TrailSurrogate(c);

214 } else {

215 dest[i++] = static_cast<uc16>(c);

216 }

217 }

218 return i;

219 }

220

221

222 size_t Utf8ToUtf16CharacterStream::BufferSeekForward(size_t delta) {

223 size_t old_pos = pos_;

224 size_t target_pos = pos_ + delta;

225 SetRawPosition(target_pos);

226 pos_ = raw_character_position_;

227 ReadBlock();

228 return pos_ - old_pos;

229 }

230

231

232 size_t Utf8ToUtf16CharacterStream::FillBuffer(size_t char_position) {

233 SetRawPosition(char_position);

234 if (raw_character_position_ != char_position) {

235 // char_position was not a valid position in the stream (hit the end

236 // while spooling to it).

237 return 0u;

238 }

239 size_t i = CopyChars(buffer_, kBufferSize, raw_data_, &raw_data_pos_,

240 raw_data_length_);

241 raw_character_position_ = char_position + i;

242 return i;

243 }

244

245

246 static const byte kUtf8MultiByteMask = 0xC0;

247 static const byte kUtf8MultiByteCharFollower = 0x80;

248

249

250 #ifdef DEBUG

251 static const byte kUtf8MultiByteCharStart = 0xC0;

252 static bool IsUtf8MultiCharacterStart(byte first_byte) {

253 return (first_byte & kUtf8MultiByteMask) == kUtf8MultiByteCharStart;

254 }

255 #endif

256

257

258 static bool IsUtf8MultiCharacterFollower(byte later_byte) {

259 return (later_byte & kUtf8MultiByteMask) == kUtf8MultiByteCharFollower;

260 }

261

262

263 // Move the cursor back to point at the preceding UTF-8 character start

264 // in the buffer.

265 static inline void Utf8CharacterBack(const byte* buffer, size_t* cursor) {

266 byte character = buffer[--*cursor];

267 if (character > unibrow::Utf8::kMaxOneByteChar) {

268 DCHECK(IsUtf8MultiCharacterFollower(character));

269 // Last byte of a multi-byte character encoding. Step backwards until

270 // pointing to the first byte of the encoding, recognized by having the

271 // top two bits set.

272 while (IsUtf8MultiCharacterFollower(buffer[--*cursor])) { }

273 DCHECK(IsUtf8MultiCharacterStart(buffer[*cursor]));

274 }

275 }

276

277

278 // Move the cursor forward to point at the next following UTF-8 character start

279 // in the buffer.

280 static inline void Utf8CharacterForward(const byte* buffer, size_t* cursor) {

281 byte character = buffer[(*cursor)++];

282 if (character > unibrow::Utf8::kMaxOneByteChar) {

283 // First character of a multi-byte character encoding.

284 // The number of most-significant one-bits determines the length of the

285 // encoding:

286 // 110..... - (0xCx, 0xDx) one additional byte (minimum).

287 // 1110.... - (0xEx) two additional bytes.

288 // 11110... - (0xFx) three additional bytes (maximum).

289 DCHECK(IsUtf8MultiCharacterStart(character));

290 // Additional bytes is:

291 // 1 if value in range 0xC0 .. 0xDF.

292 // 2 if value in range 0xE0 .. 0xEF.

293 // 3 if value in range 0xF0 .. 0xF7.

294 // Encode that in a single value.

295 size_t additional_bytes =

296 ((0x3211u) >> (((character - 0xC0) >> 2) & 0xC)) & 0x03;

297 *cursor += additional_bytes;

298 DCHECK(!IsUtf8MultiCharacterFollower(buffer[1 + additional_bytes]));

299 }

300 }

301

302

303 // This can't set a raw position between two surrogate pairs, since there

304 // is no position in the UTF8 stream that corresponds to that. This assumes

305 // that the surrogate pair is correctly coded as a 4 byte UTF-8 sequence. If

306 // it is illegally coded as two 3 byte sequences then there is no problem here.

307 void Utf8ToUtf16CharacterStream::SetRawPosition(size_t target_position) {

308 if (raw_character_position_ > target_position) {

309 // Spool backwards in utf8 buffer.

310 do {

311 size_t old_pos = raw_data_pos_;

312 Utf8CharacterBack(raw_data_, &raw_data_pos_);

313 raw_character_position_--;

314 DCHECK(old_pos - raw_data_pos_ <= 4);

315 // Step back over both code units for surrogate pairs.

316 if (old_pos - raw_data_pos_ == 4) raw_character_position_--;

317 } while (raw_character_position_ > target_position);

318 // No surrogate pair splitting.

319 DCHECK(raw_character_position_ == target_position);

320 return;

321 }

322 // Spool forwards in the utf8 buffer.

323 while (raw_character_position_ < target_position) {

324 if (raw_data_pos_ == raw_data_length_) return;

325 size_t old_pos = raw_data_pos_;

326 Utf8CharacterForward(raw_data_, &raw_data_pos_);

327 raw_character_position_++;

328 DCHECK(raw_data_pos_ - old_pos <= 4);

329 if (raw_data_pos_ - old_pos == 4) raw_character_position_++;

330 }

331 // No surrogate pair splitting.

332 DCHECK(raw_character_position_ == target_position);

333 }

334

335	205

336 size_t ExternalStreamingStream::FillBuffer(size_t position) {	206 size_t ExternalStreamingStream::FillBuffer(size_t position) {

337 // Ignore "position" which is the position in the decoded data. Instead,	207 // Ignore "position" which is the position in the decoded data. Instead,

338 // ExternalStreamingStream keeps track of the position in the raw data.	208 // ExternalStreamingStream keeps track of the position in the raw data.

339 size_t data_in_buffer = 0;	209 size_t data_in_buffer = 0;

340 // Note that the UTF-8 decoder might not be able to fill the buffer	210 // Note that the UTF-8 decoder might not be able to fill the buffer

341 // completely; it will typically leave the last character empty (see	211 // completely; it will typically leave the last character empty (see

342 // Utf8ToUtf16CharacterStream::CopyChars).	212 // Utf8ToUtf16CharacterStream::CopyChars).

343 while (data_in_buffer < kBufferSize - 1) {	213 while (data_in_buffer < kBufferSize - 1) {

344 if (current_data_ == NULL) {	214 if (current_data_ == NULL) {

(...skipping 247 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
592 ExternalOneByteStringUtf16CharacterStream(	462 ExternalOneByteStringUtf16CharacterStream(

593 Handle<ExternalOneByteString> data, int start_position,	463 Handle<ExternalOneByteString> data, int start_position,

594 int end_position)	464 int end_position)

595 : raw_data_(data->GetChars()),	465 : raw_data_(data->GetChars()),

596 length_(end_position),	466 length_(end_position),

597 bookmark_(kNoBookmark) {	467 bookmark_(kNoBookmark) {

598 DCHECK(end_position >= start_position);	468 DCHECK(end_position >= start_position);

599 pos_ = start_position;	469 pos_ = start_position;

600 }	470 }

601	471

	472 ExternalOneByteStringUtf16CharacterStream::

	473 ExternalOneByteStringUtf16CharacterStream(const char* data, size_t length)

	474 : raw_data_(reinterpret_cast<const uint8_t*>(data)),

	475 length_(length),

	476 bookmark_(kNoBookmark) {}

	477

	478 ExternalOneByteStringUtf16CharacterStream::

	479 ExternalOneByteStringUtf16CharacterStream(const char* data)

	480 : ExternalOneByteStringUtf16CharacterStream(data, strlen(data)) {}

	481

602 bool ExternalOneByteStringUtf16CharacterStream::SetBookmark() {	482 bool ExternalOneByteStringUtf16CharacterStream::SetBookmark() {

603 bookmark_ = pos_;	483 bookmark_ = pos_;

604 return true;	484 return true;

605 }	485 }

606	486

607 void ExternalOneByteStringUtf16CharacterStream::ResetToBookmark() {	487 void ExternalOneByteStringUtf16CharacterStream::ResetToBookmark() {

608 DCHECK(bookmark_ != kNoBookmark);	488 DCHECK(bookmark_ != kNoBookmark);

609 pos_ = bookmark_;	489 pos_ = bookmark_;

610 buffer_cursor_ = buffer_;	490 buffer_cursor_ = buffer_;

611 buffer_end_ = buffer_ + FillBuffer(pos_);	491 buffer_end_ = buffer_ + FillBuffer(pos_);

(...skipping 11 matching lines...) Expand all Loading...
623 if (from_pos >= length_) return 0;	503 if (from_pos >= length_) return 0;

624 size_t length = Min(kBufferSize, length_ - from_pos);	504 size_t length = Min(kBufferSize, length_ - from_pos);

625 for (size_t i = 0; i < length; ++i) {	505 for (size_t i = 0; i < length; ++i) {

626 buffer_[i] = static_cast<uc16>(raw_data_[from_pos + i]);	506 buffer_[i] = static_cast<uc16>(raw_data_[from_pos + i]);

627 }	507 }

628 return length;	508 return length;

629 }	509 }

630	510

631 } // namespace internal	511 } // namespace internal

632 } // namespace v8	512 } // namespace v8

OLD	NEW

« no previous file with comments | « src/parsing/scanner-character-streams.h ('k') | test/cctest/test-parsing.cc » ('j') | no next file with comments »