sdk/lib/convert/utf.dart - Issue 435553002: Improve utf8 decoding of single-char bytes, by isolating the loop.

Unified Diff: sdk/lib/convert/utf.dart

Issue 435553002: Improve utf8 decoding of single-char bytes, by isolating the loop. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: Created 6 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: sdk/lib/convert/utf.dart

diff --git a/sdk/lib/convert/utf.dart b/sdk/lib/convert/utf.dart

index 5b281934ff93aad9b8d19af940fafc7421f9ea77..c2b42bafb995aa20e4b770e69194a724cda913b5 100644

--- a/sdk/lib/convert/utf.dart

+++ b/sdk/lib/convert/utf.dart

@@ -420,22 +420,29 @@ class _Utf8Decoder {

int value = _value;

int expectedUnits = _expectedUnits;

int extraUnits = _extraUnits;

- int singleBytesCount = 0;

_value = 0;

_expectedUnits = 0;

_extraUnits = 0;

+ int scanOneByteCharacters(units, int from) {

+ final to = endIndex;

+ final mask = ~_ONE_BYTE_LIMIT;

+ for (var i = from; i < to; i++) {

+ if ((units[i] & mask) != 0) return i - from;

+ }

+ return to - from;

+ }

void addSingleBytes(int from, int to) {

- assert(singleBytesCount > 0);

assert(from >= startIndex && from <= endIndex);

assert(to >= startIndex && to <= endIndex);

if (from == 0 && to == codeUnits.length) {

- _stringSink.write(new String.fromCharCodes(codeUnits));

+ var str = new String.fromCharCodes(codeUnits);

Florian Schneider 2014/07/31 11:19:49 Why this change?

Anders Johnsen 2014/07/31 12:23:03 Ah, that was for debugging. Reverted.

+ _stringSink.write(str);

} else {

_stringSink.write(

new String.fromCharCodes(codeUnits.sublist(from, to)));

}

- singleBytesCount = 0;

}

int i = startIndex;

@@ -485,6 +492,13 @@ class _Utf8Decoder {

}

while (i < endIndex) {

+ int oneBytes = scanOneByteCharacters(codeUnits, i);

+ if (oneBytes > 0) {

+ _isFirstCharacter = false;

+ addSingleBytes(i, i + oneBytes);

+ i += oneBytes;

+ if (i == endIndex) break;

+ }

int unit = codeUnits[i++];

// TODO(floitsch): the way we test we could potentially allow

// units that are too large, if they happen to have the

@@ -493,23 +507,13 @@ class _Utf8Decoder {

// https://codereview.chromium.org/22929022/diff/1/sdk/lib/convert/utf.dart?column_width=80

if (unit < 0) {

// TODO(floitsch): should this be unit <= 0 ?

- if (singleBytesCount > 0) {

- int to = i - 1;

- addSingleBytes(to - singleBytesCount, to);

- }

if (!_allowMalformed) {

throw new FormatException(

"Negative UTF-8 code unit: -0x${(-unit).toRadixString(16)}");

}

_stringSink.writeCharCode(UNICODE_REPLACEMENT_CHARACTER_RUNE);

- } else if (unit <= _ONE_BYTE_LIMIT) {

- _isFirstCharacter = false;

- singleBytesCount++;

} else {

- if (singleBytesCount > 0) {

- int to = i - 1;

- addSingleBytes(to - singleBytesCount, to);

- }

+ assert(unit > _ONE_BYTE_LIMIT);

if ((unit & 0xE0) == 0xC0) {

value = unit & 0x1F;

expectedUnits = extraUnits = 1;

@@ -538,9 +542,6 @@ class _Utf8Decoder {

}

break loop;

}

- if (singleBytesCount > 0) {

- addSingleBytes(i - singleBytesCount, endIndex);

- }

if (expectedUnits > 0) {

_value = value;

_expectedUnits = expectedUnits;

« no previous file with comments | « no previous file | no next file » | no next file with comments »