third_party/protobuf/js/binary/decoder.js - Issue 2495533002: third_party/protobuf: Update to HEAD (83d681ee2c)

Unified Diff: third_party/protobuf/js/binary/decoder.js

Issue 2495533002: third_party/protobuf: Update to HEAD (83d681ee2c) (Closed)

Patch Set: Update to new HEAD (b7632464b4) + restore GOOGLE_PROTOBUF_NO_STATIC_INITIALIZER Created 4 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« no previous file with comments | « third_party/protobuf/js/binary/constants.js ('k') | third_party/protobuf/js/binary/decoder_test.js » ('j') | third_party/protobuf/patches/0008-uninline_getemptystring_and_getemptystringalreadyinited.patch » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: third_party/protobuf/js/binary/decoder.js

diff --git a/third_party/protobuf/js/binary/decoder.js b/third_party/protobuf/js/binary/decoder.js

index 41094a36881ece2dee8208bdd9b2b690a16536c1..040cf7153fcda2652a55588c9349cbc9f398afb0 100644

--- a/third_party/protobuf/js/binary/decoder.js

+++ b/third_party/protobuf/js/binary/decoder.js

@@ -895,11 +895,9 @@ jspb.BinaryDecoder.prototype.readEnum = function() {

/**

* Reads and parses a UTF-8 encoded unicode string from the stream.

- * The code is inspired by maps.vectortown.parse.StreamedDataViewReader, with

- * the exception that the implementation here does not get confused if it

- * encounters characters longer than three bytes. These characters are ignored

- * though, as they are extremely rare: three UTF-8 bytes cover virtually all

- * characters in common use (http://en.wikipedia.org/wiki/UTF-8).

+ * The code is inspired by maps.vectortown.parse.StreamedDataViewReader.

+ * Supports codepoints from U+0000 up to U+10FFFF

+ * (see http://en.wikipedia.org/wiki/UTF-8).

* @param {number} length The length of the string to read.

* @return {string} The decoded string.

@@ -907,30 +905,45 @@ jspb.BinaryDecoder.prototype.readString = function(length) {

var bytes = this.bytes_;

var cursor = this.cursor_;

var end = cursor + length;

- var chars = [];

+ var codeUnits = [];

while (cursor < end) {

var c = bytes[cursor++];

if (c < 128) { // Regular 7-bit ASCII.

- chars.push(c);

+ codeUnits.push(c);

} else if (c < 192) {

// UTF-8 continuation mark. We are out of sync. This

// might happen if we attempted to read a character

- // with more than three bytes.

+ // with more than four bytes.

continue;

} else if (c < 224) { // UTF-8 with two bytes.

var c2 = bytes[cursor++];

- chars.push(((c & 31) << 6) | (c2 & 63));

+ codeUnits.push(((c & 31) << 6) | (c2 & 63));

} else if (c < 240) { // UTF-8 with three bytes.

var c2 = bytes[cursor++];

var c3 = bytes[cursor++];

- chars.push(((c & 15) << 12) | ((c2 & 63) << 6) | (c3 & 63));

+ codeUnits.push(((c & 15) << 12) | ((c2 & 63) << 6) | (c3 & 63));

+ } else if (c < 248) { // UTF-8 with 4 bytes.

+ var c2 = bytes[cursor++];

+ var c3 = bytes[cursor++];

+ var c4 = bytes[cursor++];

+ // Characters encoded in 4 bytes carry a 21-bit codepoint.

+ // That does not fit in one 16-bit code unit, so we emit a surrogate pair.

+ var codepoint = ((c & 7) << 18) | ((c2 & 63) << 12) | ((c3 & 63) << 6) | (c4 & 63);

+ // Surrogates formula from wikipedia.

+ // 1. Subtract 0x10000 from codepoint

+ codepoint -= 0x10000;

+ // 2. Split this into the high 10-bit value and the low 10-bit value

+ // 3. Add 0xD800 to the high value to form the high surrogate

+ // 4. Add 0xDC00 to the low value to form the low surrogate:

+ var low = (codepoint & 1023) + 0xDC00;

+ var high = ((codepoint >> 10) & 1023) + 0xD800;

+ codeUnits.push(high, low)

}

// String.fromCharCode.apply is faster than manually appending characters on

// Chrome 25+, and generates no additional cons string garbage.

- var result = String.fromCharCode.apply(null, chars);

+ var result = String.fromCharCode.apply(null, codeUnits);

this.cursor_ = cursor;

return result;

};