| OLD | NEW |
| 1 // Protocol Buffers - Google's data interchange format | 1 // Protocol Buffers - Google's data interchange format |
| 2 // Copyright 2008 Google Inc. All rights reserved. | 2 // Copyright 2008 Google Inc. All rights reserved. |
| 3 // https://developers.google.com/protocol-buffers/ | 3 // https://developers.google.com/protocol-buffers/ |
| 4 // | 4 // |
| 5 // Redistribution and use in source and binary forms, with or without | 5 // Redistribution and use in source and binary forms, with or without |
| 6 // modification, are permitted provided that the following conditions are | 6 // modification, are permitted provided that the following conditions are |
| 7 // met: | 7 // met: |
| 8 // | 8 // |
| 9 // * Redistributions of source code must retain the above copyright | 9 // * Redistributions of source code must retain the above copyright |
| 10 // notice, this list of conditions and the following disclaimer. | 10 // notice, this list of conditions and the following disclaimer. |
| (...skipping 877 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 888 * signed varints. | 888 * signed varints. |
| 889 * @return {number} The enum value read from the binary stream. | 889 * @return {number} The enum value read from the binary stream. |
| 890 */ | 890 */ |
jspb.BinaryDecoder.prototype.readEnum = function() {
  // Decoding is delegated to the signed 32-bit varint reader; the value it
  // produces is handed back to the caller untouched.
  var enumValue = this.readSignedVarint32();
  return enumValue;
};
| 894 | 894 |
| 895 | 895 |
/**
 * Reads and parses a UTF-8 encoded unicode string from the stream.
 * Supports codepoints from U+0000 up to U+10FFFF; codepoints above U+FFFF are
 * emitted as UTF-16 surrogate pairs (http://en.wikipedia.org/wiki/UTF-8).
 *
 * Malformed input is tolerated rather than rejected: stray continuation bytes
 * and lead bytes >= 0xF8 are skipped, and a multi-byte sequence that is cut
 * off by the end of the string is dropped, so the cursor never advances past
 * the requested length.
 * @param {number} length The length of the string to read, in bytes.
 * @return {string} The decoded string.
 */
jspb.BinaryDecoder.prototype.readString = function(length) {
  var bytes = this.bytes_;
  var cursor = this.cursor_;
  var end = cursor + length;
  var codeUnits = [];
  var result = '';
  var c2;
  var c3;
  var c4;

  while (cursor < end) {
    var c = bytes[cursor++];
    if (c < 128) {  // Regular 7-bit ASCII.
      codeUnits.push(c);
    } else if (c < 192) {
      // UTF-8 continuation mark. We are out of sync. This might happen if we
      // attempted to read a character with more than four bytes. Nothing is
      // emitted for it.
    } else if (c < 224) {  // UTF-8 with two bytes.
      if (cursor + 1 > end) {
        // Sequence is truncated by the end of the string: drop it instead of
        // consuming bytes that belong to whatever follows the string.
        cursor = end;
        break;
      }
      c2 = bytes[cursor++];
      codeUnits.push(((c & 31) << 6) | (c2 & 63));
    } else if (c < 240) {  // UTF-8 with three bytes.
      if (cursor + 2 > end) {
        cursor = end;
        break;
      }
      c2 = bytes[cursor++];
      c3 = bytes[cursor++];
      codeUnits.push(((c & 15) << 12) | ((c2 & 63) << 6) | (c3 & 63));
    } else if (c < 248) {  // UTF-8 with four bytes.
      if (cursor + 3 > end) {
        cursor = end;
        break;
      }
      c2 = bytes[cursor++];
      c3 = bytes[cursor++];
      c4 = bytes[cursor++];
      // Characters written on 4 bytes have 21 bits for a codepoint.
      // We can't fit that on 16bit characters, so we use surrogates.
      var codepoint = ((c & 7) << 18) | ((c2 & 63) << 12) |
          ((c3 & 63) << 6) | (c4 & 63);
      // Surrogates formula from wikipedia.
      // 1. Subtract 0x10000 from codepoint
      codepoint -= 0x10000;
      // 2. Split this into the high 10-bit value and the low 10-bit value
      // 3. Add 0xD800 to the high value to form the high surrogate
      // 4. Add 0xDC00 to the low value to form the low surrogate:
      var low = (codepoint & 1023) + 0xDC00;
      var high = ((codepoint >> 10) & 1023) + 0xD800;
      codeUnits.push(high, low);
    }
    // Any other value is not a valid UTF-8 lead byte and is skipped.

    // `apply` passes every code unit as a separate argument and engines cap
    // the number of arguments, so flush in bounded chunks to avoid a
    // RangeError on long strings.
    if (codeUnits.length >= 8192) {
      result += String.fromCharCode.apply(null, codeUnits);
      codeUnits.length = 0;
    }
  }

  // String.fromCharCode.apply is faster than manually appending characters on
  // Chrome 25+, and generates far less cons string garbage.
  result += String.fromCharCode.apply(null, codeUnits);
  this.cursor_ = cursor;
  return result;
};
| 937 | 950 |
| 938 | 951 |
| 939 /** | 952 /** |
| 940 * Reads and parses a UTF-8 encoded unicode string (with length prefix) from | 953 * Reads and parses a UTF-8 encoded unicode string (with length prefix) from |
| 941 * the stream. | 954 * the stream. |
| 942 * @return {string} The decoded string. | 955 * @return {string} The decoded string. |
| 943 */ | 956 */ |
| (...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 998 var d = bytes[cursor + 3]; | 1011 var d = bytes[cursor + 3]; |
| 999 var e = bytes[cursor + 4]; | 1012 var e = bytes[cursor + 4]; |
| 1000 var f = bytes[cursor + 5]; | 1013 var f = bytes[cursor + 5]; |
| 1001 var g = bytes[cursor + 6]; | 1014 var g = bytes[cursor + 6]; |
| 1002 var h = bytes[cursor + 7]; | 1015 var h = bytes[cursor + 7]; |
| 1003 | 1016 |
| 1004 this.cursor_ += 8; | 1017 this.cursor_ += 8; |
| 1005 | 1018 |
| 1006 return String.fromCharCode(a, b, c, d, e, f, g, h); | 1019 return String.fromCharCode(a, b, c, d, e, f, g, h); |
| 1007 }; | 1020 }; |
| OLD | NEW |