OLD | NEW |
1 // Protocol Buffers - Google's data interchange format | 1 // Protocol Buffers - Google's data interchange format |
2 // Copyright 2008 Google Inc. All rights reserved. | 2 // Copyright 2008 Google Inc. All rights reserved. |
3 // https://developers.google.com/protocol-buffers/ | 3 // https://developers.google.com/protocol-buffers/ |
4 // | 4 // |
5 // Redistribution and use in source and binary forms, with or without | 5 // Redistribution and use in source and binary forms, with or without |
6 // modification, are permitted provided that the following conditions are | 6 // modification, are permitted provided that the following conditions are |
7 // met: | 7 // met: |
8 // | 8 // |
9 // * Redistributions of source code must retain the above copyright | 9 // * Redistributions of source code must retain the above copyright |
10 // notice, this list of conditions and the following disclaimer. | 10 // notice, this list of conditions and the following disclaimer. |
(...skipping 877 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
888 * signed varints. | 888 * signed varints. |
889 * @return {number} The enum value read from the binary stream. | 889 * @return {number} The enum value read from the binary stream. |
890 */ | 890 */ |
jspb.BinaryDecoder.prototype.readEnum = function() {
  // Delegates to the signed 32-bit varint reader and returns its result.
  var enumValue = this.readSignedVarint32();
  return enumValue;
};
894 | 894 |
895 | 895 |
/**
 * Reads and parses a UTF-8 encoded unicode string from the stream.
 * The code is inspired by maps.vectortown.parse.StreamedDataViewReader.
 * Supports codepoints from U+0000 up to U+10FFFF
 * (http://en.wikipedia.org/wiki/UTF-8). Codepoints that need four UTF-8
 * bytes are emitted as a UTF-16 surrogate pair.
 *
 * The input is not validated: stray continuation bytes and lead bytes
 * >= 248 are skipped, and a multi-byte sequence that is truncated by
 * `length` still reads its continuation bytes from beyond the requested
 * range.
 *
 * @param {number} length The length of the string to read, in bytes.
 * @return {string} The decoded string.
 */
jspb.BinaryDecoder.prototype.readString = function(length) {
  // Upper bound on the number of code units handed to a single
  // String.fromCharCode.apply call. Every code unit becomes one function
  // argument, so an unbounded array overflows the engine's argument/stack
  // limit (RangeError) on long strings.
  var MAX_CODE_UNITS_PER_CALL = 8192;

  var bytes = this.bytes_;
  var cursor = this.cursor_;
  var end = cursor + length;
  var codeUnits = [];
  var result = '';
  var c2, c3, c4;

  while (cursor < end) {
    var c = bytes[cursor++];
    if (c < 128) {  // Regular 7-bit ASCII.
      codeUnits.push(c);
    } else if (c < 192) {
      // UTF-8 continuation mark. We are out of sync. This might happen if
      // we attempted to read a character with more than four bytes.
      // Skip the byte and try to resynchronize on the next one.
    } else if (c < 224) {  // UTF-8 with two bytes.
      c2 = bytes[cursor++];
      codeUnits.push(((c & 31) << 6) | (c2 & 63));
    } else if (c < 240) {  // UTF-8 with three bytes.
      c2 = bytes[cursor++];
      c3 = bytes[cursor++];
      codeUnits.push(((c & 15) << 12) | ((c2 & 63) << 6) | (c3 & 63));
    } else if (c < 248) {  // UTF-8 with four bytes.
      c2 = bytes[cursor++];
      c3 = bytes[cursor++];
      c4 = bytes[cursor++];
      // Characters written on 4 bytes have 21 bits for a codepoint.
      // We can't fit that on 16bit characters, so we use surrogates.
      var codepoint = ((c & 7) << 18) | ((c2 & 63) << 12) |
          ((c3 & 63) << 6) | (c4 & 63);
      // Surrogates formula from wikipedia.
      // 1. Subtract 0x10000 from codepoint
      codepoint -= 0x10000;
      // 2. Split this into the high 10-bit value and the low 10-bit value
      // 3. Add 0xD800 to the high value to form the high surrogate
      // 4. Add 0xDC00 to the low value to form the low surrogate:
      var low = (codepoint & 1023) + 0xDC00;
      var high = ((codepoint >> 10) & 1023) + 0xD800;
      codeUnits.push(high, low);
    }
    // Bytes >= 248 are not valid UTF-8 lead bytes; they fall through every
    // branch above and are dropped.

    // Flush in bounded chunks so `apply` never receives too many arguments.
    if (codeUnits.length >= MAX_CODE_UNITS_PER_CALL) {
      result += String.fromCharCode.apply(null, codeUnits);
      codeUnits.length = 0;
    }
  }

  // String.fromCharCode.apply is faster than manually appending characters on
  // Chrome 25+; only one string concatenation happens per flushed chunk.
  result += String.fromCharCode.apply(null, codeUnits);
  this.cursor_ = cursor;
  return result;
};
937 | 950 |
938 | 951 |
939 /** | 952 /** |
940 * Reads and parses a UTF-8 encoded unicode string (with length prefix) from | 953 * Reads and parses a UTF-8 encoded unicode string (with length prefix) from |
941 * the stream. | 954 * the stream. |
942 * @return {string} The decoded string. | 955 * @return {string} The decoded string. |
943 */ | 956 */ |
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
998 var d = bytes[cursor + 3]; | 1011 var d = bytes[cursor + 3]; |
999 var e = bytes[cursor + 4]; | 1012 var e = bytes[cursor + 4]; |
1000 var f = bytes[cursor + 5]; | 1013 var f = bytes[cursor + 5]; |
1001 var g = bytes[cursor + 6]; | 1014 var g = bytes[cursor + 6]; |
1002 var h = bytes[cursor + 7]; | 1015 var h = bytes[cursor + 7]; |
1003 | 1016 |
1004 this.cursor_ += 8; | 1017 this.cursor_ += 8; |
1005 | 1018 |
1006 return String.fromCharCode(a, b, c, d, e, f, g, h); | 1019 return String.fromCharCode(a, b, c, d, e, f, g, h); |
1007 }; | 1020 }; |
OLD | NEW |