Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(9)

Side by Side Diff: sdk/lib/uri/encode_decode.dart

Issue 16019002: Merge the dart:uri library into dart:core and update the Uri class (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Final cleanup Created 7 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « sdk/lib/mirrors/mirrors.dart ('k') | sdk/lib/uri/helpers.dart » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file.
4
5 part of dart.uri;
6
7 /**
8 * Javascript-like URI encode/decode functions.
9 * The documentation here borrows heavily from the original Javascript
10 * documentation on MDN at:
11 * https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects
12 */
13
/**
 * Percent-encodes a complete URI, in the manner of JavaScript's `encodeURI`.
 *
 * Code units below 128 whose bit is set in the table inside this function
 * are copied to the result unchanged. Those are the ASCII letters and digits,
 * the marks `-_.!~*'()` and the reserved characters `;/?:@&=$,#`, which keep
 * their special meaning because [uri] is assumed to be a whole URI.
 *
 * Everything else is handled by [_uriEncode]: a space becomes `+`, and any
 * other character (a literal `+` and `%` included) is replaced by the `%XX`
 * escapes of its UTF-8 bytes. A single character therefore produces between
 * one and four escapes; four only occur for a surrogate pair.
 *
 * Returns the escaped URI.
 */
String encodeUri(String uri) {
  // 128-bit set stored as eight 16-bit words, one word per run of sixteen
  // code units, least significant bit first. A set bit means "emit as-is".
  const List<int> unescapedTable = const <int>[
    0x0000, // 0x00 - 0x0f: nothing
    0x0000, // 0x10 - 0x1f: nothing
    0xf7da, // 0x20 - 0x2f: ! # $ & ' ( ) * , - . /
    0xafff, // 0x30 - 0x3f: 0-9 : ; = ?
    0xffff, // 0x40 - 0x4f: @ A-O
    0x87ff, // 0x50 - 0x5f: P-Z _
    0xfffe, // 0x60 - 0x6f: a-o
    0x47ff  // 0x70 - 0x7f: p-z ~
  ];
  return _uriEncode(unescapedTable, uri);
}
45
/**
 * Reverses [encodeUri]; the counterpart of JavaScript's `decodeURI`.
 *
 * The work is delegated to [_uriDecode], which turns every `+` into a space
 * and replaces each run of `%XX` escapes with the text obtained by decoding
 * those bytes as UTF-8. Note that, unlike JavaScript's `decodeURI`, no escape
 * sequence is left untouched: escapes of reserved characters are decoded
 * as well.
 *
 * Returns the unescaped URI.
 */
String decodeUri(String uri) => _uriDecode(uri);
57
/**
 * Percent-encodes a single URI component, in the manner of JavaScript's
 * `encodeURIComponent`.
 *
 * Only the ASCII letters and digits and the marks `-_.!~*'()` are copied
 * unchanged. Via [_uriEncode], a space becomes `+` and every other character,
 * including the URI delimiters such as `&`, `=`, `/`, `?` and `@`, is
 * replaced by the `%XX` escapes of its UTF-8 bytes (one to four escapes per
 * character; four only for a surrogate pair).
 *
 * Use this on any user-supplied value that is placed into a URI. If the
 * comment "Thyme &time=again" were inserted unescaped as `comment=...`, the
 * ampersand and equals sign would start a second key/value pair (`time`
 * equal to `again`) instead of being part of the comment. Encoded with this
 * function the value is `Thyme+%26time%3Dagain`.
 *
 * Returns the escaped string.
 */
String encodeUriComponent(String component) {
  // 128-bit set stored as eight 16-bit words, one word per run of sixteen
  // code units, least significant bit first. A set bit means "emit as-is".
  const List<int> unescapedTable = const <int>[
    0x0000, // 0x00 - 0x0f: nothing
    0x0000, // 0x10 - 0x1f: nothing
    0x6782, // 0x20 - 0x2f: ! ' ( ) * - .
    0x03ff, // 0x30 - 0x3f: 0-9
    0xfffe, // 0x40 - 0x4f: A-O
    0x87ff, // 0x50 - 0x5f: P-Z _
    0xfffe, // 0x60 - 0x6f: a-o
    0x47ff  // 0x70 - 0x7f: p-z ~
  ];
  return _uriEncode(unescapedTable, component);
}
96
/**
 * Reverses [encodeUriComponent]; the counterpart of JavaScript's
 * `decodeURIComponent`.
 *
 * Delegates to [_uriDecode]: each `+` in [encodedComponent] becomes a space
 * and each run of `%XX` escapes is replaced by the text obtained by decoding
 * those bytes as UTF-8.
 *
 * Returns the unescaped string.
 */
String decodeUriComponent(String encodedComponent) =>
    _uriDecode(encodedComponent);
106
/**
 * Shared worker behind [encodeUri] and [encodeUriComponent].
 *
 * [canonicalTable] is a 128-bit set stored as eight 16-bit words: bit
 * `ch & 0x0f` of word `ch >> 4` is set when the code unit `ch` may be copied
 * to the result unchanged. For every code unit of [text]:
 *
 *  * if it is below 128 and its bit is set, it is copied as-is;
 *  * otherwise, if it is a space, `+` is written;
 *  * otherwise it is converted to UTF-8 and each byte is written as `%XX`
 *    with upper-case hex digits. A lead/trail surrogate pair is first
 *    combined into a single code point.
 *
 * Returns the escaped string.
 *
 * Throws an [ArgumentError] if [text] contains an unpaired surrogate, be it
 * a lead surrogate without a following trail surrogate or a trail surrogate
 * on its own. Neither denotes a character, so neither has a UTF-8 encoding.
 */
String _uriEncode(List<int> canonicalTable, String text) {
  const String hexDigits = '0123456789ABCDEF';
  final StringBuffer result = new StringBuffer();
  for (int i = 0; i < text.length; i++) {
    int ch = text.codeUnitAt(i);
    if (ch < 128 && (canonicalTable[ch >> 4] & (1 << (ch & 0x0f))) != 0) {
      result.write(text[i]);
      continue;
    }
    // The space test comes second on purpose: a table that marks the space
    // as unescaped still wins.
    if (ch == 0x20) {
      result.write('+');
      continue;
    }
    if (ch >= 0xD800 && ch < 0xDC00) {
      // Lead (high) surrogate: it must be followed by a trail (low) one.
      ++i;
      final int nextCh = i < text.length ? text.codeUnitAt(i) : 0;
      if (nextCh < 0xDC00 || nextCh >= 0xE000) {
        throw new ArgumentError('Malformed URI');
      }
      // Combine the pair into the supplementary-plane code point.
      ch = 0x10000 + ((ch - 0xD800) << 10) + (nextCh - 0xDC00);
    } else if (ch >= 0xDC00 && ch < 0xE000) {
      // Trail surrogate with no preceding lead surrogate.
      throw new ArgumentError('Malformed URI');
    }
    for (int byte in codepointsToUtf8([ch])) {
      result.write('%');
      result.write(hexDigits[byte >> 4]);
      result.write(hexDigits[byte & 0x0f]);
    }
  }
  return result.toString();
}
141
/**
 * Reads the two hexadecimal digits of [s] at [pos] and [pos] + 1 and
 * returns the byte value (0-255) they spell.
 *
 * Upper- and lower-case digits `A`-`F` / `a`-`f` are both accepted.
 *
 * Throws an [ArgumentError] if either character is not a hex digit.
 */
int _hexCharPairToByte(String s, int pos) {
  int value = 0;
  for (int offset = pos; offset < pos + 2; offset++) {
    final int unit = s.codeUnitAt(offset);
    int digit;
    if (unit >= 0x30 && unit <= 0x39) {
      // '0'..'9'
      digit = unit - 0x30;
    } else {
      // Setting bit 0x20 folds 'A'..'F' (0x41-0x46) onto 'a'..'f'
      // (0x61-0x66), so a single range test covers both cases.
      final int lowered = unit | 0x20;
      if (lowered < 0x61 || lowered > 0x66) {
        throw new ArgumentError("Invalid URL encoding");
      }
      // 0x57 == 'a' - 10
      digit = lowered - 0x57;
    }
    value = (value << 4) | digit;
  }
  return value;
}
164
/**
 * Shared worker behind [decodeUri] and [decodeUriComponent].
 *
 * Scans [text] from left to right. A `+` is replaced by a space and any
 * other character that is not `%` is copied unchanged. A maximal run of
 * consecutive `%XX` escapes is collected into a list of byte values, which
 * is handed to `decodeUtf8`; the resulting text is appended to the output.
 *
 * Returns the unescaped string.
 *
 * Throws an [ArgumentError] ('Truncated URI') when a `%` is followed by
 * fewer than two characters. [_hexCharPairToByte] throws an [ArgumentError]
 * when the two characters are not hex digits.
 */
String _uriDecode(String text) {
  final StringBuffer out = new StringBuffer();
  final List<int> bytes = <int>[];
  int i = 0;
  while (i < text.length) {
    String ch = text[i];
    if (ch == '%') {
      bytes.clear();
      do {
        // Step past the '%'; two more characters must follow it.
        i++;
        if (i > text.length - 2) {
          throw new ArgumentError('Truncated URI');
        }
        bytes.add(_hexCharPairToByte(text, i));
        i += 2;
        if (i == text.length) {
          break;
        }
        ch = text[i];
      } while (ch == '%');
      out.write(decodeUtf8(bytes));
    } else {
      out.write(ch == '+' ? " " : ch);
      i++;
    }
  }
  return out.toString();
}
198
OLDNEW
« no previous file with comments | « sdk/lib/mirrors/mirrors.dart ('k') | sdk/lib/uri/helpers.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698