sdk/lib/uri/encode_decode.dart - Issue 16019002: Merge the dart:uri library into dart:core and update the Uri class

Side by Side Diff: sdk/lib/uri/encode_decode.dart

Issue 16019002: Merge the dart:uri library into dart:core and update the Uri class (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: Final cleanup Created 7 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
	(Empty)
1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file

2 // for details. All rights reserved. Use of this source code is governed by a

3 // BSD-style license that can be found in the LICENSE file.

4

5 part of dart.uri;

6

7 /**

8 * Javascript-like URI encode/decode functions.

9 * The documentation here borrows heavily from the original Javascript

10 * documentation on MDN at:

11 * https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects

12 */

13

/**
 * Escapes [uri] in the manner of JavaScript's `encodeURI`, treating it as a
 * complete URI.
 *
 * ASCII letters, digits and the characters
 * `! # $ & ' ( ) * , - . / : ; = ? @ _ ~` are copied through unchanged.
 * A space is written as `+`. Every other character, including `+` itself,
 * is handed to [_uriEncode]'s escaping path, which emits one `%XX` escape
 * per byte of the character's encoding.
 *
 * Returns the escaped URI.
 */
String encodeUri(String uri) {
  // 128-bit pass-through map, stored as eight 16-bit rows. Row r covers
  // character codes 16*r .. 16*r+15, least significant bit first; a set
  // bit means the character is emitted as-is, a clear bit means it is
  // escaped.
  const List<int> passThroughBits = const <int>[
      0x0000,   // 0x00 - 0x0f  (nothing kept)
      0x0000,   // 0x10 - 0x1f  (nothing kept)
      0xf7da,   // 0x20 - 0x2f  ! # $ & ' ( ) * , - . /
      0xafff,   // 0x30 - 0x3f  0-9 : ; = ?
      0xffff,   // 0x40 - 0x4f  @ A-O
      0x87ff,   // 0x50 - 0x5f  P-Z _
      0xfffe,   // 0x60 - 0x6f  a-o
      0x47ff];  // 0x70 - 0x7f  p-z ~
  return _uriEncode(passThroughBits, uri);
}

45

/**
 * Counterpart of [encodeUri], modelled on JavaScript's `decodeURI`.
 *
 * Delegates to [_uriDecode]: every `%XX` escape in [uri] is decoded
 * (consecutive escapes are decoded together as one byte sequence) and
 * every `+` becomes a space. Note that, unlike JavaScript's `decodeURI`,
 * no escape sequence is left untouched.
 *
 * Returns the unescaped URI. [_uriDecode] throws an [ArgumentError] when
 * an escape is truncated or contains a non-hexadecimal digit.
 */
String decodeUri(String uri) => _uriDecode(uri);

57

/**
 * Escapes a single URI [component] in the manner of JavaScript's
 * `encodeURIComponent`.
 *
 * Only ASCII letters, digits and the characters `! ' ( ) * - . _ ~` are
 * copied through unchanged. A space is written as `+`. Every other
 * character, including the URI delimiters `& = ? / : @ #`, is handed to
 * [_uriEncode]'s escaping path, which emits one `%XX` escape per byte of
 * the character's encoding.
 *
 * Use this on any user-entered value that becomes part of a URI. If a user
 * types "Thyme &time=again" as a comment and the value is inserted
 * unescaped, the `&` and `=` start a second key/value pair, so the server
 * sees a `comment` key equal to "Thyme " and an extra `time` key equal to
 * "again". After escaping, the delimiters can no longer be mistaken for
 * query syntax.
 *
 * Returns the escaped string.
 */
String encodeUriComponent(String component) {
  // 128-bit pass-through map, stored as eight 16-bit rows. Row r covers
  // character codes 16*r .. 16*r+15, least significant bit first; a set
  // bit means the character is emitted as-is, a clear bit means it is
  // escaped.
  const List<int> passThroughBits = const <int>[
      0x0000,   // 0x00 - 0x0f  (nothing kept)
      0x0000,   // 0x10 - 0x1f  (nothing kept)
      0x6782,   // 0x20 - 0x2f  ! ' ( ) * - .
      0x03ff,   // 0x30 - 0x3f  0-9
      0xfffe,   // 0x40 - 0x4f  A-O
      0x87ff,   // 0x50 - 0x5f  P-Z _
      0xfffe,   // 0x60 - 0x6f  a-o
      0x47ff];  // 0x70 - 0x7f  p-z ~
  return _uriEncode(passThroughBits, component);
}

96

/**
 * Counterpart of [encodeUriComponent], modelled on JavaScript's
 * `decodeURIComponent`.
 *
 * Delegates to [_uriDecode]: every `%XX` escape in [encodedComponent] is
 * decoded and every `+` becomes a space.
 *
 * Returns the unescaped string. [_uriDecode] throws an [ArgumentError]
 * when an escape is truncated or contains a non-hexadecimal digit.
 */
String decodeUriComponent(String encodedComponent) =>
    _uriDecode(encodedComponent);

106

/**
 * Shared escaping routine behind [encodeUri] and [encodeUriComponent].
 *
 * [canonicalTable] is a bit map indexed by character code: entry
 * `code >> 4`, bit `code & 0x0f`. Characters below 128 whose bit is set
 * are copied to the output unchanged. Of the remaining characters, a space
 * becomes `+` and everything else is written as upper-case `%XX` escapes,
 * one per value returned by `codepointsToUtf8`.
 *
 * Returns the escaped form of [text]. Throws an [ArgumentError] if a lead
 * surrogate (0xD800-0xDBFF) is not immediately followed by a trail
 * surrogate (0xDC00-0xDFFF).
 */
String _uriEncode(List<int> canonicalTable, String text) {
  const String hexDigits = '0123456789ABCDEF';
  final StringBuffer out = new StringBuffer();
  for (int index = 0; index < text.length; index++) {
    int unit = text.codeUnitAt(index);
    final bool passThrough = unit < 128 &&
        (canonicalTable[unit >> 4] & (1 << (unit & 0x0f))) != 0;
    if (passThrough) {
      out.write(text[index]);
      continue;
    }
    if (text[index] == " ") {
      out.write("+");
      continue;
    }
    if (unit >= 0xD800 && unit < 0xDC00) {
      // Lead (high) surrogate: consume the next code unit, which must be
      // the trail (low) surrogate. Running off the end counts as 0, which
      // fails the range check below.
      index++;
      final int trail =
          index == text.length ? 0 : text.codeUnitAt(index);
      if (trail < 0xDC00 || trail >= 0xE000) {
        throw new ArgumentError('Malformed URI');
      }
      // Combine the pair into a single code point at or above U+10000.
      unit = 0x10000 + ((unit - 0xD800) << 10) + (trail - 0xDC00);
    }
    for (int byte in codepointsToUtf8([unit])) {
      out
        ..write('%')
        ..write(hexDigits[byte >> 4])
        ..write(hexDigits[byte & 0x0f]);
    }
  }
  return out.toString();
}

141

/**
 * Reads the two hexadecimal digits of [s] at positions [pos] and
 * `pos + 1` and returns the value (0-255) they spell out.
 *
 * Upper- and lower-case digits `A`-`F` / `a`-`f` are both accepted.
 * Throws an [ArgumentError] if either character is not a hexadecimal
 * digit.
 */
int _hexCharPairToByte(String s, int pos) {
  int value = 0;
  for (int offset = pos; offset < pos + 2; offset++) {
    final int unit = s.codeUnitAt(offset);
    int digit;
    if (unit >= 0x30 && unit <= 0x39) {
      // '0'..'9'
      digit = unit - 0x30;
    } else {
      // Setting bit 0x20 folds 'A'..'F' (0x41-0x46) onto 'a'..'f'
      // (0x61-0x66), so a single range check covers both cases.
      final int folded = unit | 0x20;
      if (folded < 0x61 || folded > 0x66) {
        throw new ArgumentError("Invalid URL encoding");
      }
      // 'a' is 0x61 and stands for 10, hence the 0x57 offset.
      digit = folded - 0x57;
    }
    value = value * 16 + digit;
  }
  return value;
}

164

/**
 * Shared unescaping routine behind [decodeUri] and [decodeUriComponent].
 *
 * Walks [text] left to right. A `+` becomes a space, any other character
 * that is not `%` is copied unchanged, and each maximal run of consecutive
 * `%XX` escapes is collected into a list of byte values and passed to
 * `decodeUtf8` as a whole, so that multi-byte sequences are decoded
 * together.
 *
 * Returns the unescaped string. Throws an [ArgumentError] if a `%` is not
 * followed by two more characters ('Truncated URI') or, via
 * [_hexCharPairToByte], if those characters are not hexadecimal digits.
 */
String _uriDecode(String text) {
  final StringBuffer out = new StringBuffer();
  final List<int> bytes = <int>[];
  final int length = text.length;
  int index = 0;
  while (index < length) {
    final String current = text[index];
    if (current == '+') {
      out.write(" ");
      index++;
      continue;
    }
    if (current != '%') {
      out.write(current);
      index++;
      continue;
    }
    // Gather the whole run of adjacent escapes before decoding.
    bytes.clear();
    do {
      // Step past the '%'; two hex digits must remain.
      index++;
      if (index > length - 2) {
        throw new ArgumentError('Truncated URI');
      }
      bytes.add(_hexCharPairToByte(text, index));
      index += 2;
    } while (index < length && text[index] == '%');
    out.write(decodeUtf8(bytes));
  }
  return out.toString();
}

198

OLD	NEW

« no previous file with comments | « sdk/lib/mirrors/mirrors.dart ('k') | sdk/lib/uri/helpers.dart » ('j') | no next file with comments »