OLD | NEW |
---|---|
(Empty) | |
1 // Copyright (c) 2016, the Dart project authors. Please see the AUTHORS file | |
2 // for details. All rights reserved. Use of this source code is governed by a | |
3 // BSD-style license that can be found in the LICENSE file. | |
4 | |
5 library fasta.quote; | |
6 | |
7 import 'errors.dart' show | |
8 inputError, | |
9 internalError; | |
10 | |
11 import 'package:dart_scanner/src/characters.dart' show | |
12 $BACKSLASH, | |
13 $BS, | |
14 $CLOSE_CURLY_BRACKET, | |
15 $CR, | |
16 $FF, | |
17 $LF, | |
18 $OPEN_CURLY_BRACKET, | |
19 $SPACE, | |
20 $TAB, | |
21 $VTAB, | |
22 $b, | |
23 $f, | |
24 $n, | |
25 $r, | |
26 $t, | |
27 $u, | |
28 $v, | |
29 $x, | |
30 hexDigitValue, | |
31 isHexDigit; | |
32 | |
/// The kinds of string-literal delimiters distinguished by [analyzeQuote].
enum Quote {
  /// A literal opened with `'`.
  Single,

  /// A literal opened with `"`.
  Double,

  /// A literal opened with `'''`.
  MultiLineSingle,

  /// A literal opened with `"""`.
  MultiLineDouble,

  /// A literal opened with `r'`.
  RawSingle,

  /// A literal opened with `r"`.
  RawDouble,

  /// A literal opened with `r'''`.
  RawMultiLineSingle,

  /// A literal opened with `r"""`.
  RawMultiLineDouble,
}
43 | |
/// Classifies the opening delimiter of the string literal text [first].
///
/// A leading `r` selects the raw variants. Triple-quote delimiters are tested
/// before single-character ones so that `"""` is not mistaken for `"`.
/// Anything that does not start with a recognised delimiter is reported
/// through [internalError].
Quote analyzeQuote(String first) {
  final bool isRaw = first.startsWith('r');
  // Index of the first quote character, just past the optional `r`.
  final int quoteStart = isRaw ? 1 : 0;

  if (first.startsWith('"""', quoteStart)) {
    return isRaw ? Quote.RawMultiLineDouble : Quote.MultiLineDouble;
  }
  if (first.startsWith("'''", quoteStart)) {
    return isRaw ? Quote.RawMultiLineSingle : Quote.MultiLineSingle;
  }
  if (first.startsWith('"', quoteStart)) {
    return isRaw ? Quote.RawDouble : Quote.Double;
  }
  if (first.startsWith("'", quoteStart)) {
    return isRaw ? Quote.RawSingle : Quote.Single;
  }
  return internalError("Unexpected string literal: $first");
}
55 | |
/// Returns the index in [first] at which string content begins, given that
/// the opening delimiter ends at [start].
///
/// If the characters from [start] up to and including the first line break
/// are only spaces and tabs (each character may be preceded by a backslash),
/// the result is the index just past that line break; a CR LF pair counts as
/// a single line break. Otherwise, or if no line break is found, the result
/// is [start] and nothing is skipped.
int lengthOfOptionalWhitespacePrefix(String first, int start) {
  List<int> codeUnits = first.codeUnits;
  for (int i = start; i < codeUnits.length; i++) {
    int code = codeUnits[i];
    if (code == $BACKSLASH) {
      // Look at the character following the backslash instead.
      i++;
      if (i < codeUnits.length) {
        code = codeUnits[i];
      } else {
        // Trailing backslash with nothing after it: no prefix to skip.
        break;
      }
    }
    if (code == $TAB || code == $SPACE) continue;
    if (code == $CR) {
      // Consume the LF of a CR LF pair. The lookahead must inspect the
      // *next* code unit; `codeUnits[i]` is the CR that was just matched.
      if (i + 1 < codeUnits.length && codeUnits[i + 1] == $LF) {
        i++;
      }
      return i + 1;
    }
    if (code == $LF) {
      return i + 1;
    }
    // Not a whitespace character: the first line has real content, so there
    // is no whitespace-only prefix to drop. Without this the loop would keep
    // scanning and strip content up to a later line break.
    break;
  }
  return start;
}
81 | |
/// Returns how many leading characters of [first] are to be dropped as the
/// opening delimiter of a literal of kind [quote].
///
/// Plain single-line literals yield 1 and raw single-line literals yield 2.
/// For multi-line literals the answer is delegated to
/// [lengthOfOptionalWhitespacePrefix], starting after the three quote
/// characters (index 3), or after `r` plus three quotes (index 4) for raw
/// literals. An unrecognised [quote] is reported through [internalError].
int firstQuoteLength(String first, Quote quote) {
  if (quote == Quote.Single || quote == Quote.Double) {
    return 1;
  }
  if (quote == Quote.MultiLineSingle || quote == Quote.MultiLineDouble) {
    return lengthOfOptionalWhitespacePrefix(first, 3);
  }
  if (quote == Quote.RawSingle || quote == Quote.RawDouble) {
    return 2;
  }
  if (quote == Quote.RawMultiLineSingle || quote == Quote.RawMultiLineDouble) {
    return lengthOfOptionalWhitespacePrefix(first, 4);
  }
  return internalError("Unhandled string quote: $quote");
}
102 | |
/// Returns the length of the closing delimiter for a literal of kind [quote]:
/// 1 for the single-line kinds (raw or not) and 3 for the multi-line kinds
/// (raw or not). An unrecognised [quote] is reported through [internalError].
int lastQuoteLength(Quote quote) {
  bool isSingleLine = quote == Quote.Single ||
      quote == Quote.Double ||
      quote == Quote.RawSingle ||
      quote == Quote.RawDouble;
  if (isSingleLine) return 1;

  bool isMultiLine = quote == Quote.MultiLineSingle ||
      quote == Quote.MultiLineDouble ||
      quote == Quote.RawMultiLineSingle ||
      quote == Quote.RawMultiLineDouble;
  if (isMultiLine) return 3;

  return internalError("Unhandled string quote: $quote");
}
119 | |
/// Unescapes [first], the leading part of a string literal of kind [quote],
/// after dropping its opening delimiter (as measured by [firstQuoteLength]).
String unescapeFirstStringPart(String first, Quote quote) {
  int contentStart = firstQuoteLength(first, quote);
  String content = first.substring(contentStart);
  return unescape(content, quote);
}
123 | |
/// Unescapes [last], the trailing part of a string literal of kind [quote],
/// after dropping its closing delimiter (as measured by [lastQuoteLength]).
String unescapeLastStringPart(String last, Quote quote) {
  int contentEnd = last.length - lastQuoteLength(quote);
  String content = last.substring(0, contentEnd);
  return unescape(content, quote);
}
128 | |
/// Unescapes a complete string literal [string], delimiters included.
///
/// The delimiter kind is determined with [analyzeQuote]; the opening and
/// closing delimiters are then removed and the remaining content is passed
/// to [unescape].
String unescapeString(String string) {
  Quote quote = analyzeQuote(string);
  int contentStart = firstQuoteLength(string, quote);
  int contentEnd = string.length - lastQuoteLength(quote);
  return unescape(string.substring(contentStart, contentEnd), quote);
}
135 | |
/// Returns [string] with its escape sequences decoded, unless [quote] is one
/// of the raw kinds, in which case [string] is returned untouched.
///
/// Strings that contain no backslash are returned as-is without going
/// through [unescapeCodeUnits].
String unescape(String string, Quote quote) {
  bool isRaw = quote == Quote.RawSingle ||
      quote == Quote.RawDouble ||
      quote == Quote.RawMultiLineSingle ||
      quote == Quote.RawMultiLineDouble;
  if (isRaw) return string;

  if (string.contains("\\")) {
    return unescapeCodeUnits(string.codeUnits);
  }
  return string;
}
152 | |
/// Message used by [unescapeCodeUnits] when the input ends before an escape
/// sequence is complete.
const String incompleteSequence = "Incomplete escape sequence.";

/// Message used by [unescapeCodeUnits] when a hex escape contains a character
/// that is not a hex digit.
const String invalidCharacter = "Invalid character in escape sequence.";

/// Message used by [unescapeCodeUnits] when an escape denotes a value above
/// 0x10FFFF.
const String invalidCodePoint = "Invalid code point.";
158 | |
/// Decodes the backslash escape sequences in [codeUnits] and returns the
/// resulting string.
///
/// Recognised escapes:
///
///  * `\n` for newline, equivalent to `\x0A`.
///  * `\r` for carriage return, equivalent to `\x0D`.
///  * `\f` for form feed, equivalent to `\x0C`.
///  * `\b` for backspace, equivalent to `\x08`.
///  * `\t` for tab, equivalent to `\x09`.
///  * `\v` for vertical tab, equivalent to `\x0B`.
///  * `\xXX` for hex escape.
///  * `\uXXXX` or `\u{XX?X?X?X?X?}` for Unicode hex escape.
///
/// Any other character following a backslash is passed through unchanged.
///
/// Malformed input is reported through [inputError] with one of
/// [incompleteSequence], [invalidCharacter] or [invalidCodePoint].
String unescapeCodeUnits(List<int> codeUnits) {
  // Can't use Uint8List or Uint16List here, the code units may be larger.
  List<int> result = new List<int>(codeUnits.length);
  int resultOffset = 0;
  // NOTE(review): [offset] is accepted but not forwarded; `null` is passed for
  // the first two arguments of inputError. Presumably inputError does not
  // return normally -- confirm against errors.dart.
  error(int offset, String message) {
    inputError(null, null, message);
  }

  for (int i = 0; i < codeUnits.length; i++) {
    int code = codeUnits[i];
    if (code == $BACKSLASH) {
      if (codeUnits.length == ++i) return error(i, incompleteSequence);
      code = codeUnits[i];
      if (code == $n) {
        code = $LF;
      } else if (code == $r) {
        code = $CR;
      } else if (code == $f) {
        code = $FF;
      } else if (code == $b) {
        code = $BS;
      } else if (code == $t) {
        code = $TAB;
      } else if (code == $v) {
        code = $VTAB;
      } else if (code == $x) {
        // Expect exactly 2 hex digits.
        if (codeUnits.length <= i + 2) return error(i, incompleteSequence);
        code = 0;
        for (int j = 0; j < 2; j++) {
          int digit = codeUnits[++i];
          if (!isHexDigit(digit)) return error(i, invalidCharacter);
          code = (code << 4) + hexDigitValue(digit);
        }
      } else if (code == $u) {
        if (codeUnits.length == i + 1) return error(i, incompleteSequence);
        code = codeUnits[i + 1];
        if (code == $OPEN_CURLY_BRACKET) {
          // Expect 1-6 hex digits followed by '}'.
          // Step onto the '{'; it is known to exist because it was just read.
          i++;
          code = 0;
          // NOTE(review): if seven hex digits appear before any '}', the loop
          // ends without consuming the closing brace, which is then copied to
          // the output as an ordinary character. Presumably the scanner has
          // already rejected such input -- confirm.
          for (int j = 0; j < 7; j++) {
            if (codeUnits.length == ++i) return error(i, incompleteSequence);
            int digit = codeUnits[i];
            // At least one digit is required, so '}' only ends the escape
            // after the first position.
            if (j != 0 && digit == $CLOSE_CURLY_BRACKET) break;
            if (!isHexDigit(digit)) return error(i, invalidCharacter);
            code = (code << 4) + hexDigitValue(digit);
          }
        } else {
          // Expect exactly 4 hex digits. Check the bounds once up front, the
          // same way the `\x` branch does, rather than on every iteration.
          if (codeUnits.length <= i + 4) return error(i, incompleteSequence);
          code = 0;
          for (int j = 0; j < 4; j++) {
            int digit = codeUnits[++i];
            if (!isHexDigit(digit)) return error(i, invalidCharacter);
            code = (code << 4) + hexDigitValue(digit);
          }
        }
      } else {
        // Nothing, escaped character is passed through;
      }
      // 0x10FFFF is the largest valid Unicode code point.
      if (code > 0x10FFFF) return error(i, invalidCodePoint);
    }
    result[resultOffset++] = code;
  }
  return new String.fromCharCodes(result, 0, resultOffset);
}
OLD | NEW |