OLD | NEW |
| (Empty) |
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | |
2 // for details. All rights reserved. Use of this source code is governed by a | |
3 // BSD-style license that can be found in the LICENSE file. | |
4 | |
5 // Check the validity of string literals. | |
6 | |
7 library stringvalidator; | |
8 | |
9 import "dart:collection"; | |
10 | |
11 import "dart2jslib.dart"; | |
12 import "tree/tree.dart"; | |
13 import "util/characters.dart"; | |
14 import "scanner/scannerlib.dart" show Token; | |
15 | |
/**
 * Validates the contents of string literals and of the literal parts of
 * interpolated strings, reporting problems through [listener].
 */
class StringValidator {
  /** Receives the fatal diagnostics produced by [stringParseError]. */
  final DiagnosticListener listener;

  StringValidator(this.listener);

  /**
   * Validates one literal part of a string, as delivered in [token].
   *
   * When [isFirst] is true the opening quote (of [quoting]'s left quote
   * length) is stripped from the token text; when [isLast] is true the closing
   * quote is stripped. The remaining text is passed to [validateString], whose
   * result is returned.
   */
  DartString validateInterpolationPart(Token token, StringQuoting quoting,
                                       {bool isFirst: false,
                                        bool isLast: false}) {
    String source = token.value;
    int leftQuote = 0;
    int rightQuote = 0;
    if (isFirst) leftQuote = quoting.leftQuoteLength;
    if (isLast) rightQuote = quoting.rightQuoteLength;
    String content = copyWithoutQuotes(source, leftQuote, rightQuote);
    return validateString(token,
                          token.charOffset + leftQuote,
                          content,
                          quoting);
  }

  /**
   * Determines the quoting style of the string literal (or first
   * interpolation part) whose source text is [sourceString].
   *
   * The text must start with an optional `r` followed by a single or double
   * quote. The computed left quote length is 1 for a single-line string and
   * 3 for a multiline string, plus the length of an ignorable first line
   * (optional whitespace, each character optionally preceded by a backslash,
   * followed by CR, LF or CR+LF) when a multiline string starts with one.
   */
  static StringQuoting quotingFromString(String sourceString) {
    Iterator<int> source = sourceString.codeUnits.iterator;
    bool raw = false;
    int leftQuoteLength = 1;
    source.moveNext();
    int quoteChar = source.current;
    if (quoteChar == $r) {
      raw = true;
      source.moveNext();
      quoteChar = source.current;
    }
    assert(quoteChar == $SQ || quoteChar == $DQ);
    // String has at least one quote. Check if it has three.
    // If it only has two, the string must be an empty string literal,
    // and end after the second quote.
    if (source.moveNext() && source.current == quoteChar && source.moveNext()) {
      int code = source.current;
      assert(code == quoteChar);  // If not, there is a bug in the parser.
      leftQuoteLength = 3;

      // Check if a multiline string starts with optional whitespace followed by
      // a newline (CR, LF or CR+LF).
      // We also accept it if these characters are escaped by a backslash.
      // The count starts at 1 to account for the newline character itself.
      int newLineLength = 1;
      while (true) {
        // Due to string-interpolations we are not guaranteed to see the
        // trailing quoting characters. The invocations to `moveNext()` may
        // therefore return false and the `current`-getter return `null`. The
        // code does not need to handle this specially (as it will not find the
        // newline characters).
        source.moveNext();
        code = source.current;
        if (code == $BACKSLASH) {
          newLineLength++;
          source.moveNext();
          code = source.current;
        }
        if (code == $TAB || code == $SPACE) {
          newLineLength++;
          continue;
        }
        if (code == $CR) {
          if (source.moveNext() && source.current == $LF) {
            newLineLength++;
          }
          leftQuoteLength += newLineLength;
        } else if (code == $LF) {
          leftQuoteLength += newLineLength;
        }
        // Anything else means the first line is real content: keep it.
        break;
      }
    }
    return StringQuoting.getQuoting(quoteChar, raw, leftQuoteLength);
  }

  /**
   * Return the string [string] without its [initial] first and [terminal] last
   * characters. This is intended to be used to remove quotes from string
   * literals (including an initial 'r' for raw strings).
   */
  String copyWithoutQuotes(String string, int initial, int terminal) {
    assert(0 <= initial);
    assert(0 <= terminal);
    assert(initial + terminal <= string.length);
    return string.substring(initial, string.length - terminal);
  }

  /**
   * Reports [message], with [offset] appended, as a fatal generic diagnostic
   * on [token].
   */
  void stringParseError(String message, Token token, int offset) {
    listener.reportFatalError(
        token, MessageKind.GENERIC, {'text': "$message @ $offset"});
  }

  /**
   * Validates the escape sequences and special characters of a string literal.
   * Returns a DartString if valid, and null if not.
   *
   * [string] is the literal's content without quotes, [startOffset] is the
   * offset used as the base for positions in error messages, and [token] is
   * the token errors are reported on. In a raw string (per [quoting])
   * backslashes are ordinary characters and no escapes are interpreted.
   *
   * The length stored in the returned DartString is the number of UTF-16 code
   * units of the decoded value: one per character or escape, two for a
   * `\u{...}` escape whose value is 0x10000 or above.
   */
  DartString validateString(Token token,
                            int startOffset,
                            String string,
                            StringQuoting quoting) {
    // We need to check for invalid x and u escapes, for line
    // terminators in non-multiline strings, and for invalid Unicode
    // scalar values (either directly or as u-escape values). We also check
    // for unpaired UTF-16 surrogates.
    int length = 0;
    int index = startOffset;
    bool containsEscape = false;
    bool previousWasLeadSurrogate = false;
    bool invalidUtf16 = false;
    var stringIter = string.codeUnits.iterator;
    for(HasNextIterator<int> iter = new HasNextIterator(stringIter);
        iter.hasNext;
        length++) {
      index++;
      int code = iter.next();
      // In a raw string the backslash is an ordinary character. It must still
      // go through the surrogate check below, so that a lead surrogate
      // followed by a backslash is detected as unpaired.
      if (code == $BACKSLASH && !quoting.raw) {
        containsEscape = true;
        if (!iter.hasNext) {
          stringParseError("Incomplete escape sequence", token, index);
          return null;
        }
        index++;
        code = iter.next();
        if (code == $x) {
          // Expect exactly two hex digits.
          int value = 0;
          for (int i = 0; i < 2; i++) {
            if (!iter.hasNext) {
              stringParseError("Incomplete escape sequence", token, index);
              return null;
            }
            index++;
            code = iter.next();
            if (!isHexDigit(code)) {
              stringParseError("Invalid character in escape sequence",
                               token, index);
              return null;
            }
            value = value * 16 + hexDigitValue(code);
          }
          // A two-digit hex escape can't itself be an invalid value, but it
          // can follow a lead surrogate and leave it unpaired, so its value
          // is run through the common check below instead of being skipped.
          code = value;
        } else if (code == $u) {
          index++;
          // A missing character is represented by 0, which is not a hex digit
          // and is therefore rejected below.
          code = iter.hasNext ? iter.next() : 0;
          int value = 0;
          if (code == $OPEN_CURLY_BRACKET) {
            // expect 1-6 hex digits.
            int count = 0;
            while (iter.hasNext) {
              code = iter.next();
              index++;
              if (code == $CLOSE_CURLY_BRACKET) {
                break;
              }
              if (!isHexDigit(code)) {
                stringParseError("Invalid character in escape sequence",
                                 token, index);
                return null;
              }
              count++;
              value = value * 16 + hexDigitValue(code);
            }
            if (code != $CLOSE_CURLY_BRACKET || count == 0 || count > 6) {
              int errorPosition = index - count;
              if (count > 6) errorPosition += 6;
              stringParseError("Invalid character in escape sequence",
                               token, errorPosition);
              return null;
            }
          } else {
            // Expect four hex digits, including the one just read.
            for (int i = 0; i < 4; i++) {
              if (i > 0) {
                if (iter.hasNext) {
                  index++;
                  code = iter.next();
                } else {
                  code = 0;
                }
              }
              if (!isHexDigit(code)) {
                stringParseError("Invalid character in escape sequence",
                                 token, index);
                return null;
              }
              value = value * 16 + hexDigitValue(code);
            }
          }
          code = value;
        }
        // Any other escaped character is checked as itself.
      }
      // A value outside the BMP takes two UTF-16 code units.
      if (code >= 0x10000) length++;
      // This handles unescaped characters and the values of hex and unicode
      // escapes.
      if (previousWasLeadSurrogate) {
        if (!isUtf16TrailSurrogate(code)) {
          invalidUtf16 = true;
          break;
        }
        previousWasLeadSurrogate = false;
      } else if (isUtf16LeadSurrogate(code)) {
        previousWasLeadSurrogate = true;
      } else if (!isUnicodeScalarValue(code)) {
        invalidUtf16 = true;
        break;
      }
    }
    // A lead surrogate at the very end of the string is unpaired as well.
    if (previousWasLeadSurrogate || invalidUtf16) {
      stringParseError("Invalid Utf16 surrogate", token, index);
      return null;
    }
    // String literal successfully validated.
    if (quoting.raw || !containsEscape) {
      // A string without escapes could just as well have been raw.
      return new DartString.rawString(string, length);
    }
    return new DartString.escapedString(string, length);
  }
}
OLD | NEW |