OLD | NEW |
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 // Check the validity of string literals. | 5 // Check the validity of string literals. |
6 | 6 |
7 library stringvalidator; | 7 library stringvalidator; |
8 | 8 |
9 import 'dart:collection'; | 9 import 'dart:collection'; |
10 | 10 |
11 import 'common.dart'; | 11 import 'common.dart'; |
12 import 'tokens/token.dart' show Token; | 12 import 'tokens/token.dart' show Token; |
13 import 'tree/tree.dart'; | 13 import 'tree/tree.dart'; |
14 import 'util/characters.dart'; | 14 import 'util/characters.dart'; |
15 | 15 |
16 class StringValidator { | 16 class StringValidator { |
17 final DiagnosticReporter reporter; | 17 final DiagnosticReporter reporter; |
18 | 18 |
19 StringValidator(this.reporter); | 19 StringValidator(this.reporter); |
20 | 20 |
/**
 * Validates one literal segment of a (possibly interpolated) string.
 *
 * [isFirst] and [isLast] say whether this segment is the first and/or last
 * part of the literal. Only then is the corresponding quote length from
 * [quoting] used; otherwise the quote length on that side is zero.
 *
 * Returns whatever [validateString] returns for the segment's content.
 */
DartString validateInterpolationPart(Token token, StringQuoting quoting,
    {bool isFirst: false, bool isLast: false}) {
  String rawText = token.value;
  // Interior segments carry no quote characters, so both counts default to 0.
  int openingLength = isFirst ? quoting.leftQuoteLength : 0;
  int closingLength = isLast ? quoting.rightQuoteLength : 0;
  // NOTE(review): copyWithoutQuotes is not visible here; presumably it drops
  // the given number of characters from each end of the text - confirm.
  String unquoted = copyWithoutQuotes(rawText, openingLength, closingLength);
  // The start offset passed on is the token offset advanced past the opening
  // quote characters.
  int contentOffset = token.charOffset + openingLength;
  return validateString(token, contentOffset, unquoted, quoting);
}
35 | 32 |
36 static StringQuoting quotingFromString(String sourceString) { | 33 static StringQuoting quotingFromString(String sourceString) { |
37 Iterator<int> source = sourceString.codeUnits.iterator; | 34 Iterator<int> source = sourceString.codeUnits.iterator; |
38 bool raw = false; | 35 bool raw = false; |
39 int leftQuoteLength = 1; | 36 int leftQuoteLength = 1; |
40 source.moveNext(); | 37 source.moveNext(); |
41 int quoteChar = source.current; | 38 int quoteChar = source.current; |
42 if (quoteChar == $r) { | 39 if (quoteChar == $r) { |
43 raw = true; | 40 raw = true; |
44 source.moveNext(); | 41 source.moveNext(); |
45 quoteChar = source.current; | 42 quoteChar = source.current; |
46 } | 43 } |
47 assert(quoteChar == $SQ || quoteChar == $DQ); | 44 assert(quoteChar == $SQ || quoteChar == $DQ); |
48 // String has at least one quote. Check if it has three. | 45 // String has at least one quote. Check if it has three. |
49 // If it only has two, the string must be an empty string literal, | 46 // If it only has two, the string must be an empty string literal, |
50 // and end after the second quote. | 47 // and end after the second quote. |
51 if (source.moveNext() && source.current == quoteChar && source.moveNext()) { | 48 if (source.moveNext() && source.current == quoteChar && source.moveNext()) { |
52 int code = source.current; | 49 int code = source.current; |
53 assert(code == quoteChar); // If not, there is a bug in the parser. | 50 assert(code == quoteChar); // If not, there is a bug in the parser. |
54 leftQuoteLength = 3; | 51 leftQuoteLength = 3; |
55 | 52 |
56 // Check if a multiline string starts with optional whitespace followed by | 53 // Check if a multiline string starts with optional whitespace followed by |
57 // a newline (CR, LF or CR+LF). | 54 // a newline (CR, LF or CR+LF). |
58 // We also accept if these characters are escaped by a backslash. | 55 // We also accept if these characters are escaped by a backslash. |
59 int newLineLength = 1; | 56 int newLineLength = 1; |
60 while (true) { | 57 while (true) { |
61 // Due to string-interpolations we are not guaranteed to see the | 58 // Due to string-interpolations we are not guaranteed to see the |
62 // trailing quoting characters. The invocations to `moveNext()` may | 59 // trailing quoting characters. The invocations to `moveNext()` may |
63 // therefore return false and the `current`-getter return `null`. The | 60 // therefore return false and the `current`-getter return `null`. The |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
102 | 99 |
/**
 * Reports a generic error on [token] through [reporter].
 *
 * The reported text is [message] followed by " @ " and [offset].
 */
void stringParseError(String message, Token token, int offset) {
  String text = "$message @ $offset";
  reporter.reportErrorMessage(token, MessageKind.GENERIC, {'text': text});
}
107 | 104 |
108 /** | 105 /** |
109 * Validates the escape sequences and special characters of a string literal. | 106 * Validates the escape sequences and special characters of a string literal. |
110 * Returns a DartString if valid, and null if not. | 107 * Returns a DartString if valid, and null if not. |
111 */ | 108 */ |
112 DartString validateString(Token token, | 109 DartString validateString( |
113 int startOffset, | 110 Token token, int startOffset, String string, StringQuoting quoting) { |
114 String string, | |
115 StringQuoting quoting) { | |
116 // We need to check for invalid x and u escapes, for line | 111 // We need to check for invalid x and u escapes, for line |
117 // terminators in non-multiline strings, and for invalid Unicode | 112 // terminators in non-multiline strings, and for invalid Unicode |
118 // scalar values (either directly or as u-escape values). We also check | 113 // scalar values (either directly or as u-escape values). We also check |
119 // for unpaired UTF-16 surrogates. | 114 // for unpaired UTF-16 surrogates. |
120 int length = 0; | 115 int length = 0; |
121 int index = startOffset; | 116 int index = startOffset; |
122 bool containsEscape = false; | 117 bool containsEscape = false; |
123 bool previousWasLeadSurrogate = false; | 118 bool previousWasLeadSurrogate = false; |
124 bool invalidUtf16 = false; | 119 bool invalidUtf16 = false; |
125 var stringIter = string.codeUnits.iterator; | 120 var stringIter = string.codeUnits.iterator; |
126 for(HasNextIterator<int> iter = new HasNextIterator(stringIter); | 121 for (HasNextIterator<int> iter = new HasNextIterator(stringIter); |
127 iter.hasNext; | 122 iter.hasNext; |
128 length++) { | 123 length++) { |
129 index++; | 124 index++; |
130 int code = iter.next(); | 125 int code = iter.next(); |
131 if (code == $BACKSLASH) { | 126 if (code == $BACKSLASH) { |
132 if (quoting.raw) continue; | 127 if (quoting.raw) continue; |
133 containsEscape = true; | 128 containsEscape = true; |
134 if (!iter.hasNext) { | 129 if (!iter.hasNext) { |
135 stringParseError("Incomplete escape sequence",token, index); | 130 stringParseError("Incomplete escape sequence", token, index); |
136 return null; | 131 return null; |
137 } | 132 } |
138 index++; | 133 index++; |
139 code = iter.next(); | 134 code = iter.next(); |
140 if (code == $x) { | 135 if (code == $x) { |
141 for (int i = 0; i < 2; i++) { | 136 for (int i = 0; i < 2; i++) { |
142 if (!iter.hasNext) { | 137 if (!iter.hasNext) { |
143 stringParseError("Incomplete escape sequence", token, index); | 138 stringParseError("Incomplete escape sequence", token, index); |
144 return null; | 139 return null; |
145 } | 140 } |
146 index++; | 141 index++; |
147 code = iter.next(); | 142 code = iter.next(); |
148 if (!isHexDigit(code)) { | 143 if (!isHexDigit(code)) { |
149 stringParseError("Invalid character in escape sequence", | 144 stringParseError( |
150 token, index); | 145 "Invalid character in escape sequence", token, index); |
151 return null; | 146 return null; |
152 } | 147 } |
153 } | 148 } |
154 // A two-digit hex escape can't generate an invalid value. | 149 // A two-digit hex escape can't generate an invalid value. |
155 continue; | 150 continue; |
156 } else if (code == $u) { | 151 } else if (code == $u) { |
157 index++; | 152 index++; |
158 code = iter.hasNext ? iter.next() : 0; | 153 code = iter.hasNext ? iter.next() : 0; |
159 int value = 0; | 154 int value = 0; |
160 if (code == $OPEN_CURLY_BRACKET) { | 155 if (code == $OPEN_CURLY_BRACKET) { |
161 // expect 1-6 hex digits. | 156 // expect 1-6 hex digits. |
162 int count = 0; | 157 int count = 0; |
163 while (iter.hasNext) { | 158 while (iter.hasNext) { |
164 code = iter.next(); | 159 code = iter.next(); |
165 index++; | 160 index++; |
166 if (code == $CLOSE_CURLY_BRACKET) { | 161 if (code == $CLOSE_CURLY_BRACKET) { |
167 break; | 162 break; |
168 } | 163 } |
169 if (!isHexDigit(code)) { | 164 if (!isHexDigit(code)) { |
170 stringParseError("Invalid character in escape sequence", | 165 stringParseError( |
171 token, index); | 166 "Invalid character in escape sequence", token, index); |
172 return null; | 167 return null; |
173 } | 168 } |
174 count++; | 169 count++; |
175 value = value * 16 + hexDigitValue(code); | 170 value = value * 16 + hexDigitValue(code); |
176 } | 171 } |
177 if (code != $CLOSE_CURLY_BRACKET || count == 0 || count > 6) { | 172 if (code != $CLOSE_CURLY_BRACKET || count == 0 || count > 6) { |
178 int errorPosition = index - count; | 173 int errorPosition = index - count; |
179 if (count > 6) errorPosition += 6; | 174 if (count > 6) errorPosition += 6; |
180 stringParseError("Invalid character in escape sequence", | 175 stringParseError( |
181 token, errorPosition); | 176 "Invalid character in escape sequence", token, errorPosition); |
182 return null; | 177 return null; |
183 } | 178 } |
184 } else { | 179 } else { |
185 // Expect four hex digits, including the one just read. | 180 // Expect four hex digits, including the one just read. |
186 for (int i = 0; i < 4; i++) { | 181 for (int i = 0; i < 4; i++) { |
187 if (i > 0) { | 182 if (i > 0) { |
188 if (iter.hasNext) { | 183 if (iter.hasNext) { |
189 index++; | 184 index++; |
190 code = iter.next(); | 185 code = iter.next(); |
191 } else { | 186 } else { |
192 code = 0; | 187 code = 0; |
193 } | 188 } |
194 } | 189 } |
195 if (!isHexDigit(code)) { | 190 if (!isHexDigit(code)) { |
196 stringParseError("Invalid character in escape sequence", | 191 stringParseError( |
197 token, index); | 192 "Invalid character in escape sequence", token, index); |
198 return null; | 193 return null; |
199 } | 194 } |
200 value = value * 16 + hexDigitValue(code); | 195 value = value * 16 + hexDigitValue(code); |
201 } | 196 } |
202 } | 197 } |
203 code = value; | 198 code = value; |
204 } | 199 } |
205 } | 200 } |
206 if (code >= 0x10000) length++; | 201 if (code >= 0x10000) length++; |
207 // This handles both unescaped characters and the value of unicode | 202 // This handles both unescaped characters and the value of unicode |
(...skipping 16 matching lines...) Expand all Loading... |
224 return null; | 219 return null; |
225 } | 220 } |
226 // String literal successfully validated. | 221 // String literal successfully validated. |
227 if (quoting.raw || !containsEscape) { | 222 if (quoting.raw || !containsEscape) { |
228 // A string without escapes could just as well have been raw. | 223 // A string without escapes could just as well have been raw. |
229 return new DartString.rawString(string, length); | 224 return new DartString.rawString(string, length); |
230 } | 225 } |
231 return new DartString.escapedString(string, length); | 226 return new DartString.escapedString(string, length); |
232 } | 227 } |
233 } | 228 } |
OLD | NEW |