Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(44)

Side by Side Diff: sdk/lib/_internal/compiler/implementation/string_validator.dart

Issue 694353007: Move dart2js from sdk/lib/_internal/compiler to pkg/compiler (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Created 6 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file.
4
5 // Check the validity of string literals.
6
7 library stringvalidator;
8
9 import "dart:collection";
10
11 import "dart2jslib.dart";
12 import "tree/tree.dart";
13 import "util/characters.dart";
14 import "scanner/scannerlib.dart" show Token;
15
/**
 * Checks string literals for malformed escape sequences and invalid UTF-16,
 * and determines the quoting style of a literal from its source text.
 *
 * Every problem found is reported through [listener].
 */
class StringValidator {
  /** Receives the errors reported by [stringParseError]. */
  final DiagnosticListener listener;

  StringValidator(this.listener);

  /**
   * Validates one literal part of an interpolated string.
   *
   * The text of the part is read from [token]. The opening quote is stripped
   * only when [isFirst] is true and the closing quote only when [isLast] is
   * true; their lengths are taken from [quoting]. The remaining text is
   * handed to [validateString] and its result is returned.
   */
  DartString validateInterpolationPart(Token token, StringQuoting quoting,
                                       {bool isFirst: false,
                                        bool isLast: false}) {
    String source = token.value;
    int leftQuote = isFirst ? quoting.leftQuoteLength : 0;
    int rightQuote = isLast ? quoting.rightQuoteLength : 0;
    String content = copyWithoutQuotes(source, leftQuote, rightQuote);
    return validateString(token,
                          token.charOffset + leftQuote,
                          content,
                          quoting);
  }

  /**
   * Determines the [StringQuoting] of the string literal whose source text
   * starts at the beginning of [sourceString].
   *
   * [sourceString] must start with an optional `r` followed by a single or
   * double quote. Because of string interpolation it may end before the
   * closing quotes are seen.
   */
  static StringQuoting quotingFromString(String sourceString) {
    Iterator<int> source = sourceString.codeUnits.iterator;
    bool raw = false;
    // Counts the characters of the opening delimiter. The `r` of a raw string
    // is not counted here; it is passed on separately as [raw].
    int leftQuoteLength = 1;
    source.moveNext();
    int quoteChar = source.current;
    if (quoteChar == $r) {
      raw = true;
      source.moveNext();
      quoteChar = source.current;
    }
    assert(quoteChar == $SQ || quoteChar == $DQ);
    // The string has at least one quote. Check if it has three.
    // If it only has two, the string must be an empty string literal,
    // and end after the second quote.
    if (source.moveNext() && source.current == quoteChar && source.moveNext()) {
      int code = source.current;
      assert(code == quoteChar);  // If not, there is a bug in the parser.
      leftQuoteLength = 3;

      // Check if a multiline string starts with optional whitespace followed
      // by a newline (CR, LF or CR+LF).
      // We also accept these characters when they are escaped by a backslash.
      // Starts at 1 to account for the newline character itself.
      int newLineLength = 1;
      while (true) {
        // Due to string-interpolations we are not guaranteed to see the
        // trailing quoting characters. The invocations to `moveNext()` may
        // therefore return false and the `current`-getter return `null`. The
        // code does not need to handle this specially (as it will not find the
        // newline characters).
        source.moveNext();
        code = source.current;
        if (code == $BACKSLASH) {
          newLineLength++;
          source.moveNext();
          code = source.current;
        }
        if (code == $TAB || code == $SPACE) {
          newLineLength++;
          continue;
        }
        if (code == $CR) {
          // CR+LF counts as a single line break of two characters.
          if (source.moveNext() && source.current == $LF) {
            newLineLength++;
          }
          leftQuoteLength += newLineLength;
        } else if (code == $LF) {
          leftQuoteLength += newLineLength;
        }
        // Anything else means the first line has real content, so nothing
        // beyond the three quotes belongs to the opening delimiter.
        break;
      }
    }
    return StringQuoting.getQuoting(quoteChar, raw, leftQuoteLength);
  }

  /**
   * Return the string [string] without its [initial] first and [terminal] last
   * characters. This is intended to be used to remove quotes from string
   * literals (including an initial 'r' for raw strings).
   */
  String copyWithoutQuotes(String string, int initial, int terminal) {
    assert(0 <= initial);
    assert(0 <= terminal);
    assert(initial + terminal <= string.length);
    return string.substring(initial, string.length - terminal);
  }

  /**
   * Reports [message], followed by the source [offset] at which the problem
   * was found, as a fatal error on [token] through [listener].
   */
  void stringParseError(String message, Token token, int offset) {
    listener.reportFatalError(
        token, MessageKind.GENERIC, {'text': "$message @ $offset"});
  }

  /**
   * Validates the escape sequences and special characters of a string literal.
   * Returns a DartString if valid, and null if not.
   *
   * [string] is the literal text without its quotes and [startOffset] is the
   * source offset it starts at; the offset is only used for error messages,
   * which are reported on [token] through [stringParseError]. If [quoting] is
   * raw, backslashes are ordinary characters and no escapes are interpreted.
   */
  DartString validateString(Token token,
                            int startOffset,
                            String string,
                            StringQuoting quoting) {
    // We need to check for invalid x and u escapes, for line
    // terminators in non-multiline strings, and for invalid Unicode
    // scalar values (either directly or as u-escape values). We also check
    // for unpaired UTF-16 surrogates.

    // Number of UTF-16 code units in the resulting string value.
    int length = 0;
    // Source offset used in error messages; advanced as input is consumed.
    int index = startOffset;
    bool containsEscape = false;
    bool previousWasLeadSurrogate = false;
    bool invalidUtf16 = false;
    var stringIter = string.codeUnits.iterator;
    for (HasNextIterator<int> iter = new HasNextIterator(stringIter);
         iter.hasNext;
         length++) {
      index++;
      int code = iter.next();
      // In a raw string a backslash is an ordinary character. It must still
      // go through the surrogate check below, so that a lead surrogate
      // followed by a backslash is detected as unpaired.
      if (code == $BACKSLASH && !quoting.raw) {
        containsEscape = true;
        if (!iter.hasNext) {
          stringParseError("Incomplete escape sequence", token, index);
          return null;
        }
        index++;
        code = iter.next();
        if (code == $x) {
          // Expect exactly two hex digits.
          int value = 0;
          for (int i = 0; i < 2; i++) {
            if (!iter.hasNext) {
              stringParseError("Incomplete escape sequence", token, index);
              return null;
            }
            index++;
            code = iter.next();
            if (!isHexDigit(code)) {
              stringParseError("Invalid character in escape sequence",
                               token, index);
              return null;
            }
            value = value * 16 + hexDigitValue(code);
          }
          // A two-digit hex escape can't generate an invalid value by itself,
          // but it does terminate a pending lead surrogate, so it is passed
          // to the surrogate check below like any other character.
          code = value;
        } else if (code == $u) {
          index++;
          code = iter.hasNext ? iter.next() : 0;
          int value = 0;
          if (code == $OPEN_CURLY_BRACKET) {
            // Expect 1-6 hex digits followed by a closing bracket.
            int count = 0;
            while (iter.hasNext) {
              code = iter.next();
              index++;
              if (code == $CLOSE_CURLY_BRACKET) {
                break;
              }
              if (!isHexDigit(code)) {
                stringParseError("Invalid character in escape sequence",
                                 token, index);
                return null;
              }
              count++;
              value = value * 16 + hexDigitValue(code);
            }
            if (code != $CLOSE_CURLY_BRACKET || count == 0 || count > 6) {
              int errorPosition = index - count;
              if (count > 6) errorPosition += 6;
              stringParseError("Invalid character in escape sequence",
                               token, errorPosition);
              return null;
            }
          } else {
            // Expect four hex digits, including the one just read.
            for (int i = 0; i < 4; i++) {
              if (i > 0) {
                if (iter.hasNext) {
                  index++;
                  code = iter.next();
                } else {
                  // Not a hex digit, so the check below reports the error.
                  code = 0;
                }
              }
              if (!isHexDigit(code)) {
                stringParseError("Invalid character in escape sequence",
                                 token, index);
                return null;
              }
              value = value * 16 + hexDigitValue(code);
            }
          }
          code = value;
        }
        // For any other escape, [code] is the character after the backslash.
      }
      // A value outside the basic multilingual plane takes two code units;
      // the second one is counted here, the first by the loop increment.
      if (code >= 0x10000) length++;
      // This handles unescaped characters as well as the values of hex and
      // unicode escapes.
      if (previousWasLeadSurrogate) {
        if (!isUtf16TrailSurrogate(code)) {
          invalidUtf16 = true;
          break;
        }
        previousWasLeadSurrogate = false;
      } else if (isUtf16LeadSurrogate(code)) {
        previousWasLeadSurrogate = true;
      } else if (!isUnicodeScalarValue(code)) {
        invalidUtf16 = true;
        break;
      }
    }
    // A lead surrogate at the very end of the string is unpaired as well.
    if (previousWasLeadSurrogate || invalidUtf16) {
      stringParseError("Invalid Utf16 surrogate", token, index);
      return null;
    }
    // String literal successfully validated.
    if (quoting.raw || !containsEscape) {
      // A string without escapes could just as well have been raw.
      return new DartString.rawString(string, length);
    }
    return new DartString.escapedString(string, length);
  }
}
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698