Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(229)

Side by Side Diff: pkg/compiler/lib/src/string_validator.dart

Issue 2304923002: Allow surrogates in string literals. (Closed)
Patch Set: Include analyzer and spec changes. Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 // Check the validity of string literals. 5 // Check the validity of string literals.
6 6
7 library stringvalidator; 7 library stringvalidator;
8 8
9 import 'dart:collection'; 9 import 'dart:collection';
10 10
(...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after
104 } 104 }
105 105
106 /** 106 /**
107 * Validates the escape sequences and special characters of a string literal. 107 * Validates the escape sequences and special characters of a string literal.
108 * Returns a DartString if valid, and null if not. 108 * Returns a DartString if valid, and null if not.
109 */ 109 */
110 DartString validateString( 110 DartString validateString(
111 Token token, int startOffset, String string, StringQuoting quoting) { 111 Token token, int startOffset, String string, StringQuoting quoting) {
112 // We need to check for invalid x and u escapes, for line 112 // We need to check for invalid x and u escapes, for line
113 // terminators in non-multiline strings, and for invalid Unicode 113 // terminators in non-multiline strings, and for invalid Unicode
114 // scalar values (either directly or as u-escape values). We also check 114 // code points (either directly or as u-escape values).
115 // for unpaired UTF-16 surrogates.
116 int length = 0; 115 int length = 0;
117 int index = startOffset; 116 int index = startOffset;
118 bool containsEscape = false; 117 bool containsEscape = false;
119 bool previousWasLeadSurrogate = false;
120 bool invalidUtf16 = false;
121 var stringIter = string.codeUnits.iterator; 118 var stringIter = string.codeUnits.iterator;
122 for (HasNextIterator<int> iter = new HasNextIterator(stringIter); 119 for (HasNextIterator<int> iter = new HasNextIterator(stringIter);
123 iter.hasNext; 120 iter.hasNext;
124 length++) { 121 length++) {
125 index++; 122 index++;
126 int code = iter.next(); 123 int code = iter.next();
127 if (code == $BACKSLASH) { 124 if (code == $BACKSLASH) {
128 if (quoting.raw) continue; 125 if (quoting.raw) continue;
129 containsEscape = true; 126 containsEscape = true;
130 if (!iter.hasNext) { 127 if (!iter.hasNext) {
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after
192 stringParseError( 189 stringParseError(
193 "Invalid character in escape sequence", token, index); 190 "Invalid character in escape sequence", token, index);
194 return null; 191 return null;
195 } 192 }
196 value = value * 16 + hexDigitValue(code); 193 value = value * 16 + hexDigitValue(code);
197 } 194 }
198 } 195 }
199 code = value; 196 code = value;
200 } 197 }
201 } 198 }
202 if (code >= 0x10000) length++; 199 if (code >= 0x10000) {
203 // This handles both unescaped characters and the value of unicode 200 length++;
204 // escapes. 201 if (code > 0x10FFFF) {
205 if (previousWasLeadSurrogate) { 202 stringParseError("Invalid code point", token, index);
206 if (!isUtf16TrailSurrogate(code)) {
207 invalidUtf16 = true;
208 break;
209 } 203 }
210 previousWasLeadSurrogate = false;
211 } else if (isUtf16LeadSurrogate(code)) {
212 previousWasLeadSurrogate = true;
213 } else if (!isUnicodeScalarValue(code)) {
214 invalidUtf16 = true;
215 break;
216 } 204 }
217 } 205 }
218 if (previousWasLeadSurrogate || invalidUtf16) {
219 stringParseError("Invalid Utf16 surrogate", token, index);
220 return null;
221 }
222 // String literal successfully validated. 206 // String literal successfully validated.
223 if (quoting.raw || !containsEscape) { 207 if (quoting.raw || !containsEscape) {
224 // A string without escapes could just as well have been raw. 208 // A string without escapes could just as well have been raw.
225 return new DartString.rawString(string, length); 209 return new DartString.rawString(string, length);
226 } 210 }
227 return new DartString.escapedString(string, length); 211 return new DartString.escapedString(string, length);
228 } 212 }
229 } 213 }
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698