| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | |
| 2 // for details. All rights reserved. Use of this source code is governed by a | |
| 3 // BSD-style license that can be found in the LICENSE file. | |
| 4 | |
| 5 // Check the validity of string literals. | |
| 6 | |
| 7 library stringvalidator; | |
| 8 | |
| 9 import "dart:collection"; | |
| 10 | |
| 11 import "dart2jslib.dart"; | |
| 12 import "tree/tree.dart"; | |
| 13 import "util/characters.dart"; | |
| 14 import "scanner/scannerlib.dart" show Token; | |
| 15 | |
/**
 * Checks string literal tokens for malformed escape sequences and invalid
 * UTF-16, reporting problems through [listener], and converts valid literals
 * into [DartString]s.
 */
class StringValidator {
  /// Receives the fatal errors reported for invalid string literals.
  final DiagnosticListener listener;

  StringValidator(this.listener);

  /**
   * Validates a single part of a string interpolation.
   *
   * The text of [token] is stripped of the left quote of [quoting] when
   * [isFirst] is true, and of its right quote when [isLast] is true; what
   * remains is passed to [validateString]. Returns the result of
   * [validateString], i.e. `null` if the part is invalid.
   */
  DartString validateInterpolationPart(Token token, StringQuoting quoting,
                                       {bool isFirst: false,
                                        bool isLast: false}) {
    String source = token.value;
    int leftQuote = 0;
    int rightQuote = 0;
    if (isFirst) leftQuote = quoting.leftQuoteLength;
    if (isLast) rightQuote = quoting.rightQuoteLength;
    String content = copyWithoutQuotes(source, leftQuote, rightQuote);
    // Error offsets are relative to the first character after the left quote.
    return validateString(token,
                          token.charOffset + leftQuote,
                          content,
                          quoting);
  }

  /**
   * Determines the quoting style used at the start of [sourceString].
   *
   * Recognizes an optional leading `r` (raw string), the quote character
   * (single or double), and whether the quote is tripled. For a tripled quote
   * the left quote length additionally covers optional whitespace followed by
   * a newline directly after the quotes. The findings are passed to
   * [StringQuoting.getQuoting].
   */
  static StringQuoting quotingFromString(String sourceString) {
    Iterator<int> source = sourceString.codeUnits.iterator;
    bool raw = false;
    int leftQuoteLength = 1;
    source.moveNext();
    int quoteChar = source.current;
    if (quoteChar == $r) {
      raw = true;
      source.moveNext();
      quoteChar = source.current;
    }
    assert(quoteChar == $SQ || quoteChar == $DQ);
    // String has at least one quote. Check if it has three.
    // If it only has two, the string must be an empty string literal,
    // and end after the second quote.
    if (source.moveNext() && source.current == quoteChar && source.moveNext()) {
      int code = source.current;
      assert(code == quoteChar);  // If not, there is a bug in the parser.
      leftQuoteLength = 3;

      // Check if a multiline string starts with optional whitespace followed by
      // a newline (CR, LF or CR+LF).
      // We also accept these characters when they are escaped by a backslash.
      //
      // Starts at 1 to account for the newline character itself; it is only
      // added to [leftQuoteLength] if a newline is actually found.
      int newLineLength = 1;
      while (true) {
        // Due to string-interpolations we are not guaranteed to see the
        // trailing quoting characters. The invocations to `moveNext()` may
        // therefore return false and the `current`-getter return `null`. The
        // code does not need to handle this specially (as it will not find the
        // newline characters).
        source.moveNext();
        code = source.current;
        if (code == $BACKSLASH) {
          // Count the backslash and look at the character it escapes.
          newLineLength++;
          source.moveNext();
          code = source.current;
        }
        if (code == $TAB || code == $SPACE) {
          newLineLength++;
          continue;
        }
        if (code == $CR) {
          // Treat CR+LF as a single newline that is two characters long.
          if (source.moveNext() && source.current == $LF) {
            newLineLength++;
          }
          leftQuoteLength += newLineLength;
        } else if (code == $LF) {
          leftQuoteLength += newLineLength;
        }
        // Any other character: the whitespace seen so far is string content.
        break;
      }
    }
    return StringQuoting.getQuoting(quoteChar, raw, leftQuoteLength);
  }

  /**
   * Return the string [string] without its [initial] first and [terminal] last
   * characters. This is intended to be used to remove quotes from string
   * literals (including an initial 'r' for raw strings).
   */
  String copyWithoutQuotes(String string, int initial, int terminal) {
    assert(0 <= initial);
    assert(0 <= terminal);
    assert(initial + terminal <= string.length);
    return string.substring(initial, string.length - terminal);
  }

  /**
   * Reports [message], annotated with [offset], as a fatal error on [token]
   * through [listener].
   */
  void stringParseError(String message, Token token, int offset) {
    listener.reportFatalError(
        token, MessageKind.GENERIC, {'text': "$message @ $offset"});
  }

  /**
   * Validates the escape sequences and special characters of a string literal.
   * Returns a DartString if valid, and null if not.
   *
   * [string] is the literal's content without quotes, [startOffset] is the
   * offset used as the base for positions in error messages, and [quoting]
   * tells whether the literal is raw (in which case backslashes are ordinary
   * characters). Errors are reported on [token] via [stringParseError].
   */
  DartString validateString(Token token,
                            int startOffset,
                            String string,
                            StringQuoting quoting) {
    // We need to check for invalid x and u escapes, for line
    // terminators in non-multiline strings, and for invalid Unicode
    // scalar values (either directly or as u-escape values). We also check
    // for unpaired UTF-16 surrogates.

    // Length handed to the DartString constructors: one per source character
    // or escape sequence, plus one extra for values of 0x10000 and above.
    int length = 0;
    int index = startOffset;
    bool containsEscape = false;
    bool previousWasLeadSurrogate = false;
    bool invalidUtf16 = false;
    var stringIter = string.codeUnits.iterator;
    for(HasNextIterator<int> iter = new HasNextIterator(stringIter);
        iter.hasNext;
        length++) {
      index++;
      int code = iter.next();
      // In a raw string a backslash is an ordinary character. It still has to
      // go through the surrogate check below, so that a lead surrogate
      // followed by a backslash is detected as unpaired.
      if (code == $BACKSLASH && !quoting.raw) {
        containsEscape = true;
        if (!iter.hasNext) {
          stringParseError("Incomplete escape sequence", token, index);
          return null;
        }
        index++;
        code = iter.next();
        if (code == $x) {
          // Expect exactly two hex digits.
          int value = 0;
          for (int i = 0; i < 2; i++) {
            if (!iter.hasNext) {
              stringParseError("Incomplete escape sequence", token, index);
              return null;
            }
            index++;
            code = iter.next();
            if (!isHexDigit(code)) {
              stringParseError("Invalid character in escape sequence",
                               token, index);
              return null;
            }
            value = value * 16 + hexDigitValue(code);
          }
          // A two-digit hex escape can't generate an invalid value by itself,
          // but it can follow an unpaired lead surrogate, so its value is
          // checked below like any other character.
          code = value;
        } else if (code == $u) {
          index++;
          code = iter.hasNext ? iter.next() : 0;
          int value = 0;
          if (code == $OPEN_CURLY_BRACKET) {
            // expect 1-6 hex digits.
            int count = 0;
            while (iter.hasNext) {
              code = iter.next();
              index++;
              if (code == $CLOSE_CURLY_BRACKET) {
                break;
              }
              if (!isHexDigit(code)) {
                stringParseError("Invalid character in escape sequence",
                                 token, index);
                return null;
              }
              count++;
              value = value * 16 + hexDigitValue(code);
            }
            // Reject a missing closing bracket, `\u{}`, and too many digits.
            if (code != $CLOSE_CURLY_BRACKET || count == 0 || count > 6) {
              int errorPosition = index - count;
              if (count > 6) errorPosition += 6;
              stringParseError("Invalid character in escape sequence",
                               token, errorPosition);
              return null;
            }
          } else {
            // Expect four hex digits, including the one just read.
            for (int i = 0; i < 4; i++) {
              if (i > 0) {
                if (iter.hasNext) {
                  index++;
                  code = iter.next();
                } else {
                  // Ran out of input; 0 is not a hex digit, so this is
                  // reported as an invalid character below.
                  code = 0;
                }
              }
              if (!isHexDigit(code)) {
                stringParseError("Invalid character in escape sequence",
                                 token, index);
                return null;
              }
              value = value * 16 + hexDigitValue(code);
            }
          }
          code = value;
        }
      }
      if (code >= 0x10000) length++;
      // This handles both unescaped characters and the value of unicode
      // escapes.
      if (previousWasLeadSurrogate) {
        if (!isUtf16TrailSurrogate(code)) {
          invalidUtf16 = true;
          break;
        }
        previousWasLeadSurrogate = false;
      } else if (isUtf16LeadSurrogate(code)) {
        previousWasLeadSurrogate = true;
      } else if (!isUnicodeScalarValue(code)) {
        invalidUtf16 = true;
        break;
      }
    }
    // A lead surrogate at the very end of the string is unpaired as well.
    if (previousWasLeadSurrogate || invalidUtf16) {
      stringParseError("Invalid Utf16 surrogate", token, index);
      return null;
    }
    // String literal successfully validated.
    if (quoting.raw || !containsEscape) {
      // A string without escapes could just as well have been raw.
      return new DartString.rawString(string, length);
    }
    return new DartString.escapedString(string, length);
  }
}
| OLD | NEW |