| OLD | NEW |
| 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 // Check the validity of string literals. | 5 // Check the validity of string literals. |
| 6 | 6 |
| 7 library stringvalidator; | 7 library stringvalidator; |
| 8 | 8 |
| 9 import 'dart:collection'; | 9 import 'dart:collection'; |
| 10 | 10 |
| 11 import 'common.dart'; | 11 import 'common.dart'; |
| 12 import 'tokens/token.dart' show Token; | 12 import 'tokens/token.dart' show Token; |
| 13 import 'tree/tree.dart'; | 13 import 'tree/tree.dart'; |
| 14 import 'util/characters.dart'; | 14 import 'util/characters.dart'; |
| 15 | 15 |
| 16 class StringValidator { | 16 class StringValidator { |
| 17 final DiagnosticReporter reporter; | 17 final DiagnosticReporter reporter; |
| 18 | 18 |
| 19 StringValidator(this.reporter); | 19 StringValidator(this.reporter); |
| 20 | 20 |
| 21 DartString validateInterpolationPart(Token token, StringQuoting quoting, | 21 DartString validateInterpolationPart(Token token, StringQuoting quoting, |
| 22 {bool isFirst: false, | 22 {bool isFirst: false, bool isLast: false}) { |
| 23 bool isLast: false}) { | |
| 24 String source = token.value; | 23 String source = token.value; |
| 25 int leftQuote = 0; | 24 int leftQuote = 0; |
| 26 int rightQuote = 0; | 25 int rightQuote = 0; |
| 27 if (isFirst) leftQuote = quoting.leftQuoteLength; | 26 if (isFirst) leftQuote = quoting.leftQuoteLength; |
| 28 if (isLast) rightQuote = quoting.rightQuoteLength; | 27 if (isLast) rightQuote = quoting.rightQuoteLength; |
| 29 String content = copyWithoutQuotes(source, leftQuote, rightQuote); | 28 String content = copyWithoutQuotes(source, leftQuote, rightQuote); |
| 30 return validateString(token, | 29 return validateString( |
| 31 token.charOffset + leftQuote, | 30 token, token.charOffset + leftQuote, content, quoting); |
| 32 content, | |
| 33 quoting); | |
| 34 } | 31 } |
| 35 | 32 |
| 36 static StringQuoting quotingFromString(String sourceString) { | 33 static StringQuoting quotingFromString(String sourceString) { |
| 37 Iterator<int> source = sourceString.codeUnits.iterator; | 34 Iterator<int> source = sourceString.codeUnits.iterator; |
| 38 bool raw = false; | 35 bool raw = false; |
| 39 int leftQuoteLength = 1; | 36 int leftQuoteLength = 1; |
| 40 source.moveNext(); | 37 source.moveNext(); |
| 41 int quoteChar = source.current; | 38 int quoteChar = source.current; |
| 42 if (quoteChar == $r) { | 39 if (quoteChar == $r) { |
| 43 raw = true; | 40 raw = true; |
| 44 source.moveNext(); | 41 source.moveNext(); |
| 45 quoteChar = source.current; | 42 quoteChar = source.current; |
| 46 } | 43 } |
| 47 assert(quoteChar == $SQ || quoteChar == $DQ); | 44 assert(quoteChar == $SQ || quoteChar == $DQ); |
| 48 // String has at least one quote. Check if it has three. | 45 // String has at least one quote. Check if it has three. |
| 49 // If it only has two, the string must be an empty string literal, | 46 // If it only has two, the string must be an empty string literal, |
| 50 // and end after the second quote. | 47 // and end after the second quote. |
| 51 if (source.moveNext() && source.current == quoteChar && source.moveNext()) { | 48 if (source.moveNext() && source.current == quoteChar && source.moveNext()) { |
| 52 int code = source.current; | 49 int code = source.current; |
| 53 assert(code == quoteChar); // If not, there is a bug in the parser. | 50 assert(code == quoteChar); // If not, there is a bug in the parser. |
| 54 leftQuoteLength = 3; | 51 leftQuoteLength = 3; |
| 55 | 52 |
| 56 // Check if a multiline string starts with optional whitespace followed by | 53 // Check if a multiline string starts with optional whitespace followed by |
| 57 // a newline (CR, LF or CR+LF). | 54 // a newline (CR, LF or CR+LF). |
| 58 // We also accept it if these characters are escaped by a backslash. | 55 // We also accept it if these characters are escaped by a backslash. |
| 59 int newLineLength = 1; | 56 int newLineLength = 1; |
| 60 while (true) { | 57 while (true) { |
| 61 // Due to string-interpolations we are not guaranteed to see the | 58 // Due to string-interpolations we are not guaranteed to see the |
| 62 // trailing quoting characters. The invocations to `moveNext()` may | 59 // trailing quoting characters. The invocations to `moveNext()` may |
| 63 // therefore return false and the `current`-getter return `null`. The | 60 // therefore return false and the `current`-getter return `null`. The |
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 102 | 99 |
| 103 void stringParseError(String message, Token token, int offset) { | 100 void stringParseError(String message, Token token, int offset) { |
| 104 reporter.reportErrorMessage( | 101 reporter.reportErrorMessage( |
| 105 token, MessageKind.GENERIC, {'text': "$message @ $offset"}); | 102 token, MessageKind.GENERIC, {'text': "$message @ $offset"}); |
| 106 } | 103 } |
| 107 | 104 |
| 108 /** | 105 /** |
| 109 * Validates the escape sequences and special characters of a string literal. | 106 * Validates the escape sequences and special characters of a string literal. |
| 110 * Returns a DartString if valid, and null if not. | 107 * Returns a DartString if valid, and null if not. |
| 111 */ | 108 */ |
| 112 DartString validateString(Token token, | 109 DartString validateString( |
| 113 int startOffset, | 110 Token token, int startOffset, String string, StringQuoting quoting) { |
| 114 String string, | |
| 115 StringQuoting quoting) { | |
| 116 // We need to check for invalid x and u escapes, for line | 111 // We need to check for invalid x and u escapes, for line |
| 117 // terminators in non-multiline strings, and for invalid Unicode | 112 // terminators in non-multiline strings, and for invalid Unicode |
| 118 // scalar values (either directly or as u-escape values). We also check | 113 // scalar values (either directly or as u-escape values). We also check |
| 119 // for unpaired UTF-16 surrogates. | 114 // for unpaired UTF-16 surrogates. |
| 120 int length = 0; | 115 int length = 0; |
| 121 int index = startOffset; | 116 int index = startOffset; |
| 122 bool containsEscape = false; | 117 bool containsEscape = false; |
| 123 bool previousWasLeadSurrogate = false; | 118 bool previousWasLeadSurrogate = false; |
| 124 bool invalidUtf16 = false; | 119 bool invalidUtf16 = false; |
| 125 var stringIter = string.codeUnits.iterator; | 120 var stringIter = string.codeUnits.iterator; |
| 126 for(HasNextIterator<int> iter = new HasNextIterator(stringIter); | 121 for (HasNextIterator<int> iter = new HasNextIterator(stringIter); |
| 127 iter.hasNext; | 122 iter.hasNext; |
| 128 length++) { | 123 length++) { |
| 129 index++; | 124 index++; |
| 130 int code = iter.next(); | 125 int code = iter.next(); |
| 131 if (code == $BACKSLASH) { | 126 if (code == $BACKSLASH) { |
| 132 if (quoting.raw) continue; | 127 if (quoting.raw) continue; |
| 133 containsEscape = true; | 128 containsEscape = true; |
| 134 if (!iter.hasNext) { | 129 if (!iter.hasNext) { |
| 135 stringParseError("Incomplete escape sequence",token, index); | 130 stringParseError("Incomplete escape sequence", token, index); |
| 136 return null; | 131 return null; |
| 137 } | 132 } |
| 138 index++; | 133 index++; |
| 139 code = iter.next(); | 134 code = iter.next(); |
| 140 if (code == $x) { | 135 if (code == $x) { |
| 141 for (int i = 0; i < 2; i++) { | 136 for (int i = 0; i < 2; i++) { |
| 142 if (!iter.hasNext) { | 137 if (!iter.hasNext) { |
| 143 stringParseError("Incomplete escape sequence", token, index); | 138 stringParseError("Incomplete escape sequence", token, index); |
| 144 return null; | 139 return null; |
| 145 } | 140 } |
| 146 index++; | 141 index++; |
| 147 code = iter.next(); | 142 code = iter.next(); |
| 148 if (!isHexDigit(code)) { | 143 if (!isHexDigit(code)) { |
| 149 stringParseError("Invalid character in escape sequence", | 144 stringParseError( |
| 150 token, index); | 145 "Invalid character in escape sequence", token, index); |
| 151 return null; | 146 return null; |
| 152 } | 147 } |
| 153 } | 148 } |
| 154 // A two-digit hex escape can't generate an invalid value. | 149 // A two-digit hex escape can't generate an invalid value. |
| 155 continue; | 150 continue; |
| 156 } else if (code == $u) { | 151 } else if (code == $u) { |
| 157 index++; | 152 index++; |
| 158 code = iter.hasNext ? iter.next() : 0; | 153 code = iter.hasNext ? iter.next() : 0; |
| 159 int value = 0; | 154 int value = 0; |
| 160 if (code == $OPEN_CURLY_BRACKET) { | 155 if (code == $OPEN_CURLY_BRACKET) { |
| 161 // expect 1-6 hex digits. | 156 // expect 1-6 hex digits. |
| 162 int count = 0; | 157 int count = 0; |
| 163 while (iter.hasNext) { | 158 while (iter.hasNext) { |
| 164 code = iter.next(); | 159 code = iter.next(); |
| 165 index++; | 160 index++; |
| 166 if (code == $CLOSE_CURLY_BRACKET) { | 161 if (code == $CLOSE_CURLY_BRACKET) { |
| 167 break; | 162 break; |
| 168 } | 163 } |
| 169 if (!isHexDigit(code)) { | 164 if (!isHexDigit(code)) { |
| 170 stringParseError("Invalid character in escape sequence", | 165 stringParseError( |
| 171 token, index); | 166 "Invalid character in escape sequence", token, index); |
| 172 return null; | 167 return null; |
| 173 } | 168 } |
| 174 count++; | 169 count++; |
| 175 value = value * 16 + hexDigitValue(code); | 170 value = value * 16 + hexDigitValue(code); |
| 176 } | 171 } |
| 177 if (code != $CLOSE_CURLY_BRACKET || count == 0 || count > 6) { | 172 if (code != $CLOSE_CURLY_BRACKET || count == 0 || count > 6) { |
| 178 int errorPosition = index - count; | 173 int errorPosition = index - count; |
| 179 if (count > 6) errorPosition += 6; | 174 if (count > 6) errorPosition += 6; |
| 180 stringParseError("Invalid character in escape sequence", | 175 stringParseError( |
| 181 token, errorPosition); | 176 "Invalid character in escape sequence", token, errorPosition); |
| 182 return null; | 177 return null; |
| 183 } | 178 } |
| 184 } else { | 179 } else { |
| 185 // Expect four hex digits, including the one just read. | 180 // Expect four hex digits, including the one just read. |
| 186 for (int i = 0; i < 4; i++) { | 181 for (int i = 0; i < 4; i++) { |
| 187 if (i > 0) { | 182 if (i > 0) { |
| 188 if (iter.hasNext) { | 183 if (iter.hasNext) { |
| 189 index++; | 184 index++; |
| 190 code = iter.next(); | 185 code = iter.next(); |
| 191 } else { | 186 } else { |
| 192 code = 0; | 187 code = 0; |
| 193 } | 188 } |
| 194 } | 189 } |
| 195 if (!isHexDigit(code)) { | 190 if (!isHexDigit(code)) { |
| 196 stringParseError("Invalid character in escape sequence", | 191 stringParseError( |
| 197 token, index); | 192 "Invalid character in escape sequence", token, index); |
| 198 return null; | 193 return null; |
| 199 } | 194 } |
| 200 value = value * 16 + hexDigitValue(code); | 195 value = value * 16 + hexDigitValue(code); |
| 201 } | 196 } |
| 202 } | 197 } |
| 203 code = value; | 198 code = value; |
| 204 } | 199 } |
| 205 } | 200 } |
| 206 if (code >= 0x10000) length++; | 201 if (code >= 0x10000) length++; |
| 207 // This handles both unescaped characters and the value of unicode | 202 // This handles both unescaped characters and the value of unicode |
| (...skipping 16 matching lines...) Expand all Loading... |
| 224 return null; | 219 return null; |
| 225 } | 220 } |
| 226 // String literal successfully validated. | 221 // String literal successfully validated. |
| 227 if (quoting.raw || !containsEscape) { | 222 if (quoting.raw || !containsEscape) { |
| 228 // A string without escapes could just as well have been raw. | 223 // A string without escapes could just as well have been raw. |
| 229 return new DartString.rawString(string, length); | 224 return new DartString.rawString(string, length); |
| 230 } | 225 } |
| 231 return new DartString.escapedString(string, length); | 226 return new DartString.escapedString(string, length); |
| 232 } | 227 } |
| 233 } | 228 } |
| OLD | NEW |