Index: pkg/fasta/lib/src/quote.dart |
diff --git a/pkg/fasta/lib/src/quote.dart b/pkg/fasta/lib/src/quote.dart |
new file mode 100644 |
index 0000000000000000000000000000000000000000..cd63e811b8f52e1f876ffa54e45c35eee5d92220 |
--- /dev/null |
+++ b/pkg/fasta/lib/src/quote.dart |
@@ -0,0 +1,234 @@ |
+// Copyright (c) 2016, the Dart project authors. Please see the AUTHORS file |
+// for details. All rights reserved. Use of this source code is governed by a |
+// BSD-style license that can be found in the LICENSE file. |
+ |
+library fasta.quote; |
+ |
+import 'errors.dart' show |
+ inputError, |
+ internalError; |
+ |
+import 'package:dart_scanner/src/characters.dart' show |
+ $BACKSLASH, |
+ $BS, |
+ $CLOSE_CURLY_BRACKET, |
+ $CR, |
+ $FF, |
+ $LF, |
+ $OPEN_CURLY_BRACKET, |
+ $SPACE, |
+ $TAB, |
+ $VTAB, |
+ $b, |
+ $f, |
+ $n, |
+ $r, |
+ $t, |
+ $u, |
+ $v, |
+ $x, |
+ hexDigitValue, |
+ isHexDigit; |
+ |
+/// The kinds of string literal delimiters distinguished by [analyzeQuote]. |
+enum Quote { |
+ /// Opens with `'`. |
+ Single, |
+ /// Opens with `"`. |
+ Double, |
+ /// Opens with `'''`. |
+ MultiLineSingle, |
+ /// Opens with `"""`. |
+ MultiLineDouble, |
+ /// Opens with `r'`. |
+ RawSingle, |
+ /// Opens with `r"`. |
+ RawDouble, |
+ /// Opens with `r'''`. |
+ RawMultiLineSingle, |
+ /// Opens with `r"""`. |
+ RawMultiLineDouble, |
+} |
+ |
+/// Classifies the opening delimiter of a string literal. |
+/// |
+/// [first] is expected to begin with the literal's opening quotes, optionally |
+/// preceded by `r` for a raw string. Triple quotes are tested before single |
+/// quotes so that `"""` is never taken for `"`. Any other input is reported |
+/// through [internalError]. |
+Quote analyzeQuote(String first) { |
+  final bool raw = first.startsWith("r"); |
+  // Offset of the quote characters, just past the optional raw marker. |
+  final int at = raw ? 1 : 0; |
+  if (first.startsWith('"""', at)) { |
+    return raw ? Quote.RawMultiLineDouble : Quote.MultiLineDouble; |
+  } |
+  if (first.startsWith("'''", at)) { |
+    return raw ? Quote.RawMultiLineSingle : Quote.MultiLineSingle; |
+  } |
+  if (first.startsWith('"', at)) { |
+    return raw ? Quote.RawDouble : Quote.Double; |
+  } |
+  if (first.startsWith("'", at)) { |
+    return raw ? Quote.RawSingle : Quote.Single; |
+  } |
+  return internalError("Unexpected string literal: $first"); |
+} |
+ |
+/// Returns the offset in [first] just past an ignorable first line of a |
+/// multi-line string literal, or [start] if there is nothing to ignore. |
+/// |
+/// Scanning begins at [start], the offset right after the opening quotes. The |
+/// first line is ignorable only when everything up to and including the first |
+/// line break is a space or a tab, each optionally preceded by a backslash. |
+/// A CR immediately followed by LF counts as one line break. As soon as any |
+/// other character is seen, the first line has real content and [start] is |
+/// returned unchanged. |
+int lengthOfOptionalWhitespacePrefix(String first, int start) { |
+  List<int> codeUnits = first.codeUnits; |
+  for (int i = start; i < codeUnits.length; i++) { |
+    int code = codeUnits[i]; |
+    if (code == $BACKSLASH) { |
+      // A backslash may precede a whitespace character; inspect what follows. |
+      i++; |
+      if (i < codeUnits.length) { |
+        code = codeUnits[i]; |
+      } else { |
+        break; |
+      } |
+    } |
+    if (code == $TAB || code == $SPACE) continue; |
+    if (code == $CR) { |
+      // Consume the LF of a CR LF pair; look at the *next* code unit. |
+      if (i + 1 < codeUnits.length && codeUnits[i + 1] == $LF) { |
+        i++; |
+      } |
+      return i + 1; |
+    } |
+    if (code == $LF) { |
+      return i + 1; |
+    } |
Johnni Winther
2017/01/16 13:01:19
Shouldn't we break after this? If we have
String
ahe
2017/01/16 15:26:34
I think you're right. I copied this from pkg/compi
|
+    // Not a whitespace character: the first line has content, skip nothing. |
+    break; |
+  } |
+  return start; |
+} |
+ |
+/// Returns how many leading characters of [first] make up the opening |
+/// delimiter of a literal of kind [quote]. |
+/// |
+/// Single-line kinds have a fixed length (1, or 2 with the raw marker). For |
+/// multi-line kinds the result is whatever [lengthOfOptionalWhitespacePrefix] |
+/// returns when started just past the opening quotes. An unrecognized [quote] |
+/// is reported through [internalError]. |
+int firstQuoteLength(String first, Quote quote) { |
+  if (quote == Quote.Single || quote == Quote.Double) return 1; |
+  if (quote == Quote.RawSingle || quote == Quote.RawDouble) return 2; |
+  if (quote == Quote.MultiLineSingle || quote == Quote.MultiLineDouble) { |
+    return lengthOfOptionalWhitespacePrefix(first, 3); |
+  } |
+  if (quote == Quote.RawMultiLineSingle || |
+      quote == Quote.RawMultiLineDouble) { |
+    return lengthOfOptionalWhitespacePrefix(first, 4); |
+  } |
+  return internalError("Unhandled string quote: $quote"); |
+} |
+ |
+/// Returns the length of the closing delimiter for a literal of kind [quote]: |
+/// 3 for the multi-line kinds and 1 for all others. |
+/// |
+/// An unrecognized [quote] is reported through [internalError]. |
+int lastQuoteLength(Quote quote) { |
+  bool isMultiLine = quote == Quote.MultiLineSingle || |
+      quote == Quote.MultiLineDouble || |
+      quote == Quote.RawMultiLineSingle || |
+      quote == Quote.RawMultiLineDouble; |
+  if (isMultiLine) return 3; |
+  bool isSingleLine = quote == Quote.Single || |
+      quote == Quote.Double || |
+      quote == Quote.RawSingle || |
+      quote == Quote.RawDouble; |
+  if (isSingleLine) return 1; |
+  return internalError("Unhandled string quote: $quote"); |
+} |
+ |
+/// Strips the opening delimiter from [first] and unescapes what remains, |
+/// according to [quote]. |
+String unescapeFirstStringPart(String first, Quote quote) { |
+  int contentStart = firstQuoteLength(first, quote); |
+  String content = first.substring(contentStart); |
+  return unescape(content, quote); |
+} |
+ |
+/// Strips the closing delimiter from [last] and unescapes what precedes it, |
+/// according to [quote]. |
+String unescapeLastStringPart(String last, Quote quote) { |
+  int contentEnd = last.length - lastQuoteLength(quote); |
+  String content = last.substring(0, contentEnd); |
+  return unescape(content, quote); |
+} |
+ |
+/// Unescapes a complete string literal: determines its quote kind, strips the |
+/// opening and closing delimiters, and unescapes the text between them. |
+String unescapeString(String string) { |
+  Quote quote = analyzeQuote(string); |
+  int contentStart = firstQuoteLength(string, quote); |
+  int contentEnd = string.length - lastQuoteLength(quote); |
+  String content = string.substring(contentStart, contentEnd); |
+  return unescape(content, quote); |
+} |
+ |
+/// Returns [string] with its escape sequences decoded, unless [quote] is one |
+/// of the raw kinds, in which case [string] is returned untouched. |
+/// |
+/// [string] must already have its delimiters removed. Text without any |
+/// backslash is returned as is; otherwise decoding is delegated to |
+/// [unescapeCodeUnits]. |
+String unescape(String string, Quote quote) { |
+  bool isRaw = quote == Quote.RawSingle || |
+      quote == Quote.RawDouble || |
+      quote == Quote.RawMultiLineSingle || |
+      quote == Quote.RawMultiLineDouble; |
+  // Raw strings have no escapes, and neither does text without a backslash. |
+  if (isRaw || !string.contains("\\")) return string; |
+  return unescapeCodeUnits(string.codeUnits); |
+} |
+ |
+/// Message used by [unescapeCodeUnits] when the input ends in the middle of |
+/// an escape sequence. |
+const String incompleteSequence = "Incomplete escape sequence."; |
+ |
+/// Message used by [unescapeCodeUnits] when a hex escape contains a character |
+/// that is not a hex digit. |
+const String invalidCharacter = "Invalid character in escape sequence."; |
+ |
+/// Message used by [unescapeCodeUnits] when an escape denotes a value above |
+/// 0x10FFFF. |
+const String invalidCodePoint = "Invalid code point."; |
+ |
+/// Decodes the backslash escapes in [codeUnits] and returns the resulting |
+/// string. |
+/// |
+/// Recognized escapes: |
+/// |
+/// * `\n` for newline, equivalent to `\x0A`. |
+/// * `\r` for carriage return, equivalent to `\x0D`. |
+/// * `\f` for form feed, equivalent to `\x0C`. |
+/// * `\b` for backspace, equivalent to `\x08`. |
+/// * `\t` for tab, equivalent to `\x09`. |
+/// * `\v` for vertical tab, equivalent to `\x0B`. |
+/// * `\xXX` for a hex escape with exactly two digits. |
+/// * `\uXXXX` or `\u{X}` through `\u{XXXXXX}` for a Unicode hex escape. |
+/// |
+/// Any other escaped character stands for itself. Malformed input is |
+/// reported through [inputError] with [incompleteSequence] (input ends inside |
+/// an escape), [invalidCharacter] (non-hex digit, or a seventh digit inside |
+/// `\u{...}`) or [invalidCodePoint] (value above 0x10FFFF). |
+String unescapeCodeUnits(List<int> codeUnits) { |
+  // Can't use Uint8List or Uint16List here, the code units may be larger. |
+  List<int> result = new List<int>(codeUnits.length); |
+  int resultOffset = 0; |
+  // [offset] is not forwarded: inputError is called without a location. |
+  error(int offset, String message) { |
+    inputError(null, null, message); |
+  } |
+  for (int i = 0; i < codeUnits.length; i++) { |
+    int code = codeUnits[i]; |
+    if (code == $BACKSLASH) { |
+      if (codeUnits.length == ++i) return error(i, incompleteSequence); |
+      code = codeUnits[i]; |
+      if (code == $n) { |
+        code = $LF; |
+      } else if (code == $r) { |
+        code = $CR; |
+      } else if (code == $f) { |
+        code = $FF; |
+      } else if (code == $b) { |
+        code = $BS; |
+      } else if (code == $t) { |
+        code = $TAB; |
+      } else if (code == $v) { |
+        code = $VTAB; |
+      } else if (code == $x) { |
+        // Expect exactly 2 hex digits. |
+        if (codeUnits.length <= i + 2) return error(i, incompleteSequence); |
+        code = 0; |
+        for (int j = 0; j < 2; j++) { |
+          int digit = codeUnits[++i]; |
+          if (!isHexDigit(digit)) return error(i, invalidCharacter); |
+          code = (code << 4) + hexDigitValue(digit); |
+        } |
+      } else if (code == $u) { |
+        if (codeUnits.length == i + 1) return error(i, incompleteSequence); |
+        if (codeUnits[i + 1] == $OPEN_CURLY_BRACKET) { |
+          // Expect 1-6 hex digits followed by '}'. |
+          i++; // Step onto the '{' that was just seen. |
+          code = 0; |
+          for (int j = 0; j < 7; j++) { |
+            if (codeUnits.length == ++i) return error(i, incompleteSequence); |
+            int digit = codeUnits[i]; |
+            if (j != 0 && digit == $CLOSE_CURLY_BRACKET) break; |
+            // At j == 6 six digits were already read; only '}' may follow. |
+            if (j == 6 || !isHexDigit(digit)) { |
+              return error(i, invalidCharacter); |
+            } |
+            code = (code << 4) + hexDigitValue(digit); |
+          } |
+        } else { |
+          // Expect exactly 4 hex digits; check the length once up front. |
+          if (codeUnits.length <= i + 4) return error(i, incompleteSequence); |
Johnni Winther
2017/01/16 13:01:19
Remove this and add a check before the loop, like
ahe
2017/01/16 15:26:34
Done.
|
+          code = 0; |
+          for (int j = 0; j < 4; j++) { |
+            int digit = codeUnits[++i]; |
+            if (!isHexDigit(digit)) return error(i, invalidCharacter); |
+            code = (code << 4) + hexDigitValue(digit); |
+          } |
+        } |
+      } else { |
+        // Nothing, escaped character is passed through; |
+      } |
+      if (code > 0x10FFFF) return error(i, invalidCodePoint); |
Johnni Winther
2017/01/16 13:01:19
Why check for '> 0xFFFF' first?
ahe
2017/01/16 15:26:33
Probably because I was tired when I copied this fr
|
+    } |
+    result[resultOffset++] = code; |
+  } |
+  return new String.fromCharCodes(result, 0, resultOffset); |
+} |