 Chromium Code Reviews
 Chromium Code Reviews Issue 27510003:
  Scanner for UTF-8 byte arrays  (Closed) 
  Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
    
  
    Issue 27510003:
  Scanner for UTF-8 byte arrays  (Closed) 
  Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

| OLD | NEW | 
|---|---|
| 1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file | 
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a | 
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. | 
| 4 | 4 | 
| 5 part of scanner; | 5 part of scanner; | 
| 6 | 6 | 
| 7 /** | 7 /** | 
| 8 * Scanner that reads from a String and creates tokens that points to | 8 * Scanner that reads from a String and creates tokens that points to | 
| 9 * substrings. | 9 * substrings. | 
| 10 */ | 10 */ | 
| 11 class StringScanner extends ArrayBasedScanner<SourceString> { | 11 class StringScanner extends ArrayBasedScanner { | 
| 12 final String string; | 12 /** The file content. */ | 
| 13 String string; | |
| 13 | 14 | 
| 14 StringScanner(String this.string, {bool includeComments: false}) | 15 /** The current offset in [string]. */ | 
| 15 : super(includeComments); | 16 int scanOffset = -1; | 
| 16 | 17 | 
| 17 int nextByte() => charAt(++byteOffset); | 18 StringScanner(SourceFile file, {bool includeComments: false}) | 
| 18 | 19 : string = file.slowText(), | 
| 19 int peek() => charAt(byteOffset + 1); | 20 super(file, includeComments) { | 
| 20 | 21 ensureZeroTermination(); | 
| 21 int charAt(index) | |
| 22 => (string.length > index) ? string.codeUnitAt(index) : $EOF; | |
| 23 | |
| 24 SourceString asciiString(int start, int offset) { | |
| 25 return new SourceString.fromSubstring(string, start, byteOffset + offset); | |
| 26 } | 22 } | 
| 27 | 23 | 
| 28 SourceString utf8String(int start, int offset) { | 24 StringScanner.fromString(this.string, {bool includeComments: false}) | 
| 29 return new SourceString.fromSubstring( | 25 : super(null, includeComments) { | 
| 30 string, start, byteOffset + offset + 1); | 26 ensureZeroTermination(); | 
| 31 } | 27 } | 
| 32 | 28 | 
| 33 void appendByteStringToken(PrecedenceInfo info, SourceString value) { | 29 void ensureZeroTermination() { | 
| 34 // assert(kind != $a || keywords.get(value) == null); | 30 if (string.isEmpty || string.codeUnitAt(string.length - 1) != 0) { | 
| 35 tail.next = new StringToken.fromSource(info, value, tokenStart); | 31 // TODO(lry): abort instead of copying the array, or warn? | 
| 32 string = string + '\x00'; | |
| 33 } | |
| 34 } | |
| 35 | |
| 36 int advance() => string.codeUnitAt(++scanOffset); | |
| 37 int peek() => string.codeUnitAt(scanOffset + 1); | |
| 38 | |
| 39 int get stringOffset => scanOffset; | |
| 40 | |
| 41 int currentAsUnicode(int next) => next; | |
| 42 | |
| 43 void handleUnicode(int startScanOffset) { } | |
| 44 | |
| 45 | Review comment by ngeoffray (2013/10/18 10:19:37): Extra line.<br>Reply by lukas (2013/10/24 16:48:36): Done. | 
| 46 Token firstToken() => tokens.next; | |
| 47 Token previousToken() => tail; | |
| 48 | |
| 49 void appendSubstringToken(PrecedenceInfo info, int start, | |
| 50 bool asciiOnly, [int extraOffset = 0]) { | |
| 51 tail.next = new StringToken.fromSubstring(info, string, start, | |
| 52 scanOffset + extraOffset, tokenStart, true); | |
| 36 tail = tail.next; | 53 tail = tail.next; | 
| 37 } | 54 } | 
| 38 | |
| 39 void unmatchedBeginGroup(BeginGroupToken begin) { | |
| 40 SourceString error = new SourceString('unmatched "${begin.stringValue}"'); | |
| 41 Token close = | |
| 42 new StringToken.fromSource(BAD_INPUT_INFO, error, begin.charOffset); | |
| 43 // We want to ensure that unmatched BeginGroupTokens are reported | |
| 44 // as errors. However, the rest of the parser assume the groups | |
| 45 // are well-balanced and will never look at the endGroup | |
| 46 // token. This is a nice property that allows us to skip quickly | |
| 47 // over correct code. By inserting an additional error token in | |
| 48 // the stream, we can keep ignoring endGroup tokens. | |
| 49 Token next = | |
| 50 new StringToken.fromSource(BAD_INPUT_INFO, error, begin.charOffset); | |
| 51 begin.endGroup = close; | |
| 52 close.next = next; | |
| 53 next.next = begin.next; | |
| 54 } | |
| 55 } | 55 } | 
| OLD | NEW |