 Chromium Code Reviews
 Chromium Code Reviews Issue 27510003:
  Scanner for UTF-8 byte arrays  (Closed) 
  Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
    
  
    Issue 27510003:
  Scanner for UTF-8 byte arrays  (Closed) 
  Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

| OLD | NEW | 
|---|---|
| 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a | 
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. | 
| 4 | 4 | 
| 5 library source_file; | 5 library source_file; | 
| 6 | 6 | 
| 7 import 'dart:math'; | 7 import 'dart:math'; | 
| 8 import 'dart:convert' show UTF8; | |
| 8 | 9 | 
| 9 /** | 10 /** | 
| 10 * Represents a file of source code. | 11 * Represents a file of source code. The content can be either a [String] or | 
| 12 * a UTF-8 encoded [List<int>] of bytes. | |
| 11 */ | 13 */ | 
| 12 class SourceFile { | 14 abstract class SourceFile { | 
| 13 | 15 | 
| 14 /** The name of the file. */ | 16 /** The name of the file. */ | 
| 15 final String filename; | 17 final String filename; | 
| 16 | 18 | 
| 17 /** The text content of the file. */ | 19 SourceFile(this.filename); | 
| 18 final String text; | |
| 19 | 20 | 
| 20 List<int> _lineStarts; | 21 /** The text content of the file represented as a String. */ | 
| 22 String slowText(); | |
| 21 | 23 | 
| 22 SourceFile(this.filename, this.text); | 24 /** The content of the file represented as a UTF-8 encoded [List<int>]. */ | 
| 25 List<int> slowUtf8Bytes(); | |
| 23 | 26 | 
| 27 /** | |
| 28 * The length of the string representation of this source file, i.e., | |
| 29 * equivalent to [:slowText().length:], but faster. | |
| 30 */ | |
| 31 int get length; | |
| 32 | |
| 33 /** | |
| 34 * Sets the string length of this source file. For source files based on UTF-8 | |
| 35 * byte arrays, the string length is computed and assigned by the scanner. | |
| 36 */ | |
| 37 set length(v); | |
| 
ngeoffray
2013/10/18 10:19:37
int v?
 
lukas
2013/10/24 16:48:36
Done.
 | |
| 38 | |
| 39 /** | |
| 40 * A map from line numbers to offsets in the string text representation of | |
| 41 * this source file. | |
| 42 */ | |
| 24 List<int> get lineStarts { | 43 List<int> get lineStarts { | 
| 25 if (_lineStarts == null) { | 44 if (lineStartsCache == null) { | 
| 26 var starts = [0]; | 45 // When reporting errors during scanning, the line numbers are not yet | 
| 27 var index = 0; | 46 // available and need to be computed using this slow path. | 
| 28 while (index < text.length) { | 47 lineStartsCache = lineStartsFromString(slowText()); | 
| 29 index = text.indexOf('\n', index) + 1; | |
| 30 if (index <= 0) break; | |
| 31 starts.add(index); | |
| 32 } | |
| 33 starts.add(text.length + 1); | |
| 34 _lineStarts = starts; | |
| 35 } | 48 } | 
| 36 return _lineStarts; | 49 return lineStartsCache; | 
| 37 } | 50 } | 
| 38 | 51 | 
| 52 /** | |
| 53 * Sets the line numbers map for this source file. This map is computed and | |
| 54 * assigned by the scanner, avoiding a separate traversal of the source file. | |
| 55 * | |
| 56 * The map contains one additional entry at the end of the file, as if the | |
| 57 * source file had one more empty line at the end. This simplifies the binary | |
| 58 * search in [getLine]. | |
| 59 */ | |
| 60 set lineStarts(v) => lineStartsCache = v; | |
| 
ngeoffray
2013/10/18 10:19:37
v -> List<int> v.
 
lukas
2013/10/24 16:48:36
Done.
 | |
| 61 | |
| 62 List<int> lineStartsCache; | |
| 63 | |
| 64 List<int> lineStartsFromString(String text) { | |
| 65 var starts = [0]; | |
| 66 var index = 0; | |
| 67 while (index < text.length) { | |
| 68 index = text.indexOf('\n', index) + 1; | |
| 69 if (index <= 0) break; | |
| 70 starts.add(index); | |
| 71 } | |
| 72 starts.add(text.length + 1); // One additional line start at the end. | |
| 73 return starts; | |
| 74 } | |
| 75 | |
| 76 /** | |
| 77 * Returns the line number for the offset [position] in the string | |
| 78 * representation of this source file. | |
| 79 */ | |
| 39 int getLine(int position) { | 80 int getLine(int position) { | 
| 40 List<int> starts = lineStarts; | 81 List<int> starts = lineStarts; | 
| 41 if (position < 0 || starts.last <= position) { | 82 if (position < 0 || starts.last <= position) { | 
| 42 throw 'bad position #$position in file $filename with ' | 83 throw 'bad position #$position in file $filename with ' | 
| 43 'length ${text.length}.'; | 84 'length ${length}.'; | 
| 44 } | 85 } | 
| 45 int first = 0; | 86 int first = 0; | 
| 46 int count = starts.length; | 87 int count = starts.length; | 
| 47 while (count > 1) { | 88 while (count > 1) { | 
| 48 int step = count ~/ 2; | 89 int step = count ~/ 2; | 
| 49 int middle = first + step; | 90 int middle = first + step; | 
| 50 int lineStart = starts[middle]; | 91 int lineStart = starts[middle]; | 
| 51 if (position < lineStart) { | 92 if (position < lineStart) { | 
| 52 count = step; | 93 count = step; | 
| 53 } else { | 94 } else { | 
| 54 first = middle; | 95 first = middle; | 
| 55 count -= step; | 96 count -= step; | 
| 56 } | 97 } | 
| 57 } | 98 } | 
| 58 return first; | 99 return first; | 
| 59 } | 100 } | 
| 60 | 101 | 
| 102 /** | |
| 103 * Returns the column number for the offset [position] in the string | |
| 104 * representation of this source file. | |
| 105 */ | |
| 61 int getColumn(int line, int position) { | 106 int getColumn(int line, int position) { | 
| 62 return position - lineStarts[line]; | 107 return position - lineStarts[line]; | 
| 63 } | 108 } | 
| 64 | 109 | 
| 110 String slowSubstring(int start, int end); | |
| 111 | |
| 65 /** | 112 /** | 
| 66 * Create a pretty string representation from a character position | 113 * Create a pretty string representation from a character position | 
| 67 * in the file. | 114 * in the file. | 
| 68 */ | 115 */ | 
| 69 String getLocationMessage(String message, int start, int end, | 116 String getLocationMessage(String message, int start, int end, | 
| 70 bool includeText, String color(String x)) { | 117 bool includeText, String color(String x)) { | 
| 71 var line = getLine(start); | 118 var line = getLine(start); | 
| 72 var column = getColumn(line, start); | 119 var column = getColumn(line, start); | 
| 73 | 120 | 
| 74 var buf = new StringBuffer( | 121 var buf = new StringBuffer( | 
| 75 '${filename}:${line + 1}:${column + 1}: $message'); | 122 '${filename}:${line + 1}:${column + 1}: $message'); | 
| 76 if (includeText) { | 123 if (includeText) { | 
| 77 buf.write('\n'); | 124 buf.write('\n'); | 
| 78 var textLine; | 125 String textLine; | 
| 79 // +1 for 0-indexing, +1 again to avoid the last line of the file | 126 // +1 for 0-indexing, +1 again to avoid the last line of the file | 
| 80 if ((line + 2) < _lineStarts.length) { | 127 if ((line + 2) < lineStarts.length) { | 
| 81 textLine = text.substring(_lineStarts[line], _lineStarts[line+1]); | 128 textLine = slowSubstring(lineStarts[line], lineStarts[line+1]); | 
| 82 } else { | 129 } else { | 
| 83 textLine = '${text.substring(_lineStarts[line])}\n'; | 130 textLine = '${slowSubstring(lineStarts[line], length)}\n'; | 
| 84 } | 131 } | 
| 85 | 132 | 
| 86 int toColumn = min(column + (end-start), textLine.length); | 133 int toColumn = min(column + (end-start), textLine.length); | 
| 87 buf.write(textLine.substring(0, column)); | 134 buf.write(textLine.substring(0, column)); | 
| 88 buf.write(color(textLine.substring(column, toColumn))); | 135 buf.write(color(textLine.substring(column, toColumn))); | 
| 89 buf.write(textLine.substring(toColumn)); | 136 buf.write(textLine.substring(toColumn)); | 
| 90 | 137 | 
| 91 int i = 0; | 138 int i = 0; | 
| 92 for (; i < column; i++) { | 139 for (; i < column; i++) { | 
| 93 buf.write(' '); | 140 buf.write(' '); | 
| 94 } | 141 } | 
| 95 | 142 | 
| 96 for (; i < toColumn; i++) { | 143 for (; i < toColumn; i++) { | 
| 97 buf.write(color('^')); | 144 buf.write(color('^')); | 
| 98 } | 145 } | 
| 99 } | 146 } | 
| 100 | 147 | 
| 101 return buf.toString(); | 148 return buf.toString(); | 
| 102 } | 149 } | 
| 103 } | 150 } | 
| 151 | |
| 152 class Utf8BytesSourceFile extends SourceFile { | |
| 153 | |
| 154 /** The UTF-8 encoded content of the source file. */ | |
| 155 final List<int> content; | |
| 156 | |
| 157 Utf8BytesSourceFile(String filename, this.content) : super(filename); | |
| 158 | |
| 159 String slowText() => UTF8.decode(content); | |
| 160 | |
| 161 List<int> slowUtf8Bytes() => content; | |
| 162 | |
| 163 String slowSubstring(int start, int end) { | |
| 164 // TODO(lry): to make this faster, the scanner could record the UTF-8 slack | |
| 165 // for all positions of the source text. We could use [:content.sublist:]. | |
| 166 return slowText().substring(start, end); | |
| 167 } | |
| 168 | |
| 169 int get length { | |
| 170 if (lengthCache == -1) { | |
| 171 // During scanning the length is not yet assigned, so we use a slow path. | |
| 172 length = slowText().length; | |
| 173 } | |
| 174 return lengthCache; | |
| 175 } | |
| 176 set length(v) => lengthCache = v; | |
| 
ngeoffray
2013/10/18 10:19:37
int v
 
lukas
2013/10/24 16:48:36
Done.
 | |
| 177 int lengthCache = -1; | |
| 178 } | |
| 179 | |
| 180 class StringSourceFile extends SourceFile { | |
| 181 | |
| 182 final String text; | |
| 183 | |
| 184 StringSourceFile(String filename, this.text) : super(filename); | |
| 185 | |
| 186 int get length => text.length; | |
| 187 set length(v) { } | |
| 
ngeoffray
2013/10/18 10:19:37
int v
 
lukas
2013/10/24 16:48:36
Done.
 | |
| 188 | |
| 189 String slowText() => text; | |
| 190 | |
| 191 List<int> slowUtf8Bytes() => UTF8.encode(text); | |
| 192 | |
| 193 String slowSubstring(int start, int end) => text.substring(start, end); | |
| 194 } | |
| OLD | NEW |