Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(497)

Unified Diff: sdk/lib/_internal/compiler/implementation/source_file.dart

Issue 27510003: Scanner for UTF-8 byte arrays (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: fixes compiler tests Created 7 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: sdk/lib/_internal/compiler/implementation/source_file.dart
diff --git a/sdk/lib/_internal/compiler/implementation/source_file.dart b/sdk/lib/_internal/compiler/implementation/source_file.dart
index 3e7d2b8a37856e63a45226e3e3e9f077f2fff207..25f2afbf708d06b196bb3dabfe3901c778f9cbcd 100644
--- a/sdk/lib/_internal/compiler/implementation/source_file.dart
+++ b/sdk/lib/_internal/compiler/implementation/source_file.dart
@@ -5,42 +5,83 @@
library source_file;
import 'dart:math';
+import 'dart:convert' show UTF8;
/**
- * Represents a file of source code.
+ * Represents a file of source code. The content can be either a [String] or
+ * a UTF-8 encoded [List<int>] of bytes.
*/
-class SourceFile {
+abstract class SourceFile {
/** The name of the file. */
final String filename;
- /** The text content of the file. */
- final String text;
+ SourceFile(this.filename);
+
+ /** The text content of the file represented as a String. */
+ String slowText();
- List<int> _lineStarts;
+ /** The content of the file represented as a UTF-8 encoded [List<int>]. */
+ List<int> slowUtf8Bytes();
- SourceFile(this.filename, this.text);
+ /**
+ * The length of the string representation of this source file, i.e.,
+ * equivalent to [:slowText().length:], but faster.
+ */
+ int get length;
+ /**
+ * Sets the string length of this source file. For source files based on UTF-8
+ * byte arrays, the string length is computed and assigned by the scanner.
+ */
+ set length(v);
ngeoffray 2013/10/18 10:19:37 int v?
lukas 2013/10/24 16:48:36 Done.
+
+ /**
+ * A map from line numbers to offsets in the string text representation of
+ * this source file.
+ */
List<int> get lineStarts {
- if (_lineStarts == null) {
- var starts = [0];
- var index = 0;
- while (index < text.length) {
- index = text.indexOf('\n', index) + 1;
- if (index <= 0) break;
- starts.add(index);
- }
- starts.add(text.length + 1);
- _lineStarts = starts;
+ if (lineStartsCache == null) {
+ // When reporting errors during scanning, the line numbers are not yet
+ // available and need to be computed using this slow path.
+ lineStartsCache = lineStartsFromString(slowText());
}
- return _lineStarts;
+ return lineStartsCache;
}
+ /**
+ * Sets the line numbers map for this source file. This map is computed and
+ * assigned by the scanner, avoiding a separate traversal of the source file.
+ *
+ * The map contains one additional entry at the end of the file, as if the
+ * source file had one more empty line at the end. This simplifies the binary
+ * search in [getLine].
+ */
+ set lineStarts(v) => lineStartsCache = v;
ngeoffray 2013/10/18 10:19:37 v -> List<int> v.
lukas 2013/10/24 16:48:36 Done.
+
+ List<int> lineStartsCache;
+
+ List<int> lineStartsFromString(String text) {
+ var starts = [0];
+ var index = 0;
+ while (index < text.length) {
+ index = text.indexOf('\n', index) + 1;
+ if (index <= 0) break;
+ starts.add(index);
+ }
+ starts.add(text.length + 1); // One additional line start at the end.
+ return starts;
+ }
+
+ /**
+ * Returns the line number for the offset [position] in the string
+ * representation of this source file.
+ */
int getLine(int position) {
List<int> starts = lineStarts;
if (position < 0 || starts.last <= position) {
throw 'bad position #$position in file $filename with '
- 'length ${text.length}.';
+ 'length ${length}.';
}
int first = 0;
int count = starts.length;
@@ -58,10 +99,16 @@ class SourceFile {
return first;
}
+ /**
+ * Returns the column number for the offset [position] in the string
+ * representation of this source file.
+ */
int getColumn(int line, int position) {
return position - lineStarts[line];
}
+ String slowSubstring(int start, int end);
+
/**
* Create a pretty string representation from a character position
* in the file.
@@ -75,12 +122,12 @@ class SourceFile {
'${filename}:${line + 1}:${column + 1}: $message');
if (includeText) {
buf.write('\n');
- var textLine;
+ String textLine;
// +1 for 0-indexing, +1 again to avoid the last line of the file
- if ((line + 2) < _lineStarts.length) {
- textLine = text.substring(_lineStarts[line], _lineStarts[line+1]);
+ if ((line + 2) < lineStarts.length) {
+ textLine = slowSubstring(lineStarts[line], lineStarts[line+1]);
} else {
- textLine = '${text.substring(_lineStarts[line])}\n';
+ textLine = '${slowSubstring(lineStarts[line], length)}\n';
}
int toColumn = min(column + (end-start), textLine.length);
@@ -101,3 +148,47 @@ class SourceFile {
return buf.toString();
}
}
+
+class Utf8BytesSourceFile extends SourceFile {
+
+ /** The UTF-8 encoded content of the source file. */
+ final List<int> content;
+
+ Utf8BytesSourceFile(String filename, this.content) : super(filename);
+
+ String slowText() => UTF8.decode(content);
+
+ List<int> slowUtf8Bytes() => content;
+
+ String slowSubstring(int start, int end) {
+ // TODO(lry): to make this faster, the scanner could record the UTF-8 slack
+ // for all positions, so that only [:content.sublist:] needs to be decoded.
+ return slowText().substring(start, end);
+ }
+
+ int get length {
+ if (lengthCache == -1) {
+ // During scanning the length is not yet assigned, so we use a slow path.
+ length = slowText().length;
+ }
+ return lengthCache;
+ }
+ set length(v) => lengthCache = v;
ngeoffray 2013/10/18 10:19:37 int v
lukas 2013/10/24 16:48:36 Done.
+ int lengthCache = -1;
+}
+
+class StringSourceFile extends SourceFile {
+
+ final String text;
+
+ StringSourceFile(String filename, this.text) : super(filename);
+
+ int get length => text.length;
+ set length(v) { }
ngeoffray 2013/10/18 10:19:37 int v
lukas 2013/10/24 16:48:36 Done.
+
+ String slowText() => text;
+
+ List<int> slowUtf8Bytes() => UTF8.encode(text);
+
+ String slowSubstring(int start, int end) => text.substring(start, end);
+}

Powered by Google App Engine
This is Rietveld 408576698