sdk/lib/_internal/compiler/implementation/source_file.dart - Issue 27510003: Scanner for UTF-8 byte arrays

Unified Diff: sdk/lib/_internal/compiler/implementation/source_file.dart

Issue 27510003: Scanner for UTF-8 byte arrays (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: fixes compiler tests Created 7 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« sdk/lib/_internal/compiler/implementation/scanner/utf8_bytes_scanner.dart ('K') | « sdk/lib/_internal/compiler/implementation/script.dart ('k') | sdk/lib/_internal/compiler/implementation/source_file_provider.dart » ('j') | tests/compiler/dart2js/deferred_load_graph_segmentation_test.dart » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: sdk/lib/_internal/compiler/implementation/source_file.dart

diff --git a/sdk/lib/_internal/compiler/implementation/source_file.dart b/sdk/lib/_internal/compiler/implementation/source_file.dart

index 3e7d2b8a37856e63a45226e3e3e9f077f2fff207..25f2afbf708d06b196bb3dabfe3901c778f9cbcd 100644

--- a/sdk/lib/_internal/compiler/implementation/source_file.dart

+++ b/sdk/lib/_internal/compiler/implementation/source_file.dart

@@ -5,42 +5,83 @@

library source_file;

import 'dart:math';

+import 'dart:convert' show UTF8;

/**

- * Represents a file of source code.

+ * Represents a file of source code. The content can be either a [String] or

+ * a UTF-8 encoded [List<int>] of bytes.

-class SourceFile {

+abstract class SourceFile {

/** The name of the file. */

final String filename;

- /** The text content of the file. */

- final String text;

+ SourceFile(this.filename);

+ /** The text content of the file represented as a String. */

+ String slowText();

- List<int> _lineStarts;

+ /** The content of the file represented as a UTF-8 encoded [List<int>]. */

+ List<int> slowUtf8Bytes();

- SourceFile(this.filename, this.text);

+ /**

+ * The length of the string representation of this source file, i.e.,

+ * equivalent to [:slowText().length:], but faster.

+ */

+ int get length;

+ /**

+ * Sets the string length of this source file. For source files based on UTF-8

+ * byte arrays, the string length is computed and assigned by the scanner.

+ */

+ set length(v);

ngeoffray 2013/10/18 10:19:37 int v?

lukas 2013/10/24 16:48:36 Done.

+ /**

+ * A map from line numbers to offsets in the string text representation of

+ * this source file.

+ */

List<int> get lineStarts {

- if (_lineStarts == null) {

- var starts = [0];

- var index = 0;

- while (index < text.length) {

- index = text.indexOf('\n', index) + 1;

- if (index <= 0) break;

- starts.add(index);

- }

- starts.add(text.length + 1);

- _lineStarts = starts;

+ if (lineStartsCache == null) {

+ // When reporting errors during scanning, the line numbers are not yet

+ // available and need to be computed using this slow path.

+ lineStartsCache = lineStartsFromString(slowText());

}

- return _lineStarts;

+ return lineStartsCache;

}

+ /**

+ * Sets the line numbers map for this source file. This map is computed and

+ * assigned by the scanner, avoiding a separate traversal of the source file.

+ *

+ * The map contains one additional entry at the end of the file, as if the

+ * source file had one more empty line at the end. This simplifies the binary

+ * search in [getLine].

+ */

+ set lineStarts(v) => lineStartsCache = v;

ngeoffray 2013/10/18 10:19:37 v -> List<int> v.

lukas 2013/10/24 16:48:36 Done.

+ List<int> lineStartsCache;

+ List<int> lineStartsFromString(String text) {

+ var starts = [0];

+ var index = 0;

+ while (index < text.length) {

+ index = text.indexOf('\n', index) + 1;

+ if (index <= 0) break;

+ starts.add(index);

+ }

+ starts.add(text.length + 1); // One additional line start at the end.

+ return starts;

+ }

+ /**

+ * Returns the line number for the offset [position] in the string

+ * representation of this source file.

+ */

int getLine(int position) {

List<int> starts = lineStarts;

if (position < 0 || starts.last <= position) {

throw 'bad position #$position in file $filename with '

- 'length ${text.length}.';

+ 'length ${length}.';

}

int first = 0;

int count = starts.length;

@@ -58,10 +99,16 @@ class SourceFile {

return first;

}

+ /**

+ * Returns the column number for the offset [position] in the string

+ * representation of this source file.

+ */

int getColumn(int line, int position) {

return position - lineStarts[line];

}

+ String slowSubstring(int start, int end);

/**

* Create a pretty string representation from a character position

* in the file.

@@ -75,12 +122,12 @@ class SourceFile {

'${filename}:${line + 1}:${column + 1}: $message');

if (includeText) {

buf.write('\n');

- var textLine;

+ String textLine;

// +1 for 0-indexing, +1 again to avoid the last line of the file

- if ((line + 2) < _lineStarts.length) {

- textLine = text.substring(_lineStarts[line], _lineStarts[line+1]);

+ if ((line + 2) < lineStarts.length) {

+ textLine = slowSubstring(lineStarts[line], lineStarts[line+1]);

} else {

- textLine = '${text.substring(_lineStarts[line])}\n';

+ textLine = '${slowSubstring(lineStarts[line], length)}\n';

}

int toColumn = min(column + (end-start), textLine.length);

@@ -101,3 +148,47 @@ class SourceFile {

return buf.toString();

}

+class Utf8BytesSourceFile extends SourceFile {

+ /** The UTF-8 encoded content of the source file. */

+ final List<int> content;

+ Utf8BytesSourceFile(String filename, this.content) : super(filename);

+ String slowText() => UTF8.decode(content);

+ List<int> slowUtf8Bytes() => content;

+ String slowSubstring(int start, int end) {

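+ // TODO(lry): to make this faster, the scanner could record the UTF-8 slack

+ // (the number of bytes minus the number of string characters) for all

+ // positions of the source text. We could then use [:content.sublist:] and

+ // decode only the requested range instead of the whole file.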

+ return slowText().substring(start, end);

+ }

+ int get length {

+ if (lengthCache == -1) {

+ // During scanning the length is not yet assigned, so we use a slow path.

+ length = slowText().length;

+ }

+ return lengthCache;

+ }

+ set length(v) => lengthCache = v;

ngeoffray 2013/10/18 10:19:37 int v

lukas 2013/10/24 16:48:36 Done.

+ int lengthCache = -1;

+class StringSourceFile extends SourceFile {

+ final String text;

+ StringSourceFile(String filename, this.text) : super(filename);

+ int get length => text.length;

+ set length(v) { }

ngeoffray 2013/10/18 10:19:37 int v

lukas 2013/10/24 16:48:36 Done.

+ String slowText() => text;

+ List<int> slowUtf8Bytes() => UTF8.encode(text);

+ String slowSubstring(int start, int end) => text.substring(start, end);