Index: pkg/dart_scanner/lib/src/abstract_scanner.dart |
diff --git a/pkg/dart_scanner/lib/src/abstract_scanner.dart b/pkg/dart_scanner/lib/src/abstract_scanner.dart |
new file mode 100644 |
index 0000000000000000000000000000000000000000..f0698611482b302293d982b6bccd4f5b91af214a |
--- /dev/null |
+++ b/pkg/dart_scanner/lib/src/abstract_scanner.dart |
@@ -0,0 +1,1187 @@ |
+// Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
+// for details. All rights reserved. Use of this source code is governed by a |
+// BSD-style license that can be found in the LICENSE file. |
+ |
+library dart2js.scanner; |
+ |
+import '../io/source_file.dart' show SourceFile, Utf8BytesSourceFile; |
+import '../tokens/keyword.dart' show Keyword, KeywordState; |
+import '../tokens/precedence.dart'; |
+import '../tokens/precedence_constants.dart'; |
+import '../tokens/token.dart'; |
+import '../tokens/token_constants.dart'; |
+import '../util/characters.dart'; |
+import 'string_scanner.dart' show StringScanner; |
+import 'utf8_bytes_scanner.dart' show Utf8BytesScanner; |
+ |
+abstract class Scanner { |
+ Token tokenize(); |
+ |
+ factory Scanner(SourceFile file, {bool includeComments: false}) { |
+ if (file is Utf8BytesSourceFile) { |
+ return new Utf8BytesScanner(file, includeComments: includeComments); |
+ } else { |
+ return new StringScanner(file, includeComments: includeComments); |
+ } |
+ } |
+} |
+ |
+abstract class AbstractScanner implements Scanner { |
+ // TODO(ahe): Move this class to implementation. |
+ |
+ final bool includeComments; |
+ |
+ /** |
+ * The string offset for the next token that will be created. |
+ * |
+ * Note that in the [Utf8BytesScanner], [stringOffset] and [scanOffset] values |
+ * are different. One string character can be encoded using multiple UTF-8 |
+ * bytes. |
+ */ |
+ int tokenStart = -1; |
+ |
+ /** |
+ * A pointer to the token stream created by this scanner. The first token |
+ * is a special token and not part of the source file. This is an |
+ * implementation detail to avoid special cases in the scanner. This token |
+ * is not exposed to clients of the scanner, which are expected to invoke |
+ * [firstToken] to access the token stream. |
+ */ |
+ final Token tokens = new SymbolToken(EOF_INFO, -1); |
+ |
+ /** |
+ * A pointer to the last scanned token. |
+ */ |
+ Token tail; |
+ |
+ /** |
+ * The source file that is being scanned. This field can be [:null:]. |
+ * If the source file is available, the scanner assigns its [:lineStarts:] and |
+ * [:length:] fields at the end of [tokenize]. |
+ */ |
+ final SourceFile file; |
+ |
+ final List<int> lineStarts = <int>[0]; |
+ |
+ AbstractScanner(this.file, this.includeComments) { |
+ this.tail = this.tokens; |
+ } |
+ |
+ /** |
+ * Advances and returns the next character. |
+ * |
+ * If the next character is non-ASCII, then the returned value depends on the |
+ * scanner implementation. The [Utf8BytesScanner] returns a UTF-8 byte, while |
+ * the [StringScanner] returns a UTF-16 code unit. |
+ * |
+ * The scanner ensures that [advance] is not invoked after it returned [$EOF]. |
+ * This allows implementations to omit bound checks if the data structure ends |
+ * with '0'. |
+ */ |
+ int advance(); |
+ |
+ /** |
+ * Returns the current unicode character. |
+ * |
+ * If the current character is ASCII, then it is returned unchanged. |
+ * |
+ * The [Utf8BytesScanner] decodes the next unicode code point starting at the |
+ * current position. Note that every unicode character is returned as a single |
+ * code point, that is, for '\u{1d11e}' it returns 119070, and the following |
+ * [advance] returns the next character. |
+ * |
+ * The [StringScanner] returns the current character unchanged, which might |
+ * be a surrogate character. In the case of '\u{1d11e}', it returns the first |
+ * code unit 55348, and the following [advance] returns the second code unit |
+ * 56606. |
+ * |
+ * Invoking [currentAsUnicode] multiple times is safe, i.e., |
+ * [:currentAsUnicode(next) == currentAsUnicode(currentAsUnicode(next)):]. |
+ */ |
+ int currentAsUnicode(int next); |
+ |
+ /** |
+   * Returns the character at the next position. Like in [advance], the |
+ * [Utf8BytesScanner] returns a UTF-8 byte, while the [StringScanner] returns |
+ * a UTF-16 code unit. |
+ */ |
+ int peek(); |
+ |
+ /** |
+ * Notifies the scanner that unicode characters were detected in either a |
+ * comment or a string literal between [startScanOffset] and the current |
+ * scan offset. |
+ */ |
+ void handleUnicode(int startScanOffset); |
+ |
+ /** |
+ * Returns the current scan offset. |
+ * |
+ * In the [Utf8BytesScanner] this is the offset into the byte list, in the |
+ * [StringScanner] the offset in the source string. |
+ */ |
+ int get scanOffset; |
+ |
+ /** |
+ * Returns the current string offset. |
+ * |
+ * In the [StringScanner] this is identical to the [scanOffset]. In the |
+ * [Utf8BytesScanner] it is computed based on encountered UTF-8 characters. |
+ */ |
+ int get stringOffset; |
+ |
+ /** |
+ * Returns the first token scanned by this [Scanner]. |
+ */ |
+ Token firstToken(); |
+ |
+ /** |
+ * Returns the last token scanned by this [Scanner]. |
+ */ |
+ Token previousToken(); |
+ |
+ /** |
+ * Notifies that a new token starts at current offset. |
+ */ |
+ void beginToken() { |
+ tokenStart = stringOffset; |
+ } |
+ |
+ /** |
+ * Appends a substring from the scan offset [:start:] to the current |
+ * [:scanOffset:] plus the [:extraOffset:]. For example, if the current |
+ * scanOffset is 10, then [:appendSubstringToken(5, -1):] will append the |
+   * substring [5,9). |
+ * |
+ * Note that [extraOffset] can only be used if the covered character(s) are |
+ * known to be ASCII. |
+ */ |
+ void appendSubstringToken(PrecedenceInfo info, int start, bool asciiOnly, |
+ [int extraOffset]); |
+ |
+ /** Documentation in subclass [ArrayBasedScanner]. */ |
+ void appendPrecedenceToken(PrecedenceInfo info); |
+ |
+ /** Documentation in subclass [ArrayBasedScanner]. */ |
+ int select(int choice, PrecedenceInfo yes, PrecedenceInfo no); |
+ |
+ /** Documentation in subclass [ArrayBasedScanner]. */ |
+ void appendKeywordToken(Keyword keyword); |
+ |
+ /** Documentation in subclass [ArrayBasedScanner]. */ |
+ void appendEofToken(); |
+ |
+ /** Documentation in subclass [ArrayBasedScanner]. */ |
+ void appendWhiteSpace(int next); |
+ |
+ /** Documentation in subclass [ArrayBasedScanner]. */ |
+ void lineFeedInMultiline(); |
+ |
+ /** Documentation in subclass [ArrayBasedScanner]. */ |
+ void appendBeginGroup(PrecedenceInfo info); |
+ |
+ /** Documentation in subclass [ArrayBasedScanner]. */ |
+ int appendEndGroup(PrecedenceInfo info, int openKind); |
+ |
+ /** Documentation in subclass [ArrayBasedScanner]. */ |
+ void appendGt(PrecedenceInfo info); |
+ |
+ /** Documentation in subclass [ArrayBasedScanner]. */ |
+ void appendGtGt(PrecedenceInfo info); |
+ |
+ /** Documentation in subclass [ArrayBasedScanner]. */ |
+ void appendComment(start, bool asciiOnly); |
+ |
+ /// Append [token] to the token stream. |
+ void appendErrorToken(ErrorToken token); |
+ |
+ /** Documentation in subclass [ArrayBasedScanner]. */ |
+ void discardOpenLt(); |
+ |
+ /// Return true when at EOF. |
+ bool atEndOfFile(); |
+ |
+ Token tokenize() { |
+ while (!atEndOfFile()) { |
+ int next = advance(); |
+ while (!identical(next, $EOF)) { |
+ next = bigSwitch(next); |
+ } |
+ if (atEndOfFile()) { |
+ appendEofToken(); |
+ } else { |
+ unexpected($EOF); |
+ } |
+ } |
+ |
+ if (file != null) { |
+ file.length = stringOffset; |
+ // One additional line start at the end, see [SourceFile.lineStarts]. |
+ lineStarts.add(stringOffset + 1); |
+ file.lineStarts = lineStarts; |
+ } |
+ |
+ return firstToken(); |
+ } |
+ |
+ int bigSwitch(int next) { |
+ beginToken(); |
+ if (identical(next, $SPACE) || |
+ identical(next, $TAB) || |
+ identical(next, $LF) || |
+ identical(next, $CR)) { |
+ appendWhiteSpace(next); |
+ next = advance(); |
+ // Sequences of spaces are common, so advance through them fast. |
+ while (identical(next, $SPACE)) { |
+ // We don't invoke [:appendWhiteSpace(next):] here for efficiency, |
+ // assuming that it does not do anything for space characters. |
+ next = advance(); |
+ } |
+ return next; |
+ } |
+ |
+ if ($a <= next && next <= $z) { |
+ if (identical($r, next)) { |
+ return tokenizeRawStringKeywordOrIdentifier(next); |
+ } |
+ return tokenizeKeywordOrIdentifier(next, true); |
+ } |
+ |
+ if (($A <= next && next <= $Z) || |
+ identical(next, $_) || |
+ identical(next, $$)) { |
+ return tokenizeIdentifier(next, scanOffset, true); |
+ } |
+ |
+ if (identical(next, $LT)) { |
+ return tokenizeLessThan(next); |
+ } |
+ |
+ if (identical(next, $GT)) { |
+ return tokenizeGreaterThan(next); |
+ } |
+ |
+ if (identical(next, $EQ)) { |
+ return tokenizeEquals(next); |
+ } |
+ |
+ if (identical(next, $BANG)) { |
+ return tokenizeExclamation(next); |
+ } |
+ |
+ if (identical(next, $PLUS)) { |
+ return tokenizePlus(next); |
+ } |
+ |
+ if (identical(next, $MINUS)) { |
+ return tokenizeMinus(next); |
+ } |
+ |
+ if (identical(next, $STAR)) { |
+ return tokenizeMultiply(next); |
+ } |
+ |
+ if (identical(next, $PERCENT)) { |
+ return tokenizePercent(next); |
+ } |
+ |
+ if (identical(next, $AMPERSAND)) { |
+ return tokenizeAmpersand(next); |
+ } |
+ |
+ if (identical(next, $BAR)) { |
+ return tokenizeBar(next); |
+ } |
+ |
+ if (identical(next, $CARET)) { |
+ return tokenizeCaret(next); |
+ } |
+ |
+ if (identical(next, $OPEN_SQUARE_BRACKET)) { |
+ return tokenizeOpenSquareBracket(next); |
+ } |
+ |
+ if (identical(next, $TILDE)) { |
+ return tokenizeTilde(next); |
+ } |
+ |
+ if (identical(next, $BACKSLASH)) { |
+ appendPrecedenceToken(BACKSLASH_INFO); |
+ return advance(); |
+ } |
+ |
+ if (identical(next, $HASH)) { |
+ return tokenizeTag(next); |
+ } |
+ |
+ if (identical(next, $OPEN_PAREN)) { |
+ appendBeginGroup(OPEN_PAREN_INFO); |
+ return advance(); |
+ } |
+ |
+ if (identical(next, $CLOSE_PAREN)) { |
+ return appendEndGroup(CLOSE_PAREN_INFO, OPEN_PAREN_TOKEN); |
+ } |
+ |
+ if (identical(next, $COMMA)) { |
+ appendPrecedenceToken(COMMA_INFO); |
+ return advance(); |
+ } |
+ |
+ if (identical(next, $COLON)) { |
+ appendPrecedenceToken(COLON_INFO); |
+ return advance(); |
+ } |
+ |
+ if (identical(next, $SEMICOLON)) { |
+ appendPrecedenceToken(SEMICOLON_INFO); |
+ // Type parameters and arguments cannot contain semicolon. |
+ discardOpenLt(); |
+ return advance(); |
+ } |
+ |
+ if (identical(next, $QUESTION)) { |
+ return tokenizeQuestion(next); |
+ } |
+ |
+ if (identical(next, $CLOSE_SQUARE_BRACKET)) { |
+ return appendEndGroup( |
+ CLOSE_SQUARE_BRACKET_INFO, OPEN_SQUARE_BRACKET_TOKEN); |
+ } |
+ |
+ if (identical(next, $BACKPING)) { |
+ appendPrecedenceToken(BACKPING_INFO); |
+ return advance(); |
+ } |
+ |
+ if (identical(next, $OPEN_CURLY_BRACKET)) { |
+ appendBeginGroup(OPEN_CURLY_BRACKET_INFO); |
+ return advance(); |
+ } |
+ |
+ if (identical(next, $CLOSE_CURLY_BRACKET)) { |
+ return appendEndGroup(CLOSE_CURLY_BRACKET_INFO, OPEN_CURLY_BRACKET_TOKEN); |
+ } |
+ |
+ if (identical(next, $SLASH)) { |
+ return tokenizeSlashOrComment(next); |
+ } |
+ |
+ if (identical(next, $AT)) { |
+ return tokenizeAt(next); |
+ } |
+ |
+ if (identical(next, $DQ) || identical(next, $SQ)) { |
+ return tokenizeString(next, scanOffset, false); |
+ } |
+ |
+ if (identical(next, $PERIOD)) { |
+ return tokenizeDotsOrNumber(next); |
+ } |
+ |
+ if (identical(next, $0)) { |
+ return tokenizeHexOrNumber(next); |
+ } |
+ |
+ // TODO(ahe): Would a range check be faster? |
+ if (identical(next, $1) || |
+ identical(next, $2) || |
+ identical(next, $3) || |
+ identical(next, $4) || |
+ identical(next, $5) || |
+ identical(next, $6) || |
+ identical(next, $7) || |
+ identical(next, $8) || |
+ identical(next, $9)) { |
+ return tokenizeNumber(next); |
+ } |
+ |
+ if (identical(next, $EOF)) { |
+ return $EOF; |
+ } |
+ if (next < 0x1f) { |
+ return unexpected(next); |
+ } |
+ |
+ next = currentAsUnicode(next); |
+ |
+ // The following are non-ASCII characters. |
+ |
+ if (identical(next, $NBSP)) { |
+ appendWhiteSpace(next); |
+ return advance(); |
+ } |
+ |
+ return unexpected(next); |
+ } |
+ |
+ int tokenizeTag(int next) { |
+ // # or #!.*[\n\r] |
+ if (scanOffset == 0) { |
+ if (identical(peek(), $BANG)) { |
+ int start = scanOffset + 1; |
+ bool asciiOnly = true; |
+ do { |
+ next = advance(); |
+ if (next > 127) asciiOnly = false; |
+ } while (!identical(next, $LF) && |
+ !identical(next, $CR) && |
+ !identical(next, $EOF)); |
+ if (!asciiOnly) handleUnicode(start); |
+ return next; |
+ } |
+ } |
+ appendPrecedenceToken(HASH_INFO); |
+ return advance(); |
+ } |
+ |
+ int tokenizeTilde(int next) { |
+ // ~ ~/ ~/= |
+ next = advance(); |
+ if (identical(next, $SLASH)) { |
+ return select($EQ, TILDE_SLASH_EQ_INFO, TILDE_SLASH_INFO); |
+ } else { |
+ appendPrecedenceToken(TILDE_INFO); |
+ return next; |
+ } |
+ } |
+ |
+ int tokenizeOpenSquareBracket(int next) { |
+ // [ [] []= |
+ next = advance(); |
+ if (identical(next, $CLOSE_SQUARE_BRACKET)) { |
+ Token token = previousToken(); |
+ if (token is KeywordToken && token.keyword.syntax == 'operator' || |
+ token is SymbolToken && token.info == HASH_INFO) { |
+ return select($EQ, INDEX_EQ_INFO, INDEX_INFO); |
+ } |
+ } |
+ appendBeginGroup(OPEN_SQUARE_BRACKET_INFO); |
+ return next; |
+ } |
+ |
+ int tokenizeCaret(int next) { |
+ // ^ ^= |
+ return select($EQ, CARET_EQ_INFO, CARET_INFO); |
+ } |
+ |
+ int tokenizeQuestion(int next) { |
+ // ? ?. ?? ??= |
+ next = advance(); |
+ if (identical(next, $QUESTION)) { |
+ return select($EQ, QUESTION_QUESTION_EQ_INFO, QUESTION_QUESTION_INFO); |
+ } else if (identical(next, $PERIOD)) { |
+ appendPrecedenceToken(QUESTION_PERIOD_INFO); |
+ return advance(); |
+ } else { |
+ appendPrecedenceToken(QUESTION_INFO); |
+ return next; |
+ } |
+ } |
+ |
+ int tokenizeBar(int next) { |
+ // | || |= |
+ next = advance(); |
+ if (identical(next, $BAR)) { |
+ appendPrecedenceToken(BAR_BAR_INFO); |
+ return advance(); |
+ } else if (identical(next, $EQ)) { |
+ appendPrecedenceToken(BAR_EQ_INFO); |
+ return advance(); |
+ } else { |
+ appendPrecedenceToken(BAR_INFO); |
+ return next; |
+ } |
+ } |
+ |
+ int tokenizeAmpersand(int next) { |
+ // && &= & |
+ next = advance(); |
+ if (identical(next, $AMPERSAND)) { |
+ appendPrecedenceToken(AMPERSAND_AMPERSAND_INFO); |
+ return advance(); |
+ } else if (identical(next, $EQ)) { |
+ appendPrecedenceToken(AMPERSAND_EQ_INFO); |
+ return advance(); |
+ } else { |
+ appendPrecedenceToken(AMPERSAND_INFO); |
+ return next; |
+ } |
+ } |
+ |
+ int tokenizePercent(int next) { |
+ // % %= |
+ return select($EQ, PERCENT_EQ_INFO, PERCENT_INFO); |
+ } |
+ |
+ int tokenizeMultiply(int next) { |
+ // * *= |
+ return select($EQ, STAR_EQ_INFO, STAR_INFO); |
+ } |
+ |
+ int tokenizeMinus(int next) { |
+ // - -- -= |
+ next = advance(); |
+ if (identical(next, $MINUS)) { |
+ appendPrecedenceToken(MINUS_MINUS_INFO); |
+ return advance(); |
+ } else if (identical(next, $EQ)) { |
+ appendPrecedenceToken(MINUS_EQ_INFO); |
+ return advance(); |
+ } else { |
+ appendPrecedenceToken(MINUS_INFO); |
+ return next; |
+ } |
+ } |
+ |
+ int tokenizePlus(int next) { |
+ // + ++ += |
+ next = advance(); |
+ if (identical($PLUS, next)) { |
+ appendPrecedenceToken(PLUS_PLUS_INFO); |
+ return advance(); |
+ } else if (identical($EQ, next)) { |
+ appendPrecedenceToken(PLUS_EQ_INFO); |
+ return advance(); |
+ } else { |
+ appendPrecedenceToken(PLUS_INFO); |
+ return next; |
+ } |
+ } |
+ |
+ int tokenizeExclamation(int next) { |
+ // ! != |
+ // !== is kept for user-friendly error reporting. |
+ |
+ next = advance(); |
+ if (identical(next, $EQ)) { |
+ return select($EQ, BANG_EQ_EQ_INFO, BANG_EQ_INFO); |
+ } |
+ appendPrecedenceToken(BANG_INFO); |
+ return next; |
+ } |
+ |
+ int tokenizeEquals(int next) { |
+ // = == => |
+ // === is kept for user-friendly error reporting. |
+ |
+ // Type parameters and arguments cannot contain any token that |
+ // starts with '='. |
+ discardOpenLt(); |
+ |
+ next = advance(); |
+ if (identical(next, $EQ)) { |
+ return select($EQ, EQ_EQ_EQ_INFO, EQ_EQ_INFO); |
+ } else if (identical(next, $GT)) { |
+ appendPrecedenceToken(FUNCTION_INFO); |
+ return advance(); |
+ } |
+ appendPrecedenceToken(EQ_INFO); |
+ return next; |
+ } |
+ |
+ int tokenizeGreaterThan(int next) { |
+ // > >= >> >>= |
+ next = advance(); |
+ if (identical($EQ, next)) { |
+ appendPrecedenceToken(GT_EQ_INFO); |
+ return advance(); |
+ } else if (identical($GT, next)) { |
+ next = advance(); |
+ if (identical($EQ, next)) { |
+ appendPrecedenceToken(GT_GT_EQ_INFO); |
+ return advance(); |
+ } else { |
+ appendGtGt(GT_GT_INFO); |
+ return next; |
+ } |
+ } else { |
+ appendGt(GT_INFO); |
+ return next; |
+ } |
+ } |
+ |
+ int tokenizeLessThan(int next) { |
+ // < <= << <<= |
+ next = advance(); |
+ if (identical($EQ, next)) { |
+ appendPrecedenceToken(LT_EQ_INFO); |
+ return advance(); |
+ } else if (identical($LT, next)) { |
+ return select($EQ, LT_LT_EQ_INFO, LT_LT_INFO); |
+ } else { |
+ appendBeginGroup(LT_INFO); |
+ return next; |
+ } |
+ } |
+ |
+ int tokenizeNumber(int next) { |
+ int start = scanOffset; |
+ while (true) { |
+ next = advance(); |
+ if ($0 <= next && next <= $9) { |
+ continue; |
+ } else if (identical(next, $e) || identical(next, $E)) { |
+ return tokenizeFractionPart(next, start); |
+ } else { |
+ if (identical(next, $PERIOD)) { |
+ int nextnext = peek(); |
+ if ($0 <= nextnext && nextnext <= $9) { |
+ return tokenizeFractionPart(advance(), start); |
+ } |
+ } |
+ appendSubstringToken(INT_INFO, start, true); |
+ return next; |
+ } |
+ } |
+ return null; |
+ } |
+ |
+ int tokenizeHexOrNumber(int next) { |
+ int x = peek(); |
+ if (identical(x, $x) || identical(x, $X)) { |
+ return tokenizeHex(next); |
+ } |
+ return tokenizeNumber(next); |
+ } |
+ |
+ int tokenizeHex(int next) { |
+ int start = scanOffset; |
+ next = advance(); // Advance past the $x or $X. |
+ bool hasDigits = false; |
+ while (true) { |
+ next = advance(); |
+ if (($0 <= next && next <= $9) || |
+ ($A <= next && next <= $F) || |
+ ($a <= next && next <= $f)) { |
+ hasDigits = true; |
+ } else { |
+ if (!hasDigits) { |
+ unterminated('0x', shouldAdvance: false); |
+ return next; |
+ } |
+ appendSubstringToken(HEXADECIMAL_INFO, start, true); |
+ return next; |
+ } |
+ } |
+ return null; |
+ } |
+ |
+ int tokenizeDotsOrNumber(int next) { |
+ int start = scanOffset; |
+ next = advance(); |
+ if (($0 <= next && next <= $9)) { |
+ return tokenizeFractionPart(next, start); |
+ } else if (identical($PERIOD, next)) { |
+ return select($PERIOD, PERIOD_PERIOD_PERIOD_INFO, PERIOD_PERIOD_INFO); |
+ } else { |
+ appendPrecedenceToken(PERIOD_INFO); |
+ return next; |
+ } |
+ } |
+ |
+ int tokenizeFractionPart(int next, int start) { |
+ bool done = false; |
+ bool hasDigit = false; |
+ LOOP: |
+ while (!done) { |
+ if ($0 <= next && next <= $9) { |
+ hasDigit = true; |
+ } else if (identical($e, next) || identical($E, next)) { |
+ hasDigit = true; |
+ next = advance(); |
+ if (identical(next, $PLUS) || identical(next, $MINUS)) { |
+ next = advance(); |
+ } |
+ bool hasExponentDigits = false; |
+ while (true) { |
+ if ($0 <= next && next <= $9) { |
+ hasExponentDigits = true; |
+ } else { |
+ if (!hasExponentDigits) { |
+ unterminated('1e', shouldAdvance: false); |
+ return next; |
+ } |
+ break; |
+ } |
+ next = advance(); |
+ } |
+ |
+ done = true; |
+ continue LOOP; |
+ } else { |
+ done = true; |
+ continue LOOP; |
+ } |
+ next = advance(); |
+ } |
+ if (!hasDigit) { |
+ // Reduce offset, we already advanced to the token past the period. |
+ appendSubstringToken(INT_INFO, start, true, -1); |
+ |
+ // TODO(ahe): Wrong offset for the period. Cannot call beginToken because |
+ // the scanner already advanced past the period. |
+ if (identical($PERIOD, next)) { |
+ return select($PERIOD, PERIOD_PERIOD_PERIOD_INFO, PERIOD_PERIOD_INFO); |
+ } |
+ appendPrecedenceToken(PERIOD_INFO); |
+ return next; |
+ } |
+ appendSubstringToken(DOUBLE_INFO, start, true); |
+ return next; |
+ } |
+ |
+ int tokenizeSlashOrComment(int next) { |
+ int start = scanOffset; |
+ next = advance(); |
+ if (identical($STAR, next)) { |
+ return tokenizeMultiLineComment(next, start); |
+ } else if (identical($SLASH, next)) { |
+ return tokenizeSingleLineComment(next, start); |
+ } else if (identical($EQ, next)) { |
+ appendPrecedenceToken(SLASH_EQ_INFO); |
+ return advance(); |
+ } else { |
+ appendPrecedenceToken(SLASH_INFO); |
+ return next; |
+ } |
+ } |
+ |
+ int tokenizeSingleLineComment(int next, int start) { |
+ bool asciiOnly = true; |
+ while (true) { |
+ next = advance(); |
+ if (next > 127) asciiOnly = false; |
+ if (identical($LF, next) || |
+ identical($CR, next) || |
+ identical($EOF, next)) { |
+ if (!asciiOnly) handleUnicode(start); |
+ appendComment(start, asciiOnly); |
+ return next; |
+ } |
+ } |
+ return null; |
+ } |
+ |
+ int tokenizeMultiLineComment(int next, int start) { |
+ bool asciiOnlyComment = true; // Track if the entire comment is ASCII. |
+ bool asciiOnlyLines = true; // Track ASCII since the last handleUnicode. |
+ int unicodeStart = start; |
+ int nesting = 1; |
+ next = advance(); |
+ while (true) { |
+ if (identical($EOF, next)) { |
+ if (!asciiOnlyLines) handleUnicode(unicodeStart); |
+ unterminated('/*'); |
+ break; |
+ } else if (identical($STAR, next)) { |
+ next = advance(); |
+ if (identical($SLASH, next)) { |
+ --nesting; |
+ if (0 == nesting) { |
+ if (!asciiOnlyLines) handleUnicode(unicodeStart); |
+ next = advance(); |
+ appendComment(start, asciiOnlyComment); |
+ break; |
+ } else { |
+ next = advance(); |
+ } |
+ } |
+ } else if (identical($SLASH, next)) { |
+ next = advance(); |
+ if (identical($STAR, next)) { |
+ next = advance(); |
+ ++nesting; |
+ } |
+ } else if (identical(next, $LF)) { |
+ if (!asciiOnlyLines) { |
+ // Synchronize the string offset in the utf8 scanner. |
+ handleUnicode(unicodeStart); |
+ asciiOnlyLines = true; |
+ unicodeStart = scanOffset; |
+ } |
+ lineFeedInMultiline(); |
+ next = advance(); |
+ } else { |
+ if (next > 127) { |
+ asciiOnlyLines = false; |
+ asciiOnlyComment = false; |
+ } |
+ next = advance(); |
+ } |
+ } |
+ return next; |
+ } |
+ |
+ int tokenizeRawStringKeywordOrIdentifier(int next) { |
+ // [next] is $r. |
+ int nextnext = peek(); |
+ if (identical(nextnext, $DQ) || identical(nextnext, $SQ)) { |
+ int start = scanOffset; |
+ next = advance(); |
+ return tokenizeString(next, start, true); |
+ } |
+ return tokenizeKeywordOrIdentifier(next, true); |
+ } |
+ |
+ int tokenizeKeywordOrIdentifier(int next, bool allowDollar) { |
+ KeywordState state = KeywordState.KEYWORD_STATE; |
+ int start = scanOffset; |
+ while (state != null && $a <= next && next <= $z) { |
+ state = state.next(next); |
+ next = advance(); |
+ } |
+ if (state == null || state.keyword == null) { |
+ return tokenizeIdentifier(next, start, allowDollar); |
+ } |
+ if (($A <= next && next <= $Z) || |
+ ($0 <= next && next <= $9) || |
+ identical(next, $_) || |
+ identical(next, $$)) { |
+ return tokenizeIdentifier(next, start, allowDollar); |
+ } else { |
+ appendKeywordToken(state.keyword); |
+ return next; |
+ } |
+ } |
+ |
+ /** |
+ * [allowDollar] can exclude '$', which is not allowed as part of a string |
+ * interpolation identifier. |
+ */ |
+ int tokenizeIdentifier(int next, int start, bool allowDollar) { |
+ while (true) { |
+ if (($a <= next && next <= $z) || |
+ ($A <= next && next <= $Z) || |
+ ($0 <= next && next <= $9) || |
+ identical(next, $_) || |
+ (identical(next, $$) && allowDollar)) { |
+ next = advance(); |
+ } else { |
+ // Identifier ends here. |
+ if (start == scanOffset) { |
+ return unexpected(next); |
+ } else { |
+ appendSubstringToken(IDENTIFIER_INFO, start, true); |
+ } |
+ break; |
+ } |
+ } |
+ return next; |
+ } |
+ |
+ int tokenizeAt(int next) { |
+ appendPrecedenceToken(AT_INFO); |
+ return advance(); |
+ } |
+ |
+ int tokenizeString(int next, int start, bool raw) { |
+ int quoteChar = next; |
+ next = advance(); |
+ if (identical(quoteChar, next)) { |
+ next = advance(); |
+ if (identical(quoteChar, next)) { |
+ // Multiline string. |
+ return tokenizeMultiLineString(quoteChar, start, raw); |
+ } else { |
+ // Empty string. |
+ appendSubstringToken(STRING_INFO, start, true); |
+ return next; |
+ } |
+ } |
+ if (raw) { |
+ return tokenizeSingleLineRawString(next, quoteChar, start); |
+ } else { |
+ return tokenizeSingleLineString(next, quoteChar, start); |
+ } |
+ } |
+ |
+ /** |
+ * [next] is the first character after the quote. |
+ * [start] is the scanOffset of the quote. |
+ * |
+ * The token contains a substring of the source file, including the |
+ * string quotes, backslashes for escaping. For interpolated strings, |
+ * the parts before and after are separate tokens. |
+ * |
+ * "a $b c" |
+ * |
+ * gives StringToken("a $), StringToken(b) and StringToken( c"). |
+ */ |
+ int tokenizeSingleLineString(int next, int quoteChar, int start) { |
+ bool asciiOnly = true; |
+ while (!identical(next, quoteChar)) { |
+ if (identical(next, $BACKSLASH)) { |
+ next = advance(); |
+ } else if (identical(next, $$)) { |
+ if (!asciiOnly) handleUnicode(start); |
+ next = tokenizeStringInterpolation(start, asciiOnly); |
+ start = scanOffset; |
+ asciiOnly = true; |
+ continue; |
+ } |
+ if (next <= $CR && |
+ (identical(next, $LF) || |
+ identical(next, $CR) || |
+ identical(next, $EOF))) { |
+ if (!asciiOnly) handleUnicode(start); |
+ return unterminatedString(quoteChar); |
+ } |
+ if (next > 127) asciiOnly = false; |
+ next = advance(); |
+ } |
+ if (!asciiOnly) handleUnicode(start); |
+ // Advance past the quote character. |
+ next = advance(); |
+ appendSubstringToken(STRING_INFO, start, asciiOnly); |
+ return next; |
+ } |
+ |
+ int tokenizeStringInterpolation(int start, bool asciiOnly) { |
+ appendSubstringToken(STRING_INFO, start, asciiOnly); |
+ beginToken(); // $ starts here. |
+ int next = advance(); |
+ if (identical(next, $OPEN_CURLY_BRACKET)) { |
+ return tokenizeInterpolatedExpression(next); |
+ } else { |
+ return tokenizeInterpolatedIdentifier(next); |
+ } |
+ } |
+ |
+ int tokenizeInterpolatedExpression(int next) { |
+ appendBeginGroup(STRING_INTERPOLATION_INFO); |
+ beginToken(); // The expression starts here. |
+ next = advance(); // Move past the curly bracket. |
+ while (!identical(next, $EOF) && !identical(next, $STX)) { |
+ next = bigSwitch(next); |
+ } |
+ if (identical(next, $EOF)) return next; |
+ next = advance(); // Move past the $STX. |
+ beginToken(); // The string interpolation suffix starts here. |
+ return next; |
+ } |
+ |
+ int tokenizeInterpolatedIdentifier(int next) { |
+ appendPrecedenceToken(STRING_INTERPOLATION_IDENTIFIER_INFO); |
+ |
+ if ($a <= next && next <= $z) { |
+ beginToken(); // The identifier starts here. |
+ next = tokenizeKeywordOrIdentifier(next, false); |
+ } else if (($A <= next && next <= $Z) || identical(next, $_)) { |
+ beginToken(); // The identifier starts here. |
+ next = tokenizeIdentifier(next, scanOffset, false); |
+ } else { |
+ unterminated(r'$', shouldAdvance: false); |
+ } |
+ beginToken(); // The string interpolation suffix starts here. |
+ return next; |
+ } |
+ |
+ int tokenizeSingleLineRawString(int next, int quoteChar, int start) { |
+ bool asciiOnly = true; |
+ while (next != $EOF) { |
+ if (identical(next, quoteChar)) { |
+ if (!asciiOnly) handleUnicode(start); |
+ next = advance(); |
+ appendSubstringToken(STRING_INFO, start, asciiOnly); |
+ return next; |
+ } else if (identical(next, $LF) || identical(next, $CR)) { |
+ if (!asciiOnly) handleUnicode(start); |
+ return unterminatedRawString(quoteChar); |
+ } else if (next > 127) { |
+ asciiOnly = false; |
+ } |
+ next = advance(); |
+ } |
+ if (!asciiOnly) handleUnicode(start); |
+ return unterminatedRawString(quoteChar); |
+ } |
+ |
+ int tokenizeMultiLineRawString(int quoteChar, int start) { |
+ bool asciiOnlyString = true; |
+ bool asciiOnlyLine = true; |
+ int unicodeStart = start; |
+ int next = advance(); // Advance past the (last) quote (of three). |
+ outer: |
+ while (!identical(next, $EOF)) { |
+ while (!identical(next, quoteChar)) { |
+ if (identical(next, $LF)) { |
+ if (!asciiOnlyLine) { |
+ // Synchronize the string offset in the utf8 scanner. |
+ handleUnicode(unicodeStart); |
+ asciiOnlyLine = true; |
+ unicodeStart = scanOffset; |
+ } |
+ lineFeedInMultiline(); |
+ } else if (next > 127) { |
+ asciiOnlyLine = false; |
+ asciiOnlyString = false; |
+ } |
+ next = advance(); |
+ if (identical(next, $EOF)) break outer; |
+ } |
+ next = advance(); |
+ if (identical(next, quoteChar)) { |
+ next = advance(); |
+ if (identical(next, quoteChar)) { |
+ if (!asciiOnlyLine) handleUnicode(unicodeStart); |
+ next = advance(); |
+ appendSubstringToken(STRING_INFO, start, asciiOnlyString); |
+ return next; |
+ } |
+ } |
+ } |
+ if (!asciiOnlyLine) handleUnicode(unicodeStart); |
+ return unterminatedRawMultiLineString(quoteChar); |
+ } |
+ |
+ int tokenizeMultiLineString(int quoteChar, int start, bool raw) { |
+ if (raw) return tokenizeMultiLineRawString(quoteChar, start); |
+ bool asciiOnlyString = true; |
+ bool asciiOnlyLine = true; |
+ int unicodeStart = start; |
+ int next = advance(); // Advance past the (last) quote (of three). |
+ while (!identical(next, $EOF)) { |
+ if (identical(next, $$)) { |
+ if (!asciiOnlyLine) handleUnicode(unicodeStart); |
+ next = tokenizeStringInterpolation(start, asciiOnlyString); |
+ start = scanOffset; |
+ unicodeStart = start; |
+ asciiOnlyString = true; // A new string token is created for the rest. |
+ asciiOnlyLine = true; |
+ continue; |
+ } |
+ if (identical(next, quoteChar)) { |
+ next = advance(); |
+ if (identical(next, quoteChar)) { |
+ next = advance(); |
+ if (identical(next, quoteChar)) { |
+ if (!asciiOnlyLine) handleUnicode(unicodeStart); |
+ next = advance(); |
+ appendSubstringToken(STRING_INFO, start, asciiOnlyString); |
+ return next; |
+ } |
+ } |
+ continue; |
+ } |
+ if (identical(next, $BACKSLASH)) { |
+ next = advance(); |
+ if (identical(next, $EOF)) break; |
+ } |
+ if (identical(next, $LF)) { |
+ if (!asciiOnlyLine) { |
+ // Synchronize the string offset in the utf8 scanner. |
+ handleUnicode(unicodeStart); |
+ asciiOnlyLine = true; |
+ unicodeStart = scanOffset; |
+ } |
+ lineFeedInMultiline(); |
+ } else if (next > 127) { |
+ asciiOnlyString = false; |
+ asciiOnlyLine = false; |
+ } |
+ next = advance(); |
+ } |
+ if (!asciiOnlyLine) handleUnicode(unicodeStart); |
+ return unterminatedMultiLineString(quoteChar); |
+ } |
+ |
+ int unexpected(int character) { |
+ appendErrorToken(new BadInputToken(character, tokenStart)); |
+ return advanceAfterError(true); |
+ } |
+ |
+ int unterminated(String prefix, {bool shouldAdvance: true}) { |
+ appendErrorToken(new UnterminatedToken(prefix, tokenStart, stringOffset)); |
+ return advanceAfterError(shouldAdvance); |
+ } |
+ |
+ int unterminatedString(int quoteChar) { |
+ return unterminated(new String.fromCharCodes([quoteChar])); |
+ } |
+ |
+ int unterminatedRawString(int quoteChar) { |
+ return unterminated('r${new String.fromCharCodes([quoteChar])}'); |
+ } |
+ |
+ int unterminatedMultiLineString(int quoteChar) { |
+ return unterminated( |
+ new String.fromCharCodes([quoteChar, quoteChar, quoteChar])); |
+ } |
+ |
+ int unterminatedRawMultiLineString(int quoteChar) { |
+ return unterminated( |
+ 'r${new String.fromCharCodes([quoteChar, quoteChar, quoteChar])}'); |
+ } |
+ |
+ int advanceAfterError(bool shouldAdvance) { |
+ if (atEndOfFile()) return $EOF; |
+ if (shouldAdvance) { |
+ return advance(); // Ensure progress. |
+ } else { |
+ return -1; |
+ } |
+ } |
+ |
+ void unmatchedBeginGroup(BeginGroupToken begin) { |
+ // We want to ensure that unmatched BeginGroupTokens are reported as |
+ // errors. However, the diet parser assumes that groups are well-balanced |
+ // and will never look at the endGroup token. This is a nice property that |
+ // allows us to skip quickly over correct code. By inserting an additional |
+ // synthetic token in the stream, we can keep ignoring endGroup tokens. |
+ // |
+ // [begin] --next--> [tail] |
+ // [begin] --endG--> [synthetic] --next--> [next] --next--> [tail] |
+ // |
+ // This allows the diet parser to skip from [begin] via endGroup to |
+ // [synthetic] and ignore the [synthetic] token (assuming it's correct), |
+ // then the error will be reported when parsing the [next] token. |
+ // |
+ // For example, tokenize("{[1};") produces: |
+ // |
+ // SymbolToken({) --endGroup-----+ |
+ // | | |
+ // next | |
+ // v | |
+ // SymbolToken([) --endGroup--+ | |
+ // | | | |
+ // next | | |
+ // v | | |
+ // StringToken(1) | | |
+ // | v | |
+ // next SymbolToken(]) | <- Synthetic token. |
+ // | | | |
+ // | next | |
+ // v | | |
+ // UnmatchedToken([)<---------+ | |
+ // | | |
+ // next | |
+ // v | |
+ // SymbolToken(})<---------------+ |
+ // | |
+ // next |
+ // v |
+ // SymbolToken(;) |
+ // | |
+ // next |
+ // v |
+ // EOF |
+ Token synthetic = |
+ new SymbolToken(closeBraceInfoFor(begin), begin.charOffset); |
+ UnmatchedToken next = new UnmatchedToken(begin); |
+ begin.endGroup = synthetic; |
+ synthetic.next = next; |
+ appendErrorToken(next); |
+ } |
+} |
+ |
+PrecedenceInfo closeBraceInfoFor(BeginGroupToken begin) { |
+ return const { |
+ '(': CLOSE_PAREN_INFO, |
+ '[': CLOSE_SQUARE_BRACKET_INFO, |
+ '{': CLOSE_CURLY_BRACKET_INFO, |
+ '<': GT_INFO, |
+ r'${': CLOSE_CURLY_BRACKET_INFO, |
+ }[begin.value]; |
+} |