observatory_pub_packages/polymer_expressions/tokenizer.dart - Issue 816693004: Add observatory_pub_packages snapshot to third_party

Side by Side Diff: observatory_pub_packages/polymer_expressions/tokenizer.dart

Issue 816693004: Add observatory_pub_packages snapshot to third_party (Closed) Base URL: http://dart.googlecode.com/svn/third_party/

Patch Set: Created 6 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« no previous file with comments | « observatory_pub_packages/polymer_expressions/src/globals.dart ('k') | observatory_pub_packages/polymer_expressions/visitor.dart » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file

	2 // for details. All rights reserved. Use of this source code is governed by a

	3 // BSD-style license that can be found in the LICENSE file.

	4

	5 library polymer_expressions.tokenizer;

	6

	// Code points of the characters the tokenizer distinguishes, written in
	// hexadecimal.

	// Control and whitespace characters.
	const int _TAB = 0x09;
	const int _LF = 0x0A;
	const int _VTAB = 0x0B;
	const int _FF = 0x0C;
	const int _CR = 0x0D;
	const int _SPACE = 0x20;

	// Punctuation below the digits.
	const int _BANG = 0x21;
	const int _DQ = 0x22;
	const int _$ = 0x24;
	const int _PERCENT = 0x25;
	const int _AMPERSAND = 0x26;
	const int _SQ = 0x27;
	const int _OPEN_PAREN = 0x28;
	const int _CLOSE_PAREN = 0x29;
	const int _STAR = 0x2A;
	const int _PLUS = 0x2B;
	const int _COMMA = 0x2C;
	const int _MINUS = 0x2D;
	const int _PERIOD = 0x2E;
	const int _SLASH = 0x2F;

	// Bounds of the decimal digit range.
	const int _0 = 0x30;
	const int _9 = 0x39;

	// Punctuation between the digits and the upper-case letters.
	const int _COLON = 0x3A;
	const int _LT = 0x3C;
	const int _EQ = 0x3D;
	const int _GT = 0x3E;
	const int _QUESTION = 0x3F;

	// Bounds of the upper-case letter range.
	const int _A = 0x41;
	const int _Z = 0x5A;

	// Punctuation between the upper-case and lower-case letters.
	const int _OPEN_SQUARE_BRACKET = 0x5B;
	const int _BACKSLASH = 0x5C;
	const int _CLOSE_SQUARE_BRACKET = 0x5D;
	const int _CARET = 0x5E;
	const int _US = 0x5F;

	// Lower-case letters: the range bounds (_a, _z) and the letters that
	// name an escape sequence.
	const int _a = 0x61;
	const int _f = 0x66;
	const int _n = 0x6E;
	const int _r = 0x72;
	const int _t = 0x74;
	const int _v = 0x76;
	const int _z = 0x7A;

	// Punctuation above the lower-case letters.
	const int _OPEN_CURLY_BRACKET = 0x7B;
	const int _BAR = 0x7C;
	const int _CLOSE_CURLY_BRACKET = 0x7D;

	// No-break space.
	const int _NBSP = 0xA0;

	52

	// Code points that start an operator token.
	const _OPERATORS = const <int>[
	  _PLUS, _MINUS, _STAR, _SLASH, _BANG, _AMPERSAND,
	  _PERCENT, _LT, _EQ, _GT, _QUESTION, _CARET, _BAR,
	];

	// Code points that form a single-character grouper token.
	const _GROUPERS = const <int>[
	  _OPEN_PAREN, _CLOSE_PAREN,
	  _OPEN_SQUARE_BRACKET, _CLOSE_SQUARE_BRACKET,
	  _OPEN_CURLY_BRACKET, _CLOSE_CURLY_BRACKET,
	];

	// Operators spelled with two characters. A pair of operator characters
	// that is not listed here is not combined into one token.
	const _TWO_CHAR_OPS = const <String>['==', '!=', '<=', '>=', '||', '&&'];

	61

	/// Words that are reported as keyword tokens instead of identifier tokens.
	const List<String> KEYWORDS = const <String>['as', 'in', 'this'];

	63

	/// Precedence attached to postfix tokens: '(', '[', '.' and '{'.
	const POSTFIX_PRECEDENCE = 11;

	// Precedence recorded on operator and grouper tokens, keyed by the token
	// text. A token whose text has no entry here gets a null precedence from
	// the map lookup.
	const _PRECEDENCE = const <String, int>{
	  '!': 0,
	  ':': 0,
	  ',': 0,
	  ')': 0,
	  ']': 0,
	  '}': 0, // ?
	  '?': 1,
	  '||': 2,
	  '&&': 3,
	  '|': 4,
	  '^': 5,
	  '&': 6,

	  // equality
	  '!=': 7,
	  '==': 7,
	  '!==': 7,
	  '===': 7,

	  // relational
	  '>=': 8,
	  '>': 8,
	  '<=': 8,
	  '<': 8,

	  // additive
	  '+': 9,
	  '-': 9,

	  // multiplicative
	  '%': 10,
	  '/': 10,
	  '*': 10,

	  // postfix
	  '(': POSTFIX_PRECEDENCE,
	  '[': POSTFIX_PRECEDENCE,
	  '.': POSTFIX_PRECEDENCE,
	  '{': POSTFIX_PRECEDENCE, // not sure this is correct
	};

	107

	// Token kinds: the values stored in the `kind` field of a token.
	const int STRING_TOKEN = 1,
	    IDENTIFIER_TOKEN = 2,
	    DOT_TOKEN = 3,
	    COMMA_TOKEN = 4,
	    COLON_TOKEN = 5,
	    INTEGER_TOKEN = 6,
	    DECIMAL_TOKEN = 7,
	    OPERATOR_TOKEN = 8,
	    GROUPER_TOKEN = 9,
	    KEYWORD_TOKEN = 10;

	118

	/// Whether [next] is a space, a tab, or a no-break space.
	///
	/// Line feeds and carriage returns are not included.
	bool isWhitespace(int next) =>
	    next == _SPACE || next == _TAB || next == _NBSP;

	120

	/// Whether [next] can be the first character of an identifier or keyword.
	///
	/// Accepts ASCII letters, `_`, `$`, and every code point above 127.
	/// Digits are not accepted in the first position.
	bool isIdentifierOrKeywordStart(int next) =>
	    (_a <= next && next <= _z) ||
	    (_A <= next && next <= _Z) ||
	    next == _US ||
	    next == _$ ||
	    next > 127;

	123

	/// Whether [next] can appear inside an identifier or keyword.
	///
	/// Accepts ASCII letters, decimal digits, `_`, `$`, and every code point
	/// above 127.
	bool isIdentifier(int next) =>
	    (_a <= next && next <= _z) ||
	    (_A <= next && next <= _Z) ||
	    (_0 <= next && next <= _9) ||
	    next == _US ||
	    next == _$ ||
	    next > 127;

	127

	/// Whether [next] is a double quote or a single quote.
	bool isQuote(int next) => next == _DQ || next == _SQ;

	129

	/// Whether [next] is a decimal digit, `0` through `9`.
	///
	/// Returns false for `null`, so the predicate can be applied to the
	/// end-of-input marker without a failing comparison.
	bool isNumber(int next) => next != null && _0 <= next && next <= _9;

	131

	/// Whether [next] is one of the code points listed in [_OPERATORS].
	bool isOperator(int next) {
	  return _OPERATORS.contains(next);
	}

	133

	/// Whether [next] is one of the code points listed in [_GROUPERS].
	bool isGrouper(int next) {
	  return _GROUPERS.contains(next);
	}

	135

	/// Maps the character following a backslash to the character it denotes.
	///
	/// `f`, `n`, `r`, `t` and `v` become form feed, line feed, carriage
	/// return, tab and vertical tab. Any other code point is returned as is.
	int escape(int c) {
	  if (c == _f) return _FF;
	  if (c == _n) return _LF;
	  if (c == _r) return _CR;
	  if (c == _t) return _TAB;
	  if (c == _v) return _VTAB;
	  return c;
	}

	146

	/// A single lexical unit: a kind, its text, and a precedence.
	class Token {
	  /// Numeric kind of this token.
	  final int kind;

	  /// Text of this token.
	  final String value;

	  /// Precedence of this token; 0 when the constructor is not given one.
	  final int precedence;

	  Token(this.kind, this.value, [this.precedence = 0]);

	  /// Renders the token as `(kind, 'value')`; the precedence is left out.
	  @override
	  String toString() {
	    return "($kind, '$value')";
	  }
	}

	156

	/// Splits an expression string into a list of [Token]s.
	///
	/// The input is read one code point at a time. [tokenize] dispatches on
	/// the current code point to one of the `tokenize*` methods, each of
	/// which consumes one token's worth of input and appends to the token
	/// list.
	class Tokenizer {
	  final List<Token> _tokens = <Token>[];

	  // Scratch buffer for the token being built. Every method that writes to
	  // it clears it again once the token has been emitted.
	  final StringBuffer _sb = new StringBuffer();
	  final RuneIterator _iterator;

	  // The code point under examination, or null once the input is exhausted.
	  int _next;

	  Tokenizer(String input) : _iterator = new RuneIterator(input);

	  // Moves to the next code point; sets [_next] to null at end of input.
	  _advance() {
	    _next = _iterator.moveNext() ? _iterator.current : null;
	  }

	  /// Consumes the whole input and returns the tokens found.
	  ///
	  /// Whitespace is skipped, and so is any code point that starts no known
	  /// token. Throws a [ParseException] for an unterminated string literal.
	  List<Token> tokenize() {
	    _advance();
	    while (_next != null) {
	      if (isWhitespace(_next)) {
	        _advance();
	      } else if (isQuote(_next)) {
	        tokenizeString();
	      } else if (isIdentifierOrKeywordStart(_next)) {
	        tokenizeIdentifierOrKeyword();
	      } else if (isNumber(_next)) {
	        tokenizeNumber();
	      } else if (_next == _PERIOD) {
	        tokenizeDot();
	      } else if (_next == _COMMA) {
	        tokenizeComma();
	      } else if (_next == _COLON) {
	        tokenizeColon();
	      } else if (isOperator(_next)) {
	        tokenizeOperator();
	      } else if (isGrouper(_next)) {
	        tokenizeGrouper();
	      } else {
	        // Unrecognized character: drop it and carry on.
	        _advance();
	      }
	    }
	    return _tokens;
	  }

	  /// Reads a quoted string whose opening quote is the current character.
	  ///
	  /// The string ends at the next unescaped occurrence of the same quote
	  /// character. A backslash makes the following character go through
	  /// [escape]. Throws a [ParseException] if the input ends first.
	  tokenizeString() {
	    int quoteChar = _next;
	    _advance();
	    while (_next != quoteChar) {
	      if (_next == null) throw new ParseException("unterminated string");
	      if (_next == _BACKSLASH) {
	        _advance();
	        if (_next == null) throw new ParseException("unterminated string");
	        _sb.writeCharCode(escape(_next));
	      } else {
	        _sb.writeCharCode(_next);
	      }
	      _advance();
	    }
	    _tokens.add(new Token(STRING_TOKEN, _sb.toString()));
	    _sb.clear();
	    // Step past the closing quote.
	    _advance();
	  }

	  /// Reads a run of identifier characters and emits a keyword token if
	  /// the text is listed in [KEYWORDS], an identifier token otherwise.
	  tokenizeIdentifierOrKeyword() {
	    while (_next != null && isIdentifier(_next)) {
	      _sb.writeCharCode(_next);
	      _advance();
	    }
	    var value = _sb.toString();
	    if (KEYWORDS.contains(value)) {
	      _tokens.add(new Token(KEYWORD_TOKEN, value));
	    } else {
	      _tokens.add(new Token(IDENTIFIER_TOKEN, value));
	    }
	    _sb.clear();
	  }

	  /// Reads a run of digits and emits an integer token, or a decimal token
	  /// if the digits are followed by a period and at least one more digit.
	  ///
	  /// Digits followed by a period that starts no fraction (as in `1.foo`,
	  /// or `1.` at end of input) produce an integer token and then a dot
	  /// token.
	  tokenizeNumber() {
	    while (_next != null && isNumber(_next)) {
	      _sb.writeCharCode(_next);
	      _advance();
	    }
	    if (_next != _PERIOD) {
	      _emitInteger();
	      return;
	    }
	    // Step past the period and look at what follows it.
	    _advance();
	    if (_next != null && isNumber(_next)) {
	      // The buffered integer digits become the start of the decimal.
	      tokenizeFraction();
	    } else {
	      // Emit the buffered digits first, so that they do not end up in
	      // the text of whichever token comes next.
	      _emitInteger();
	      _tokens.add(new Token(DOT_TOKEN, '.', POSTFIX_PRECEDENCE));
	    }
	  }

	  // Emits the buffered digits as an integer token and clears the buffer.
	  _emitInteger() {
	    _tokens.add(new Token(INTEGER_TOKEN, _sb.toString()));
	    _sb.clear();
	  }

	  /// Handles a period: reads a decimal such as `.5` if a digit follows,
	  /// and emits a dot token otherwise, including at end of input.
	  tokenizeDot() {
	    _advance();
	    if (_next != null && isNumber(_next)) {
	      tokenizeFraction();
	    } else {
	      _tokens.add(new Token(DOT_TOKEN, '.', POSTFIX_PRECEDENCE));
	    }
	  }

	  /// Consumes a comma and emits a comma token.
	  tokenizeComma() {
	    _advance();
	    _tokens.add(new Token(COMMA_TOKEN, ','));
	  }

	  /// Consumes a colon and emits a colon token.
	  tokenizeColon() {
	    _advance();
	    _tokens.add(new Token(COLON_TOKEN, ':'));
	  }

	  /// Reads the digits after a decimal point and emits a decimal token.
	  ///
	  /// Expects the period to have been consumed already. Any integer digits
	  /// still in the buffer form the part before the decimal point.
	  tokenizeFraction() {
	    _sb.writeCharCode(_PERIOD);
	    while (_next != null && isNumber(_next)) {
	      _sb.writeCharCode(_next);
	      _advance();
	    }
	    _tokens.add(new Token(DECIMAL_TOKEN, _sb.toString()));
	    _sb.clear();
	  }

	  /// Reads a one-, two- or three-character operator and emits it with the
	  /// precedence looked up in [_PRECEDENCE].
	  tokenizeOperator() {
	    int startChar = _next;
	    _advance();
	    // Start from the single-character operator and widen it if possible.
	    var op = new String.fromCharCode(startChar);
	    if (isOperator(_next)) {
	      var op2 = new String.fromCharCodes([startChar, _next]);
	      if (_TWO_CHAR_OPS.contains(op2)) {
	        op = op2;
	        _advance();
	        // kind of hacky check for === and !==, could be better / more
	        // general
	        if (_next == _EQ && (startChar == _BANG || startChar == _EQ)) {
	          op = op2 + '=';
	          _advance();
	        }
	      }
	    }
	    _tokens.add(new Token(OPERATOR_TOKEN, op, _PRECEDENCE[op]));
	  }

	  /// Emits the current bracket character as a grouper token and consumes
	  /// it.
	  tokenizeGrouper() {
	    var value = new String.fromCharCode(_next);
	    _tokens.add(new Token(GROUPER_TOKEN, value, _PRECEDENCE[value]));
	    _advance();
	  }
	}

	303

	/// Exception carrying a description of a parse failure.
	class ParseException implements Exception {
	  /// Description of what went wrong.
	  final String message;

	  ParseException(this.message);

	  /// Renders the exception as `ParseException: ` followed by [message].
	  @override
	  String toString() {
	    return "ParseException: $message";
	  }
	}

OLD	NEW