src/lexer/lexer_py.re - Issue 75143002: Experimental parser: make string rules look more like ecma spec

Side by Side Diff: src/lexer/lexer_py.re

Issue 75143002: Experimental parser: make string rules look more like ecma spec (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser

Patch Set: Created 7 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 # Copyright 2013 the V8 project authors. All rights reserved.	1 # Copyright 2013 the V8 project authors. All rights reserved.

2 # Redistribution and use in source and binary forms, with or without	2 # Redistribution and use in source and binary forms, with or without

3 # modification, are permitted provided that the following conditions are	3 # modification, are permitted provided that the following conditions are

4 # met:	4 # met:

5 #	5 #

6 # * Redistributions of source code must retain the above copyright	6 # * Redistributions of source code must retain the above copyright

7 # notice, this list of conditions and the following disclaimer.	7 # notice, this list of conditions and the following disclaimer.

8 # * Redistributions in binary form must reproduce the above	8 # * Redistributions in binary form must reproduce the above

9 # copyright notice, this list of conditions and the following	9 # copyright notice, this list of conditions and the following

10 # disclaimer in the documentation and/or other materials provided	10 # disclaimer in the documentation and/or other materials provided

(...skipping 14 matching lines...)
25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE	25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.	26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

27	27

28 whitespace_char = [ \t\v\f\r:ws:\240];	28 whitespace_char = [ \t\v\f\r:ws:\240];

29 whitespace = whitespace_char+;	29 whitespace = whitespace_char+;

30 identifier_start = [$_a-zA-Z:lit:];	30 identifier_start = [$_a-zA-Z:lit:];

31 identifier_char = [0-9:identifier_start:];	31 identifier_char = [0-9:identifier_start:];

32 line_terminator = [\n\r];	32 line_terminator = [\n\r];

33 digit = [0-9];	33 digit = [0-9];

34 hex_digit = [0-9a-fA-F];	34 hex_digit = [0-9a-fA-F];

	35 single_escape_char = ['"\\bfnrtva];

35 maybe_exponent = /([eE][\-+]?[:digit:]+)?/;	36 maybe_exponent = /([eE][\-+]?[:digit:]+)?/;

36 number =	37 number =

37 /0[xX][:hex_digit:]+/ \| (	38 /0[xX][:hex_digit:]+/ \| (

38 /\.[:digit:]+/ maybe_exponent \|	39 /\.[:digit:]+/ maybe_exponent \|

39 /[:digit:]+(\.[:digit:]*)?/ maybe_exponent );	40 /[:digit:]+(\.[:digit:]*)?/ maybe_exponent );

	41 # TODO this is incomplete/incorrect

	42 line_terminator_sequence = (/\n\r?/)\|(/\r\n?/);

40	43

41 # grammar is	44 # grammar is

42 # regex <action_on_state_entry\|action_on_match\|transition>	45 # regex <action_on_state_entry\|action_on_match\|transition>

43 #	46 #

44 # actions can be c code enclosed in {} or identifiers to be passed to codegen	47 # actions can be c code enclosed in {} or identifiers to be passed to codegen

45 # transition must be in continue or the name of a subgraph	48 # transition must be in continue or the name of a subgraph

46	49

47 <<default>>	50 <<default>>

48 "\|=" <\|push_token(ASSIGN_BIT_OR)\|>	51 "\|=" <\|push_token(ASSIGN_BIT_OR)\|>

49 "^=" <\|push_token(ASSIGN_BIT_XOR)\|>	52 "^=" <\|push_token(ASSIGN_BIT_XOR)\|>

(...skipping 47 matching lines...)
97 "<<=" <\|push_token(ASSIGN_SHL)\|>	100 "<<=" <\|push_token(ASSIGN_SHL)\|>

98 ">>=" <\|push_token(ASSIGN_SAR)\|>	101 ">>=" <\|push_token(ASSIGN_SAR)\|>

99 "<=" <\|push_token(LTE)\|>	102 "<=" <\|push_token(LTE)\|>

100 ">=" <\|push_token(GTE)\|>	103 ">=" <\|push_token(GTE)\|>

101 "<<" <\|push_token(SHL)\|>	104 "<<" <\|push_token(SHL)\|>

102 ">>" <\|push_token(SAR)\|>	105 ">>" <\|push_token(SAR)\|>

103 "<" <\|push_token(LT)\|>	106 "<" <\|push_token(LT)\|>

104 ">" <\|push_token(GT)\|>	107 ">" <\|push_token(GT)\|>

105	108

106 number <\|push_token(NUMBER)\|>	109 number <\|push_token(NUMBER)\|>

107 # is this necessary?

108 number identifier_char <\|push_token(ILLEGAL)\|>	110 number identifier_char <\|push_token(ILLEGAL)\|>

109	111

110 "(" <\|push_token(LPAREN)\|>	112 "(" <\|push_token(LPAREN)\|>

111 ")" <\|push_token(RPAREN)\|>	113 ")" <\|push_token(RPAREN)\|>

112 "[" <\|push_token(LBRACK)\|>	114 "[" <\|push_token(LBRACK)\|>

113 "]" <\|push_token(RBRACK)\|>	115 "]" <\|push_token(RBRACK)\|>

114 "{" <\|push_token(LBRACE)\|>	116 "{" <\|push_token(LBRACE)\|>

115 "}" <\|push_token(RBRACE)\|>	117 "}" <\|push_token(RBRACE)\|>

116 ":" <\|push_token(COLON)\|>	118 ":" <\|push_token(COLON)\|>

117 ";" <\|push_token(SEMICOLON)\|>	119 ";" <\|push_token(SEMICOLON)\|>

(...skipping 63 matching lines...)
181 "true" <\|push_token(TRUE_LITERAL)\|>	183 "true" <\|push_token(TRUE_LITERAL)\|>

182 "try" <\|push_token(TRY)\|>	184 "try" <\|push_token(TRY)\|>

183 "typeof" <\|push_token(TYPEOF)\|>	185 "typeof" <\|push_token(TYPEOF)\|>

184 "var" <\|push_token(VAR)\|>	186 "var" <\|push_token(VAR)\|>

185 "void" <\|push_token(VOID)\|>	187 "void" <\|push_token(VOID)\|>

186 "while" <\|push_token(WHILE)\|>	188 "while" <\|push_token(WHILE)\|>

187 "with" <\|push_token(WITH)\|>	189 "with" <\|push_token(WITH)\|>

188 "yield" <\|push_token(YIELD)\|>	190 "yield" <\|push_token(YIELD)\|>

189	191

190 identifier_start <\|push_token(IDENTIFIER)\|Identifier>	192 identifier_start <\|push_token(IDENTIFIER)\|Identifier>

191 /\\u[0-9a-fA-F]{4}/ <{	193 /\\u[:hex_digit:]{4}/ <{

192 if (V8_UNLIKELY(!ValidIdentifierStart())) {	194 if (V8_UNLIKELY(!ValidIdentifierStart())) {

193 goto default_action;	195 goto default_action;

194 }	196 }

195 }\|push_token(IDENTIFIER)\|Identifier>	197 }\|push_token(IDENTIFIER)\|Identifier>

196	198

197 eof <\|terminate\|>	199 eof <\|terminate\|>

198 default_action <push_token(ILLEGAL)>	200 default_action <push_token(ILLEGAL)>

199	201

200 <<DoubleQuoteString>>	202 <<DoubleQuoteString>>

201 /\\\n\r?/ <\|\|continue>	203 "\\" line_terminator_sequence <\|\|continue>

202 /\\\r\n?/ <\|\|continue>	204 /\\[xX][:hex_digit:]{2}/ <\|\|continue>

203 /\\[xX][:hex_digit:]{2}/ <\|\|continue>	205 /\\[u][:hex_digit:]{4}/ <\|\|continue>

204 /\\[^xX\r\n]/ <\|\|continue>	206 /\\[^xXu\r\n]/ <\|\|continue>

205 /\n\|\r/ <\|push_token(ILLEGAL)\|>	207 "\\" <\|push_token(ILLEGAL)\|>

206 "\"" <\|push_token(STRING)\|>	208 /\n\|\r/ <\|push_token(ILLEGAL)\|>

207 eof <\|terminate_illegal\|>	209 "\"" <\|push_token(STRING)\|>

208 catch_all <\|\|continue>	210 eof <\|terminate_illegal\|>

	211 catch_all <\|\|continue>

209	212

210 <<SingleQuoteString>>	213 <<SingleQuoteString>>

211 /\\\n\r?/ <\|\|continue>	214 # TODO subgraph for '\'

212 /\\\r\n?/ <\|\|continue>	215 "\\" line_terminator_sequence <\|\|continue>

213 /\\[xX][:hex_digit:]{2}/ <\|\|continue>	216 /\\[xX][:hex_digit:]{2}/ <\|\|continue>

214 /\\[^xX\r\n]/ <\|\|continue>	217 /\\[u][:hex_digit:]{4}/ <\|\|continue>

215 /\n\|\r/ <\|push_token(ILLEGAL)\|>	218 /\\[^xXu\r\n]/ <\|\|continue>

216 "'" <\|push_token(STRING)\|>	219 "\\" <\|push_token(ILLEGAL)\|>

217 eof <\|terminate_illegal\|>	220 /\n\|\r/ <\|push_token(ILLEGAL)\|>

218 catch_all <\|\|continue>	221 "'" <\|push_token(STRING)\|>

	222 eof <\|terminate_illegal\|>

	223 catch_all <\|\|continue>

219	224

220 <<Identifier>>	225 <<Identifier>>

221 identifier_char <\|push_token(IDENTIFIER)\|continue>	226 identifier_char <\|push_token(IDENTIFIER)\|continue>

222 /\\u[0-9a-fA-F]{4}/ <{	227 /\\u[:hex_digit:]{4}/ <{

223 if (V8_UNLIKELY(!ValidIdentifierPart())) {	228 if (V8_UNLIKELY(!ValidIdentifierPart())) {

224 goto default_action;	229 goto default_action;

225 }	230 }

226 }\|push_token(IDENTIFIER)\|continue>	231 }\|push_token(IDENTIFIER)\|continue>

227	232

228 <<SingleLineComment>>	233 <<SingleLineComment>>

229 line_terminator <\|push_line_terminator\|>	234 line_terminator <\|push_line_terminator\|>

230 eof <\|skip_and_terminate\|>	235 eof <\|skip_and_terminate\|>

231 catch_all <\|\|continue>	236 catch_all <\|\|continue>

232	237

233 <<MultiLineComment>>	238 <<MultiLineComment>>

234 /\*+\// <\|skip\|>	239 /\*+\// <\|skip\|>

235 # TODO find a way to generate the below rule	240 # TODO find a way to generate the below rule

236 /\+[^\/]/ <\|\|continue>	241 /\+[^\/]/ <\|\|continue>

237 line_terminator <push_line_terminator\|\|continue>	242 line_terminator <push_line_terminator\|\|continue>

238 eof <\|skip_and_terminate\|>	243 eof <\|skip_and_terminate\|>

239 catch_all <\|\|continue>	244 catch_all <\|\|continue>

OLD	NEW

« no previous file with comments | « no previous file | tools/lexer_generator/regex_lexer.py » ('j') | no next file with comments »