tools/lexer_generator/rule_parser.py - Issue 137883006: Experimental parser: use Terms instead of tuples

Side by Side Diff: tools/lexer_generator/rule_parser.py

Issue 137883006: Experimental parser: use Terms instead of tuples (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser

Patch Set: Created 6 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 # Copyright 2013 the V8 project authors. All rights reserved.	1 # Copyright 2013 the V8 project authors. All rights reserved.

2 # Redistribution and use in source and binary forms, with or without	2 # Redistribution and use in source and binary forms, with or without

3 # modification, are permitted provided that the following conditions are	3 # modification, are permitted provided that the following conditions are

4 # met:	4 # met:

5 #	5 #

6 # * Redistributions of source code must retain the above copyright	6 # * Redistributions of source code must retain the above copyright

7 # notice, this list of conditions and the following disclaimer.	7 # notice, this list of conditions and the following disclaimer.

8 # * Redistributions in binary form must reproduce the above	8 # * Redistributions in binary form must reproduce the above

9 # copyright notice, this list of conditions and the following	9 # copyright notice, this list of conditions and the following

10 # disclaimer in the documentation and/or other materials provided	10 # disclaimer in the documentation and/or other materials provided

11 # with the distribution.	11 # with the distribution.

12 # * Neither the name of Google Inc. nor the names of its	12 # * Neither the name of Google Inc. nor the names of its

13 # contributors may be used to endorse or promote products derived	13 # contributors may be used to endorse or promote products derived

14 # from this software without specific prior written permission.	14 # from this software without specific prior written permission.

15 #	15 #

16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS	16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT	17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR	18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT	19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,	20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT	21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,	22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY	23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT	24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE	25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.	26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

27	27

28 import ply.yacc as yacc	28 import ply.yacc as yacc

29 from automaton import Term, Action	29 from action import Term, Action

30 from rule_lexer import RuleLexer	30 from rule_lexer import RuleLexer

31 from regex_parser import RegexParser	31 from regex_parser import RegexParser

32 from nfa_builder import NfaBuilder	32 from nfa_builder import NfaBuilder

33 from dfa import Dfa	33 from dfa import Dfa

34 from dfa_optimizer import DfaOptimizer	34 from dfa_optimizer import DfaOptimizer

35 from transition_keys import TransitionKey, KeyEncoding	35 from transition_keys import TransitionKey, KeyEncoding

36	36

37 class RuleParserState:	37 class RuleParserState:

38	38

39 def __init__(self, encoding):	39 def __init__(self, encoding):

(...skipping 20 matching lines...) Expand all Loading...
60 'statements : aliases rules'	60 'statements : aliases rules'

61	61

62 def p_aliases(self, p):	62 def p_aliases(self, p):

63 '''aliases : alias_rule aliases	63 '''aliases : alias_rule aliases

64 \| empty'''	64 \| empty'''

65	65

66 def p_alias_rule(self, p):	66 def p_alias_rule(self, p):

67 'alias_rule : IDENTIFIER EQUALS composite_regex SEMICOLON'	67 'alias_rule : IDENTIFIER EQUALS composite_regex SEMICOLON'

68 state = self.__state	68 state = self.__state

69 assert not p[1] in state.aliases	69 assert not p[1] in state.aliases

70 graph = p[3]	70 term = p[3]

71 state.aliases[p[1]] = graph	71 state.aliases[p[1]] = term

72 if graph[0] == 'CLASS' or graph[0] == 'NOT_CLASS':	72 if term.name() == 'CLASS' or term.name() == 'NOT_CLASS':

73 classes = state.character_classes	73 classes = state.character_classes

74 assert not p[1] in classes	74 assert not p[1] in classes, "cannot reassign alias"

75 encoding = state.encoding	75 encoding = state.encoding

76 classes[p[1]] = TransitionKey.character_class(encoding, graph, classes)	76 classes[p[1]] = TransitionKey.character_class(encoding, term, classes)

77	77

78 def p_rules(self, p):	78 def p_rules(self, p):

79 '''rules : state_change transition_rules rules	79 '''rules : state_change transition_rules rules

80 \| empty'''	80 \| empty'''

81	81

82 def p_state_change(self, p):	82 def p_state_change(self, p):

83 'state_change : GRAPH_OPEN IDENTIFIER GRAPH_CLOSE'	83 'state_change : GRAPH_OPEN IDENTIFIER GRAPH_CLOSE'

84 state = self.__state	84 state = self.__state

85 state.current_state = p[2]	85 state.current_state = p[2]

86 assert state.current_state	86 assert state.current_state

(...skipping 77 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
164 p[0] = tuple(([p[1]] + list(p[3])))	164 p[0] = tuple(([p[1]] + list(p[3])))

165 else:	165 else:

166 raise Exception()	166 raise Exception()

167	167

168 def p_composite_regex(self, p):	168 def p_composite_regex(self, p):

169 '''composite_regex : regex_parts OR regex_parts	169 '''composite_regex : regex_parts OR regex_parts

170 \| regex_parts'''	170 \| regex_parts'''

171 if len(p) == 2:	171 if len(p) == 2:

172 p[0] = p[1]	172 p[0] = p[1]

173 else:	173 else:

174 p[0] = NfaBuilder.or_graphs([p[1], p[3]])	174 p[0] = NfaBuilder.or_terms([p[1], p[3]])

175	175

176 def p_regex_parts(self, p):	176 def p_regex_parts(self, p):

177 '''regex_parts : regex_part	177 '''regex_parts : regex_part

178 \| regex_part regex_parts'''	178 \| regex_part regex_parts'''

179 p[0] = NfaBuilder.cat_graphs(p[1:])	179 p[0] = NfaBuilder.cat_terms(p[1:])

180	180

181 def p_regex_part(self, p):	181 def p_regex_part(self, p):

182 '''regex_part : LEFT_PARENTHESIS composite_regex RIGHT_PARENTHESIS modifier	182 '''regex_part : LEFT_PARENTHESIS composite_regex RIGHT_PARENTHESIS modifier

183 \| regex_string_literal modifier	183 \| regex_string_literal modifier

184 \| regex_class modifier	184 \| regex_class modifier

185 \| regex modifier	185 \| regex modifier

186 \| regex_alias modifier'''	186 \| regex_alias modifier'''

187 modifier = p[len(p)-1]	187 modifier = p[len(p)-1]

188 graph = p[2] if len(p) == 5 else p[1]	188 term = p[2] if len(p) == 5 else p[1]

189 if modifier:	189 if modifier:

190 p[0] = NfaBuilder.apply_modifier(modifier, graph)	190 p[0] = NfaBuilder.apply_modifier(modifier, term)

191 else:	191 else:

192 p[0] = graph	192 p[0] = term

193	193

194 def p_regex_string_literal(self, p):	194 def p_regex_string_literal(self, p):

195 'regex_string_literal : STRING'	195 'regex_string_literal : STRING'

196 string = p[1][1:-1]	196 string = p[1][1:-1]

197 escape_char = lambda string, char: string.replace(char, "\\" + char)	197 escape_char = lambda string, char: string.replace(char, "\\" + char)

198 string = reduce(escape_char, "+?*\|.[](){}", string).replace("\\\"", "\"")	198 string = reduce(escape_char, "+?*\|.[](){}", string).replace("\\\"", "\"")

199 p[0] = RegexParser.parse(string)	199 p[0] = RegexParser.parse(string)

200

201 def p_regex(self, p):	200 def p_regex(self, p):

202 'regex : REGEX'	201 'regex : REGEX'

203 string = p[1][1:-1].replace("\\/", "/")	202 string = p[1][1:-1].replace("\\/", "/")

204 p[0] = RegexParser.parse(string)	203 p[0] = RegexParser.parse(string)

205	204

206 def p_regex_class(self, p):	205 def p_regex_class(self, p):

207 'regex_class : CHARACTER_CLASS_REGEX'	206 'regex_class : CHARACTER_CLASS_REGEX'

208 p[0] = RegexParser.parse(p[1])	207 p[0] = RegexParser.parse(p[1])

209	208

210 def p_regex_alias(self, p):	209 def p_regex_alias(self, p):

(...skipping 33 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
244 RuleParser.__static_instance = None	243 RuleParser.__static_instance = None

245 raise	244 raise

246 parser.__state = None	245 parser.__state = None

247 assert parser_state.transitions <= set(parser_state.rules.keys())	246 assert parser_state.transitions <= set(parser_state.rules.keys())

248	247

249 class RuleProcessor(object):	248 class RuleProcessor(object):

250	249

251 def __init__(self, parser_state):	250 def __init__(self, parser_state):

252 self.__automata = {}	251 self.__automata = {}

253 self.__rule_trees = {}	252 self.__rule_trees = {}

	253 self.__default_action = None

254 self.__process_parser_state(parser_state)	254 self.__process_parser_state(parser_state)

255	255

256 @staticmethod	256 @staticmethod

257 def parse(string, encoding_name):	257 def parse(string, encoding_name):

258 parser_state = RuleParserState(KeyEncoding.get(encoding_name))	258 parser_state = RuleParserState(KeyEncoding.get(encoding_name))

259 RuleParser.parse(string, parser_state)	259 RuleParser.parse(string, parser_state)

260 return RuleProcessor(parser_state)	260 return RuleProcessor(parser_state)

261	261

262 def automata_iter(self):	262 def automata_iter(self):

263 return iter(self.__automata.items())	263 return iter(self.__automata.items())

264	264

265 def default_automata(self):	265 def default_automata(self):

266 return self.__automata['default']	266 return self.__automata['default']

267	267

268 def rule_tree_dots(self):	268 def default_action(self):

269 result = {}	269 return self.__default_action

270 for rule in self.__rule_trees:

271 result[rule] = NfaBuilder.rule_tree_dot(self.__rule_trees[rule])

272 return result

273	270

274 class Automata(object):	271 class Automata(object):

275	272

276 def __init__(self, builder, graph):	273 def __init__(self, encoding, character_classes, rule_term):

277 self.__builder = builder	274 self.__encoding = encoding

278 self.__graph = graph	275 self.__character_classes = character_classes

	276 self.__rule_term = rule_term

279 self.__nfa = None	277 self.__nfa = None

280 self.__dfa = None	278 self.__dfa = None

281 self.__minimial_dfa = None	279 self.__minimial_dfa = None

282	280

	281 def rule_term(self):

	282 return self.__rule_term

	283

283 def nfa(self):	284 def nfa(self):

284 if not self.__nfa:	285 if not self.__nfa:

285 self.__nfa = self.__builder.nfa(self.__graph)	286 self.__nfa = NfaBuilder.nfa(

	287 self.__encoding, self.__character_classes, self.__rule_term)

286 return self.__nfa	288 return self.__nfa

287	289

288 def dfa(self):	290 def dfa(self):

289 if not self.__dfa:	291 if not self.__dfa:

290 (start, dfa_nodes) = self.nfa().compute_dfa()	292 (start, dfa_nodes) = self.nfa().compute_dfa()

291 self.__dfa = Dfa(self.nfa().encoding(), start, dfa_nodes)	293 self.__dfa = Dfa(self.nfa().encoding(), start, dfa_nodes)

292 return self.__dfa	294 return self.__dfa

293	295

294 def optimize_dfa(self, log = False):	296 def optimize_dfa(self, log = False):

295 assert not self.__dfa	297 assert not self.__dfa

296 self.__dfa = DfaOptimizer.optimize(self.dfa(), log)	298 self.__dfa = DfaOptimizer.optimize(self.dfa(), log)

297	299

298 def minimal_dfa(self):	300 def minimal_dfa(self):

299 if not self.__minimial_dfa:	301 if not self.__minimial_dfa:

300 self.__minimial_dfa = self.dfa().minimize()	302 self.__minimial_dfa = self.dfa().minimize()

301 return self.__minimial_dfa	303 return self.__minimial_dfa

302	304

303 def __process_parser_state(self, parser_state):	305 def __process_parser_state(self, parser_state):

304 rule_map = {}	306 rule_map = {}

305 builder = NfaBuilder(parser_state.encoding, parser_state.character_classes)

306 assert 'default' in parser_state.rules	307 assert 'default' in parser_state.rules

307 def process(subgraph, v):	308 def process(subgraph, v):

308 graphs = []	309 graphs = []

309 for graph, precedence, action in v['regex']:	310 for graph, precedence, action in v['regex']:

310 (entry_action, match_action, transition) = action	311 (entry_action, match_action, transition) = action

311 if entry_action or match_action:	312 if entry_action or match_action:

312 action = Action(entry_action, match_action, precedence)	313 graph = NfaBuilder.add_action(

313 graph = NfaBuilder.add_action(graph, action)	314 graph, Action(entry_action, match_action, precedence))

314 if not transition:	315 if not transition:

315 pass	316 pass

316 elif transition == 'continue':	317 elif transition == 'continue':

317 assert not subgraph == 'default', 'unimplemented'	318 assert not subgraph == 'default', 'unimplemented'

318 graph = NfaBuilder.add_continue(graph)	319 graph = NfaBuilder.add_continue(graph)

319 else:	320 else:

320 assert subgraph == 'default', 'unimplemented'	321 assert subgraph == 'default', 'unimplemented'

321 graph = NfaBuilder.join_subgraph(	322 graph = NfaBuilder.join_subgraph(

322 graph, transition, rule_map[transition])	323 graph, transition, rule_map[transition])

323 graphs.append(graph)	324 graphs.append(graph)

324 graph = NfaBuilder.or_graphs(graphs)	325 graph = NfaBuilder.or_terms(graphs)

325 rule_map[subgraph] = graph	326 rule_map[subgraph] = graph

326 # process first the subgraphs, then the default graph	327 # process first the subgraphs, then the default graph

327 for k, v in parser_state.rules.items():	328 for k, v in parser_state.rules.items():

328 if k == 'default': continue	329 if k == 'default': continue

329 process(k, v)	330 process(k, v)

330 process('default', parser_state.rules['default'])	331 process('default', parser_state.rules['default'])

331 # build the automata	332 # build the automata

332 for rule_name, graph in rule_map.items():	333 for rule_name, graph in rule_map.items():

333 self.__automata[rule_name] = RuleProcessor.Automata(builder, graph)	334 self.__automata[rule_name] = RuleProcessor.Automata(

	335 parser_state.encoding, parser_state.character_classes, graph)

334 self.__rule_trees[rule_name] = graph	336 self.__rule_trees[rule_name] = graph

335 # process default_action	337 # process default_action

336 default_action = parser_state.rules['default']['default_action']	338 default_action = parser_state.rules['default']['default_action']

337 self.default_action = Action(Term.empty_term(), default_action)	339 self.__default_action = Action(Term.empty_term(), default_action)

OLD	NEW

« no previous file with comments | « tools/lexer_generator/regex_parser.py ('k') | tools/lexer_generator/term_test.py » ('j') | no next file with comments »