tools/lexer_generator/rule_parser.py - Issue 62103017: Experimental parser: rule grammar refactor

Side by Side Diff: tools/lexer_generator/rule_parser.py

Issue 62103017: Experimental parser: rule grammar refactor (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser

Patch Set: Created 7 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 # Copyright 2013 the V8 project authors. All rights reserved.	1 # Copyright 2013 the V8 project authors. All rights reserved.

2 # Redistribution and use in source and binary forms, with or without	2 # Redistribution and use in source and binary forms, with or without

3 # modification, are permitted provided that the following conditions are	3 # modification, are permitted provided that the following conditions are

4 # met:	4 # met:

5 #	5 #

6 # * Redistributions of source code must retain the above copyright	6 # * Redistributions of source code must retain the above copyright

7 # notice, this list of conditions and the following disclaimer.	7 # notice, this list of conditions and the following disclaimer.

8 # * Redistributions in binary form must reproduce the above	8 # * Redistributions in binary form must reproduce the above

9 # copyright notice, this list of conditions and the following	9 # copyright notice, this list of conditions and the following

10 # disclaimer in the documentation and/or other materials provided	10 # disclaimer in the documentation and/or other materials provided

(...skipping 33 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
44 self.rules = {}	44 self.rules = {}

45 self.transitions = set()	45 self.transitions = set()

46	46

47 def parse(self, string):	47 def parse(self, string):

48 return RuleParser.parse(string, self)	48 return RuleParser.parse(string, self)

49	49

50 class RuleParser:	50 class RuleParser:

51	51

52 tokens = RuleLexer.tokens	52 tokens = RuleLexer.tokens

53 __rule_precedence_counter = 0	53 __rule_precedence_counter = 0

54 __keyword_transitions = set([	54 __keyword_transitions = set(['continue'])

55 'continue', 'break', 'terminate', 'terminate_illegal', 'skip'])

56	55

57 def __init__(self):	56 def __init__(self):

58 self.__state = None	57 self.__state = None

59	58

60 def p_statements(self, p):	59 def p_statements(self, p):

61 'statements : aliases rules'	60 'statements : aliases rules'

62	61

63 def p_aliases(self, p):	62 def p_aliases(self, p):

64 '''aliases : alias_rule aliases	63 '''aliases : alias_rule aliases

65 \| empty'''	64 \| empty'''

66	65

67 def p_alias_rule(self, p):	66 def p_alias_rule(self, p):

68 'alias_rule : IDENTIFIER EQUALS composite_regex SEMICOLON'	67 'alias_rule : IDENTIFIER EQUALS composite_regex SEMICOLON'

69 state = self.__state	68 state = self.__state

70 assert not p[1] in state.aliases	69 assert not p[1] in state.aliases

71 graph = p[3]	70 graph = p[3]

72 state.aliases[p[1]] = graph	71 state.aliases[p[1]] = graph

73 if graph[0] == 'CLASS' or graph[0] == 'NOT_CLASS':	72 if graph[0] == 'CLASS' or graph[0] == 'NOT_CLASS':

74 classes = state.character_classes	73 classes = state.character_classes

75 assert not p[1] in classes	74 assert not p[1] in classes

76 classes[p[1]] = TransitionKey.character_class(graph, classes)	75 classes[p[1]] = TransitionKey.character_class(graph, classes)

77	76

78 def p_rules(self, p):	77 def p_rules(self, p):

79 '''rules : state_change transition_rules rules	78 '''rules : state_change transition_rules rules

80 \| empty'''	79 \| empty'''

81	80

82 def p_state_change(self, p):	81 def p_state_change(self, p):

83 '''state_change : LESS_THAN IDENTIFIER GREATER_THAN	82 'state_change : GRAPH_OPEN IDENTIFIER GRAPH_CLOSE'

84 \| LESS_THAN DEFAULT GREATER_THAN'''

85 state = self.__state	83 state = self.__state

86 state.current_state = p[2]	84 state.current_state = p[2]

87 assert state.current_state	85 assert state.current_state

88 if not state.current_state in state.rules:	86 if not state.current_state in state.rules:

89 state.rules[state.current_state] = {	87 state.rules[state.current_state] = {

90 'default_action': None,	88 'default_action': None,

91 'catch_all' : None,	89 'catch_all' : None,

92 'regex' : []	90 'regex' : []

93 }	91 }

94 p[0] = state.current_state	92 p[0] = state.current_state

95	93

96 def p_transition_rules(self, p):	94 def p_transition_rules(self, p):

97 '''transition_rules : transition_rule transition_rules	95 '''transition_rules : transition_rule transition_rules

98 \| empty'''	96 \| empty'''

99	97

100 def p_transition_rule(self, p):	98 def p_transition_rule(self, p):

101 '''transition_rule : composite_regex code_or_token action	99 '''transition_rule : composite_regex action

102 \| composite_regex empty action	100 \| DEFAULT_ACTION default_action

103 \| composite_regex code_or_token empty	101 \| CATCH_ALL action'''

104 \| DEFAULT_ACTION code_or_token empty	102 precedence = RuleParser.__rule_precedence_counter

105 \| CATCH_ALL empty action'''	103 RuleParser.__rule_precedence_counter += 1

106 transition = p[3]	104 action = p[2]

	105 (entry_action, match_action, transition) = action

107 if transition and not transition in self.__keyword_transitions:	106 if transition and not transition in self.__keyword_transitions:

108 assert not transition == 'default'	107 assert not transition == 'default', "can't append default graph"

109 self.__state.transitions.add(transition)	108 self.__state.transitions.add(transition)

110 RuleParser.__rule_precedence_counter += 1

111 rules = self.__state.rules[self.__state.current_state]	109 rules = self.__state.rules[self.__state.current_state]

112 code = p[2]

113 if p[1] == 'default_action':	110 if p[1] == 'default_action':

114 assert self.__state.current_state == 'default'	111 assert self.__state.current_state == 'default'

115 assert not rules['default_action']	112 assert not rules['default_action']

116 rules['default_action'] = code	113 rules['default_action'] = action

117 elif p[1] == 'catch_all':	114 elif p[1] == 'catch_all':

118 assert not rules['catch_all']	115 assert not rules['catch_all']

119 rules['catch_all'] = transition	116 rules['catch_all'] = (precedence, action)

120 else:	117 else:

121 rule = (p[1], RuleParser.__rule_precedence_counter, code, transition)	118 regex = p[1]

122 rules['regex'].append(rule)	119 rules['regex'].append((regex, precedence, action))

123	120

124 def p_code_or_token(self, p):	121 def p_action(self, p):

125 '''code_or_token : code	122 '''action : ACTION_OPEN maybe_action_part OR maybe_action_part OR maybe_tran sition ACTION_CLOSE'''

126 \| push_token'''	123 p[0] = (p[2], p[4], p[6])

	124

	125 def p_default_action(self, p):

	126 'default_action : ACTION_OPEN action_part ACTION_CLOSE'

	127 p[0] = (None, p[2], None)

	128

	129 def p_maybe_action_part(self, p):

	130 '''maybe_action_part : action_part

	131 \| empty'''

127 p[0] = p[1]	132 p[0] = p[1]

128	133

129 def p_push_token(self, p):	134 def p_action_part(self, p):

130 'push_token : PUSH_TOKEN LEFT_PARENTHESIS IDENTIFIER RIGHT_PARENTHESIS'	135 '''action_part : code

131 p[0] = (p[1], p[3])	136 \| identifier_action'''

	137 p[0] = p[1]

132	138

133 def p_action(self, p):	139 def p_maybe_transition(self, p):

134 'action : ACTION_OPEN IDENTIFIER ACTION_CLOSE'	140 '''maybe_transition : IDENTIFIER

135 p[0] = p[2]	141 \| empty'''

	142 p[0] = p[1]

	143

	144 def p_identifier_action(self, p):

	145 '''identifier_action : IDENTIFIER

	146 \| IDENTIFIER LEFT_PARENTHESIS IDENTIFIER RIGHT_PARENTHE SIS'''

	147 assert p[1] != 'code'

	148 if len(p) == 2:

	149 p[0] = (p[1], None)

	150 elif len(p) == 5:

	151 p[0] = (p[1], p[2])

	152 else:

	153 raise Exception()

136	154

137 def p_composite_regex(self, p):	155 def p_composite_regex(self, p):

138 '''composite_regex : regex_parts OR regex_parts	156 '''composite_regex : regex_parts OR regex_parts

139 \| regex_parts'''	157 \| regex_parts'''

140 if len(p) == 2:	158 if len(p) == 2:

141 p[0] = p[1]	159 p[0] = p[1]

142 else:	160 else:

143 p[0] = NfaBuilder.or_graphs([p[1], p[3]])	161 p[0] = NfaBuilder.or_graphs([p[1], p[3]])

144	162

145 def p_regex_parts(self, p):	163 def p_regex_parts(self, p):

(...skipping 121 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
267 def minimal_dfa(self):	285 def minimal_dfa(self):

268 if not self.__minimial_dfa:	286 if not self.__minimial_dfa:

269 self.__minimial_dfa = self.dfa().minimize()	287 self.__minimial_dfa = self.dfa().minimize()

270 return self.__minimial_dfa	288 return self.__minimial_dfa

271	289

272 def __process_parser_state(self, parser_state):	290 def __process_parser_state(self, parser_state):

273 rule_map = {}	291 rule_map = {}

274 builder = NfaBuilder()	292 builder = NfaBuilder()

275 builder.set_character_classes(parser_state.character_classes)	293 builder.set_character_classes(parser_state.character_classes)

276 assert 'default' in parser_state.rules	294 assert 'default' in parser_state.rules

277 def process(k, v):	295 def process(subgraph, v):

278 graphs = []	296 graphs = []

279 continues = 0	297 continues = 0

280 for (graph, precedence, code, transition) in v['regex']:	298 for graph, precedence, action in v['regex']:

281 default_code = v['default_action']	299 (entry_action, match_action, transition) = action

282 if code or default_code:	300 if entry_action or match_action:

283 (code_type, code_value) = code if code else default_code	301 action = Action(entry_action, match_action, precedence)

284 action = Action(code_type, code_value, precedence)

285 graph = NfaBuilder.add_action(graph, action)	302 graph = NfaBuilder.add_action(graph, action)

286 if not transition or transition == 'break':	303 if not transition:

287 pass	304 pass

288 elif transition == 'continue':	305 elif transition == 'continue':

289 assert not k == 'default'	306 assert not subgraph == 'default'

290 continues += 1	307 continues += 1

291 graph = NfaBuilder.add_continue(graph)	308 graph = NfaBuilder.add_continue(graph)

292 elif (transition == 'terminate' or

293 transition == 'terminate_illegal' or

294 transition == 'skip'):

295 assert not code

296 graph = NfaBuilder.add_action(graph, Action(transition, None, -1))

297 else:	309 else:

298 assert k == 'default'	310 assert subgraph == 'default'

299 subgraph_modifier = '*' if code else None	311 subgraph_modifier = None

300 graph = NfaBuilder.join_subgraph(	312 graph = NfaBuilder.join_subgraph(

301 graph, transition, rule_map[transition], subgraph_modifier)	313 graph, transition, rule_map[transition], subgraph_modifier)

302 graphs.append(graph)	314 graphs.append(graph)

303 if continues == len(graphs):	315 if continues == len(graphs):

304 graphs.append(NfaBuilder.epsilon())	316 graphs.append(NfaBuilder.epsilon())

305 if v['catch_all']:	317 if v['catch_all']:

306 assert v['catch_all'] == 'continue'	318 (precedence, catch_all) = v['catch_all']

	319 assert catch_all == (None, None, 'continue'), "unimplemented"

307 graphs.append(NfaBuilder.add_continue(NfaBuilder.catch_all()))	320 graphs.append(NfaBuilder.add_continue(NfaBuilder.catch_all()))

308 graph = NfaBuilder.or_graphs(graphs)	321 graph = NfaBuilder.or_graphs(graphs)

309 rule_map[k] = graph	322 rule_map[k] = graph

310 # process first the subgraphs, then the default graph	323 # process first the subgraphs, then the default graph

311 for k, v in parser_state.rules.items():	324 for k, v in parser_state.rules.items():

312 if k == 'default': continue	325 if k == 'default': continue

313 process(k, v)	326 process(k, v)

314 process('default', parser_state.rules['default'])	327 process('default', parser_state.rules['default'])

315 # build the automata	328 # build the automata

316 for rule_name, graph in rule_map.items():	329 for rule_name, graph in rule_map.items():

317 self.__automata[rule_name] = RuleProcessor.Automata(builder, graph)	330 self.__automata[rule_name] = RuleProcessor.Automata(builder, graph)

318	331 # process default_action

319 default_action = parser_state.rules['default']['default_action']	332 default_action = parser_state.rules['default']['default_action']

320 self.default_action = Action(default_action[0], default_action[1]) if defaul t_action else None	333 self.default_action = Action(None, default_action[1]) if default_action else None

OLD	NEW

« no previous file with comments | « tools/lexer_generator/rule_lexer.py ('k') | tools/lexer_generator/rule_parser_test.py » ('j') | no next file with comments »