| OLD | NEW |
| 1 # Copyright 2013 the V8 project authors. All rights reserved. | 1 # Copyright 2013 the V8 project authors. All rights reserved. |
| 2 # Redistribution and use in source and binary forms, with or without | 2 # Redistribution and use in source and binary forms, with or without |
| 3 # modification, are permitted provided that the following conditions are | 3 # modification, are permitted provided that the following conditions are |
| 4 # met: | 4 # met: |
| 5 # | 5 # |
| 6 # * Redistributions of source code must retain the above copyright | 6 # * Redistributions of source code must retain the above copyright |
| 7 # notice, this list of conditions and the following disclaimer. | 7 # notice, this list of conditions and the following disclaimer. |
| 8 # * Redistributions in binary form must reproduce the above | 8 # * Redistributions in binary form must reproduce the above |
| 9 # copyright notice, this list of conditions and the following | 9 # copyright notice, this list of conditions and the following |
| 10 # disclaimer in the documentation and/or other materials provided | 10 # disclaimer in the documentation and/or other materials provided |
| (...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 44 self.rules = {} | 44 self.rules = {} |
| 45 self.transitions = set() | 45 self.transitions = set() |
| 46 | 46 |
| 47 def parse(self, string): | 47 def parse(self, string): |
| 48 return RuleParser.parse(string, self) | 48 return RuleParser.parse(string, self) |
| 49 | 49 |
| 50 class RuleParser: | 50 class RuleParser: |
| 51 | 51 |
| 52 tokens = RuleLexer.tokens | 52 tokens = RuleLexer.tokens |
| 53 __rule_precedence_counter = 0 | 53 __rule_precedence_counter = 0 |
| 54 __keyword_transitions = set([ | 54 __keyword_transitions = set(['continue']) |
| 55 'continue', 'break', 'terminate', 'terminate_illegal', 'skip']) | |
| 56 | 55 |
| 57 def __init__(self): | 56 def __init__(self): |
| 58 self.__state = None | 57 self.__state = None |
| 59 | 58 |
| 60 def p_statements(self, p): | 59 def p_statements(self, p): |
| 61 'statements : aliases rules' | 60 'statements : aliases rules' |
| 62 | 61 |
| 63 def p_aliases(self, p): | 62 def p_aliases(self, p): |
| 64 '''aliases : alias_rule aliases | 63 '''aliases : alias_rule aliases |
| 65 | empty''' | 64 | empty''' |
| 66 | 65 |
| 67 def p_alias_rule(self, p): | 66 def p_alias_rule(self, p): |
| 68 'alias_rule : IDENTIFIER EQUALS composite_regex SEMICOLON' | 67 'alias_rule : IDENTIFIER EQUALS composite_regex SEMICOLON' |
| 69 state = self.__state | 68 state = self.__state |
| 70 assert not p[1] in state.aliases | 69 assert not p[1] in state.aliases |
| 71 graph = p[3] | 70 graph = p[3] |
| 72 state.aliases[p[1]] = graph | 71 state.aliases[p[1]] = graph |
| 73 if graph[0] == 'CLASS' or graph[0] == 'NOT_CLASS': | 72 if graph[0] == 'CLASS' or graph[0] == 'NOT_CLASS': |
| 74 classes = state.character_classes | 73 classes = state.character_classes |
| 75 assert not p[1] in classes | 74 assert not p[1] in classes |
| 76 classes[p[1]] = TransitionKey.character_class(graph, classes) | 75 classes[p[1]] = TransitionKey.character_class(graph, classes) |
| 77 | 76 |
| 78 def p_rules(self, p): | 77 def p_rules(self, p): |
| 79 '''rules : state_change transition_rules rules | 78 '''rules : state_change transition_rules rules |
| 80 | empty''' | 79 | empty''' |
| 81 | 80 |
| 82 def p_state_change(self, p): | 81 def p_state_change(self, p): |
| 83 '''state_change : LESS_THAN IDENTIFIER GREATER_THAN | 82 'state_change : GRAPH_OPEN IDENTIFIER GRAPH_CLOSE' |
| 84 | LESS_THAN DEFAULT GREATER_THAN''' | |
| 85 state = self.__state | 83 state = self.__state |
| 86 state.current_state = p[2] | 84 state.current_state = p[2] |
| 87 assert state.current_state | 85 assert state.current_state |
| 88 if not state.current_state in state.rules: | 86 if not state.current_state in state.rules: |
| 89 state.rules[state.current_state] = { | 87 state.rules[state.current_state] = { |
| 90 'default_action': None, | 88 'default_action': None, |
| 91 'catch_all' : None, | 89 'catch_all' : None, |
| 92 'regex' : [] | 90 'regex' : [] |
| 93 } | 91 } |
| 94 p[0] = state.current_state | 92 p[0] = state.current_state |
| 95 | 93 |
| 96 def p_transition_rules(self, p): | 94 def p_transition_rules(self, p): |
| 97 '''transition_rules : transition_rule transition_rules | 95 '''transition_rules : transition_rule transition_rules |
| 98 | empty''' | 96 | empty''' |
| 99 | 97 |
| 100 def p_transition_rule(self, p): | 98 def p_transition_rule(self, p): |
| 101 '''transition_rule : composite_regex code_or_token action | 99 '''transition_rule : composite_regex action |
| 102 | composite_regex empty action | 100 | DEFAULT_ACTION default_action |
| 103 | composite_regex code_or_token empty | 101 | CATCH_ALL action''' |
| 104 | DEFAULT_ACTION code_or_token empty | 102 precedence = RuleParser.__rule_precedence_counter |
| 105 | CATCH_ALL empty action''' | 103 RuleParser.__rule_precedence_counter += 1 |
| 106 transition = p[3] | 104 action = p[2] |
| 105 (entry_action, match_action, transition) = action |
| 107 if transition and not transition in self.__keyword_transitions: | 106 if transition and not transition in self.__keyword_transitions: |
| 108 assert not transition == 'default' | 107 assert not transition == 'default', "can't append default graph" |
| 109 self.__state.transitions.add(transition) | 108 self.__state.transitions.add(transition) |
| 110 RuleParser.__rule_precedence_counter += 1 | |
| 111 rules = self.__state.rules[self.__state.current_state] | 109 rules = self.__state.rules[self.__state.current_state] |
| 112 code = p[2] | |
| 113 if p[1] == 'default_action': | 110 if p[1] == 'default_action': |
| 114 assert self.__state.current_state == 'default' | 111 assert self.__state.current_state == 'default' |
| 115 assert not rules['default_action'] | 112 assert not rules['default_action'] |
| 116 rules['default_action'] = code | 113 rules['default_action'] = action |
| 117 elif p[1] == 'catch_all': | 114 elif p[1] == 'catch_all': |
| 118 assert not rules['catch_all'] | 115 assert not rules['catch_all'] |
| 119 rules['catch_all'] = transition | 116 rules['catch_all'] = (precedence, action) |
| 120 else: | 117 else: |
| 121 rule = (p[1], RuleParser.__rule_precedence_counter, code, transition) | 118 regex = p[1] |
| 122 rules['regex'].append(rule) | 119 rules['regex'].append((regex, precedence, action)) |
| 123 | 120 |
| 124 def p_code_or_token(self, p): | 121 def p_action(self, p): |
| 125 '''code_or_token : code | 122 '''action : ACTION_OPEN maybe_action_part OR maybe_action_part OR maybe_tran
sition ACTION_CLOSE''' |
| 126 | push_token''' | 123 p[0] = (p[2], p[4], p[6]) |
| 124 |
| 125 def p_default_action(self, p): |
| 126 'default_action : ACTION_OPEN action_part ACTION_CLOSE' |
| 127 p[0] = (None, p[2], None) |
| 128 |
| 129 def p_maybe_action_part(self, p): |
| 130 '''maybe_action_part : action_part |
| 131 | empty''' |
| 127 p[0] = p[1] | 132 p[0] = p[1] |
| 128 | 133 |
| 129 def p_push_token(self, p): | 134 def p_action_part(self, p): |
| 130 'push_token : PUSH_TOKEN LEFT_PARENTHESIS IDENTIFIER RIGHT_PARENTHESIS' | 135 '''action_part : code |
| 131 p[0] = (p[1], p[3]) | 136 | identifier_action''' |
| 137 p[0] = p[1] |
| 132 | 138 |
| 133 def p_action(self, p): | 139 def p_maybe_transition(self, p): |
| 134 'action : ACTION_OPEN IDENTIFIER ACTION_CLOSE' | 140 '''maybe_transition : IDENTIFIER |
| 135 p[0] = p[2] | 141 | empty''' |
| 142 p[0] = p[1] |
| 143 |
| 144 def p_identifier_action(self, p): |
| 145 '''identifier_action : IDENTIFIER |
| 146 | IDENTIFIER LEFT_PARENTHESIS IDENTIFIER RIGHT_PARENTHE
SIS''' |
| 147 assert p[1] != 'code' |
| 148 if len(p) == 2: |
| 149 p[0] = (p[1], None) |
| 150 elif len(p) == 5: |
| 151 p[0] = (p[1], p[2]) |
| 152 else: |
| 153 raise Exception() |
| 136 | 154 |
| 137 def p_composite_regex(self, p): | 155 def p_composite_regex(self, p): |
| 138 '''composite_regex : regex_parts OR regex_parts | 156 '''composite_regex : regex_parts OR regex_parts |
| 139 | regex_parts''' | 157 | regex_parts''' |
| 140 if len(p) == 2: | 158 if len(p) == 2: |
| 141 p[0] = p[1] | 159 p[0] = p[1] |
| 142 else: | 160 else: |
| 143 p[0] = NfaBuilder.or_graphs([p[1], p[3]]) | 161 p[0] = NfaBuilder.or_graphs([p[1], p[3]]) |
| 144 | 162 |
| 145 def p_regex_parts(self, p): | 163 def p_regex_parts(self, p): |
| (...skipping 121 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 267 def minimal_dfa(self): | 285 def minimal_dfa(self): |
| 268 if not self.__minimial_dfa: | 286 if not self.__minimial_dfa: |
| 269 self.__minimial_dfa = self.dfa().minimize() | 287 self.__minimial_dfa = self.dfa().minimize() |
| 270 return self.__minimial_dfa | 288 return self.__minimial_dfa |
| 271 | 289 |
| 272 def __process_parser_state(self, parser_state): | 290 def __process_parser_state(self, parser_state): |
| 273 rule_map = {} | 291 rule_map = {} |
| 274 builder = NfaBuilder() | 292 builder = NfaBuilder() |
| 275 builder.set_character_classes(parser_state.character_classes) | 293 builder.set_character_classes(parser_state.character_classes) |
| 276 assert 'default' in parser_state.rules | 294 assert 'default' in parser_state.rules |
| 277 def process(k, v): | 295 def process(subgraph, v): |
| 278 graphs = [] | 296 graphs = [] |
| 279 continues = 0 | 297 continues = 0 |
| 280 for (graph, precedence, code, transition) in v['regex']: | 298 for graph, precedence, action in v['regex']: |
| 281 default_code = v['default_action'] | 299 (entry_action, match_action, transition) = action |
| 282 if code or default_code: | 300 if entry_action or match_action: |
| 283 (code_type, code_value) = code if code else default_code | 301 action = Action(entry_action, match_action, precedence) |
| 284 action = Action(code_type, code_value, precedence) | |
| 285 graph = NfaBuilder.add_action(graph, action) | 302 graph = NfaBuilder.add_action(graph, action) |
| 286 if not transition or transition == 'break': | 303 if not transition: |
| 287 pass | 304 pass |
| 288 elif transition == 'continue': | 305 elif transition == 'continue': |
| 289 assert not k == 'default' | 306 assert not subgraph == 'default' |
| 290 continues += 1 | 307 continues += 1 |
| 291 graph = NfaBuilder.add_continue(graph) | 308 graph = NfaBuilder.add_continue(graph) |
| 292 elif (transition == 'terminate' or | |
| 293 transition == 'terminate_illegal' or | |
| 294 transition == 'skip'): | |
| 295 assert not code | |
| 296 graph = NfaBuilder.add_action(graph, Action(transition, None, -1)) | |
| 297 else: | 309 else: |
| 298 assert k == 'default' | 310 assert subgraph == 'default' |
| 299 subgraph_modifier = '*' if code else None | 311 subgraph_modifier = None |
| 300 graph = NfaBuilder.join_subgraph( | 312 graph = NfaBuilder.join_subgraph( |
| 301 graph, transition, rule_map[transition], subgraph_modifier) | 313 graph, transition, rule_map[transition], subgraph_modifier) |
| 302 graphs.append(graph) | 314 graphs.append(graph) |
| 303 if continues == len(graphs): | 315 if continues == len(graphs): |
| 304 graphs.append(NfaBuilder.epsilon()) | 316 graphs.append(NfaBuilder.epsilon()) |
| 305 if v['catch_all']: | 317 if v['catch_all']: |
| 306 assert v['catch_all'] == 'continue' | 318 (precedence, catch_all) = v['catch_all'] |
| 319 assert catch_all == (None, None, 'continue'), "unimplemented" |
| 307 graphs.append(NfaBuilder.add_continue(NfaBuilder.catch_all())) | 320 graphs.append(NfaBuilder.add_continue(NfaBuilder.catch_all())) |
| 308 graph = NfaBuilder.or_graphs(graphs) | 321 graph = NfaBuilder.or_graphs(graphs) |
| 309 rule_map[k] = graph | 322 rule_map[k] = graph |
| 310 # process first the subgraphs, then the default graph | 323 # process first the subgraphs, then the default graph |
| 311 for k, v in parser_state.rules.items(): | 324 for k, v in parser_state.rules.items(): |
| 312 if k == 'default': continue | 325 if k == 'default': continue |
| 313 process(k, v) | 326 process(k, v) |
| 314 process('default', parser_state.rules['default']) | 327 process('default', parser_state.rules['default']) |
| 315 # build the automata | 328 # build the automata |
| 316 for rule_name, graph in rule_map.items(): | 329 for rule_name, graph in rule_map.items(): |
| 317 self.__automata[rule_name] = RuleProcessor.Automata(builder, graph) | 330 self.__automata[rule_name] = RuleProcessor.Automata(builder, graph) |
| 318 | 331 # process default_action |
| 319 default_action = parser_state.rules['default']['default_action'] | 332 default_action = parser_state.rules['default']['default_action'] |
| 320 self.default_action = Action(default_action[0], default_action[1]) if defaul
t_action else None | 333 self.default_action = Action(None, default_action[1]) if default_action else
None |
| OLD | NEW |