| OLD | NEW |
| 1 # Copyright 2013 the V8 project authors. All rights reserved. | 1 # Copyright 2013 the V8 project authors. All rights reserved. |
| 2 # Redistribution and use in source and binary forms, with or without | 2 # Redistribution and use in source and binary forms, with or without |
| 3 # modification, are permitted provided that the following conditions are | 3 # modification, are permitted provided that the following conditions are |
| 4 # met: | 4 # met: |
| 5 # | 5 # |
| 6 # * Redistributions of source code must retain the above copyright | 6 # * Redistributions of source code must retain the above copyright |
| 7 # notice, this list of conditions and the following disclaimer. | 7 # notice, this list of conditions and the following disclaimer. |
| 8 # * Redistributions in binary form must reproduce the above | 8 # * Redistributions in binary form must reproduce the above |
| 9 # copyright notice, this list of conditions and the following | 9 # copyright notice, this list of conditions and the following |
| 10 # disclaimer in the documentation and/or other materials provided | 10 # disclaimer in the documentation and/or other materials provided |
| 11 # with the distribution. | 11 # with the distribution. |
| 12 # * Neither the name of Google Inc. nor the names of its | 12 # * Neither the name of Google Inc. nor the names of its |
| 13 # contributors may be used to endorse or promote products derived | 13 # contributors may be used to endorse or promote products derived |
| 14 # from this software without specific prior written permission. | 14 # from this software without specific prior written permission. |
| 15 # | 15 # |
| 16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | 16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | 17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | 18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | 19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | 20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 27 | 27 |
| 28 import ply.yacc as yacc | 28 import ply.yacc as yacc |
| 29 from automaton import Term, Action | 29 from action import Term, Action |
| 30 from rule_lexer import RuleLexer | 30 from rule_lexer import RuleLexer |
| 31 from regex_parser import RegexParser | 31 from regex_parser import RegexParser |
| 32 from nfa_builder import NfaBuilder | 32 from nfa_builder import NfaBuilder |
| 33 from dfa import Dfa | 33 from dfa import Dfa |
| 34 from dfa_optimizer import DfaOptimizer | 34 from dfa_optimizer import DfaOptimizer |
| 35 from transition_keys import TransitionKey, KeyEncoding | 35 from transition_keys import TransitionKey, KeyEncoding |
| 36 | 36 |
| 37 class RuleParserState: | 37 class RuleParserState: |
| 38 | 38 |
| 39 def __init__(self, encoding): | 39 def __init__(self, encoding): |
| (...skipping 20 matching lines...) Expand all Loading... |
| 60 'statements : aliases rules' | 60 'statements : aliases rules' |
| 61 | 61 |
| 62 def p_aliases(self, p): | 62 def p_aliases(self, p): |
| 63 '''aliases : alias_rule aliases | 63 '''aliases : alias_rule aliases |
| 64 | empty''' | 64 | empty''' |
| 65 | 65 |
| 66 def p_alias_rule(self, p): | 66 def p_alias_rule(self, p): |
| 67 'alias_rule : IDENTIFIER EQUALS composite_regex SEMICOLON' | 67 'alias_rule : IDENTIFIER EQUALS composite_regex SEMICOLON' |
| 68 state = self.__state | 68 state = self.__state |
| 69 assert not p[1] in state.aliases | 69 assert not p[1] in state.aliases |
| 70 graph = p[3] | 70 term = p[3] |
| 71 state.aliases[p[1]] = graph | 71 state.aliases[p[1]] = term |
| 72 if graph[0] == 'CLASS' or graph[0] == 'NOT_CLASS': | 72 if term.name() == 'CLASS' or term.name() == 'NOT_CLASS': |
| 73 classes = state.character_classes | 73 classes = state.character_classes |
| 74 assert not p[1] in classes | 74 assert not p[1] in classes, "cannot reassign alias" |
| 75 encoding = state.encoding | 75 encoding = state.encoding |
| 76 classes[p[1]] = TransitionKey.character_class(encoding, graph, classes) | 76 classes[p[1]] = TransitionKey.character_class(encoding, term, classes) |
| 77 | 77 |
| 78 def p_rules(self, p): | 78 def p_rules(self, p): |
| 79 '''rules : state_change transition_rules rules | 79 '''rules : state_change transition_rules rules |
| 80 | empty''' | 80 | empty''' |
| 81 | 81 |
| 82 def p_state_change(self, p): | 82 def p_state_change(self, p): |
| 83 'state_change : GRAPH_OPEN IDENTIFIER GRAPH_CLOSE' | 83 'state_change : GRAPH_OPEN IDENTIFIER GRAPH_CLOSE' |
| 84 state = self.__state | 84 state = self.__state |
| 85 state.current_state = p[2] | 85 state.current_state = p[2] |
| 86 assert state.current_state | 86 assert state.current_state |
| (...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 164 p[0] = tuple(([p[1]] + list(p[3]))) | 164 p[0] = tuple(([p[1]] + list(p[3]))) |
| 165 else: | 165 else: |
| 166 raise Exception() | 166 raise Exception() |
| 167 | 167 |
| 168 def p_composite_regex(self, p): | 168 def p_composite_regex(self, p): |
| 169 '''composite_regex : regex_parts OR regex_parts | 169 '''composite_regex : regex_parts OR regex_parts |
| 170 | regex_parts''' | 170 | regex_parts''' |
| 171 if len(p) == 2: | 171 if len(p) == 2: |
| 172 p[0] = p[1] | 172 p[0] = p[1] |
| 173 else: | 173 else: |
| 174 p[0] = NfaBuilder.or_graphs([p[1], p[3]]) | 174 p[0] = NfaBuilder.or_terms([p[1], p[3]]) |
| 175 | 175 |
| 176 def p_regex_parts(self, p): | 176 def p_regex_parts(self, p): |
| 177 '''regex_parts : regex_part | 177 '''regex_parts : regex_part |
| 178 | regex_part regex_parts''' | 178 | regex_part regex_parts''' |
| 179 p[0] = NfaBuilder.cat_graphs(p[1:]) | 179 p[0] = NfaBuilder.cat_terms(p[1:]) |
| 180 | 180 |
| 181 def p_regex_part(self, p): | 181 def p_regex_part(self, p): |
| 182 '''regex_part : LEFT_PARENTHESIS composite_regex RIGHT_PARENTHESIS modifier | 182 '''regex_part : LEFT_PARENTHESIS composite_regex RIGHT_PARENTHESIS modifier |
| 183 | regex_string_literal modifier | 183 | regex_string_literal modifier |
| 184 | regex_class modifier | 184 | regex_class modifier |
| 185 | regex modifier | 185 | regex modifier |
| 186 | regex_alias modifier''' | 186 | regex_alias modifier''' |
| 187 modifier = p[len(p)-1] | 187 modifier = p[len(p)-1] |
| 188 graph = p[2] if len(p) == 5 else p[1] | 188 term = p[2] if len(p) == 5 else p[1] |
| 189 if modifier: | 189 if modifier: |
| 190 p[0] = NfaBuilder.apply_modifier(modifier, graph) | 190 p[0] = NfaBuilder.apply_modifier(modifier, term) |
| 191 else: | 191 else: |
| 192 p[0] = graph | 192 p[0] = term |
| 193 | 193 |
| 194 def p_regex_string_literal(self, p): | 194 def p_regex_string_literal(self, p): |
| 195 'regex_string_literal : STRING' | 195 'regex_string_literal : STRING' |
| 196 string = p[1][1:-1] | 196 string = p[1][1:-1] |
| 197 escape_char = lambda string, char: string.replace(char, "\\" + char) | 197 escape_char = lambda string, char: string.replace(char, "\\" + char) |
| 198 string = reduce(escape_char, "+?*|.[](){}", string).replace("\\\"", "\"") | 198 string = reduce(escape_char, "+?*|.[](){}", string).replace("\\\"", "\"") |
| 199 p[0] = RegexParser.parse(string) | 199 p[0] = RegexParser.parse(string) |
| 200 | |
| 201 def p_regex(self, p): | 200 def p_regex(self, p): |
| 202 'regex : REGEX' | 201 'regex : REGEX' |
| 203 string = p[1][1:-1].replace("\\/", "/") | 202 string = p[1][1:-1].replace("\\/", "/") |
| 204 p[0] = RegexParser.parse(string) | 203 p[0] = RegexParser.parse(string) |
| 205 | 204 |
| 206 def p_regex_class(self, p): | 205 def p_regex_class(self, p): |
| 207 'regex_class : CHARACTER_CLASS_REGEX' | 206 'regex_class : CHARACTER_CLASS_REGEX' |
| 208 p[0] = RegexParser.parse(p[1]) | 207 p[0] = RegexParser.parse(p[1]) |
| 209 | 208 |
| 210 def p_regex_alias(self, p): | 209 def p_regex_alias(self, p): |
| (...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 244 RuleParser.__static_instance = None | 243 RuleParser.__static_instance = None |
| 245 raise | 244 raise |
| 246 parser.__state = None | 245 parser.__state = None |
| 247 assert parser_state.transitions <= set(parser_state.rules.keys()) | 246 assert parser_state.transitions <= set(parser_state.rules.keys()) |
| 248 | 247 |
| 249 class RuleProcessor(object): | 248 class RuleProcessor(object): |
| 250 | 249 |
| 251 def __init__(self, parser_state): | 250 def __init__(self, parser_state): |
| 252 self.__automata = {} | 251 self.__automata = {} |
| 253 self.__rule_trees = {} | 252 self.__rule_trees = {} |
| 253 self.__default_action = None |
| 254 self.__process_parser_state(parser_state) | 254 self.__process_parser_state(parser_state) |
| 255 | 255 |
| 256 @staticmethod | 256 @staticmethod |
| 257 def parse(string, encoding_name): | 257 def parse(string, encoding_name): |
| 258 parser_state = RuleParserState(KeyEncoding.get(encoding_name)) | 258 parser_state = RuleParserState(KeyEncoding.get(encoding_name)) |
| 259 RuleParser.parse(string, parser_state) | 259 RuleParser.parse(string, parser_state) |
| 260 return RuleProcessor(parser_state) | 260 return RuleProcessor(parser_state) |
| 261 | 261 |
| 262 def automata_iter(self): | 262 def automata_iter(self): |
| 263 return iter(self.__automata.items()) | 263 return iter(self.__automata.items()) |
| 264 | 264 |
| 265 def default_automata(self): | 265 def default_automata(self): |
| 266 return self.__automata['default'] | 266 return self.__automata['default'] |
| 267 | 267 |
| 268 def rule_tree_dots(self): | 268 def default_action(self): |
| 269 result = {} | 269 return self.__default_action |
| 270 for rule in self.__rule_trees: | |
| 271 result[rule] = NfaBuilder.rule_tree_dot(self.__rule_trees[rule]) | |
| 272 return result | |
| 273 | 270 |
| 274 class Automata(object): | 271 class Automata(object): |
| 275 | 272 |
| 276 def __init__(self, builder, graph): | 273 def __init__(self, encoding, character_classes, rule_term): |
| 277 self.__builder = builder | 274 self.__encoding = encoding |
| 278 self.__graph = graph | 275 self.__character_classes = character_classes |
| 276 self.__rule_term = rule_term |
| 279 self.__nfa = None | 277 self.__nfa = None |
| 280 self.__dfa = None | 278 self.__dfa = None |
| 281 self.__minimial_dfa = None | 279 self.__minimial_dfa = None |
| 282 | 280 |
| 281 def rule_term(self): |
| 282 return self.__rule_term |
| 283 |
| 283 def nfa(self): | 284 def nfa(self): |
| 284 if not self.__nfa: | 285 if not self.__nfa: |
| 285 self.__nfa = self.__builder.nfa(self.__graph) | 286 self.__nfa = NfaBuilder.nfa( |
| 287 self.__encoding, self.__character_classes, self.__rule_term) |
| 286 return self.__nfa | 288 return self.__nfa |
| 287 | 289 |
| 288 def dfa(self): | 290 def dfa(self): |
| 289 if not self.__dfa: | 291 if not self.__dfa: |
| 290 (start, dfa_nodes) = self.nfa().compute_dfa() | 292 (start, dfa_nodes) = self.nfa().compute_dfa() |
| 291 self.__dfa = Dfa(self.nfa().encoding(), start, dfa_nodes) | 293 self.__dfa = Dfa(self.nfa().encoding(), start, dfa_nodes) |
| 292 return self.__dfa | 294 return self.__dfa |
| 293 | 295 |
| 294 def optimize_dfa(self, log = False): | 296 def optimize_dfa(self, log = False): |
| 295 assert not self.__dfa | 297 assert not self.__dfa |
| 296 self.__dfa = DfaOptimizer.optimize(self.dfa(), log) | 298 self.__dfa = DfaOptimizer.optimize(self.dfa(), log) |
| 297 | 299 |
| 298 def minimal_dfa(self): | 300 def minimal_dfa(self): |
| 299 if not self.__minimial_dfa: | 301 if not self.__minimial_dfa: |
| 300 self.__minimial_dfa = self.dfa().minimize() | 302 self.__minimial_dfa = self.dfa().minimize() |
| 301 return self.__minimial_dfa | 303 return self.__minimial_dfa |
| 302 | 304 |
| 303 def __process_parser_state(self, parser_state): | 305 def __process_parser_state(self, parser_state): |
| 304 rule_map = {} | 306 rule_map = {} |
| 305 builder = NfaBuilder(parser_state.encoding, parser_state.character_classes) | |
| 306 assert 'default' in parser_state.rules | 307 assert 'default' in parser_state.rules |
| 307 def process(subgraph, v): | 308 def process(subgraph, v): |
| 308 graphs = [] | 309 graphs = [] |
| 309 for graph, precedence, action in v['regex']: | 310 for graph, precedence, action in v['regex']: |
| 310 (entry_action, match_action, transition) = action | 311 (entry_action, match_action, transition) = action |
| 311 if entry_action or match_action: | 312 if entry_action or match_action: |
| 312 action = Action(entry_action, match_action, precedence) | 313 graph = NfaBuilder.add_action( |
| 313 graph = NfaBuilder.add_action(graph, action) | 314 graph, Action(entry_action, match_action, precedence)) |
| 314 if not transition: | 315 if not transition: |
| 315 pass | 316 pass |
| 316 elif transition == 'continue': | 317 elif transition == 'continue': |
| 317 assert not subgraph == 'default', 'unimplemented' | 318 assert not subgraph == 'default', 'unimplemented' |
| 318 graph = NfaBuilder.add_continue(graph) | 319 graph = NfaBuilder.add_continue(graph) |
| 319 else: | 320 else: |
| 320 assert subgraph == 'default', 'unimplemented' | 321 assert subgraph == 'default', 'unimplemented' |
| 321 graph = NfaBuilder.join_subgraph( | 322 graph = NfaBuilder.join_subgraph( |
| 322 graph, transition, rule_map[transition]) | 323 graph, transition, rule_map[transition]) |
| 323 graphs.append(graph) | 324 graphs.append(graph) |
| 324 graph = NfaBuilder.or_graphs(graphs) | 325 graph = NfaBuilder.or_terms(graphs) |
| 325 rule_map[subgraph] = graph | 326 rule_map[subgraph] = graph |
| 326 # process first the subgraphs, then the default graph | 327 # process first the subgraphs, then the default graph |
| 327 for k, v in parser_state.rules.items(): | 328 for k, v in parser_state.rules.items(): |
| 328 if k == 'default': continue | 329 if k == 'default': continue |
| 329 process(k, v) | 330 process(k, v) |
| 330 process('default', parser_state.rules['default']) | 331 process('default', parser_state.rules['default']) |
| 331 # build the automata | 332 # build the automata |
| 332 for rule_name, graph in rule_map.items(): | 333 for rule_name, graph in rule_map.items(): |
| 333 self.__automata[rule_name] = RuleProcessor.Automata(builder, graph) | 334 self.__automata[rule_name] = RuleProcessor.Automata( |
| 335 parser_state.encoding, parser_state.character_classes, graph) |
| 334 self.__rule_trees[rule_name] = graph | 336 self.__rule_trees[rule_name] = graph |
| 335 # process default_action | 337 # process default_action |
| 336 default_action = parser_state.rules['default']['default_action'] | 338 default_action = parser_state.rules['default']['default_action'] |
| 337 self.default_action = Action(Term.empty_term(), default_action) | 339 self.__default_action = Action(Term.empty_term(), default_action) |
| OLD | NEW |