Index: tools/lexer_generator/rule_parser.py |
diff --git a/tools/lexer_generator/rule_parser.py b/tools/lexer_generator/rule_parser.py |
index a733801baaa66f485413871b0a0f97146e1f7107..46cce1378c01a6710728f89f464f02cec7857a70 100644 |
--- a/tools/lexer_generator/rule_parser.py |
+++ b/tools/lexer_generator/rule_parser.py |
@@ -33,48 +33,101 @@ class RuleParser: |
tokens = RuleLexer.tokens |
def __init__(self): |
- self.aliases = dict() |
- self.transitions = dict() |
- |
- def p_statement_alias(self, p): |
- 'statement : ALIAS EQUALS REGEX' |
- regex = self.lexer.lexer.lexmatch.group('regex') |
- self.aliases[p[1]] = regex |
- |
- def p_statement_condition_transition(self, p): |
- 'statement : CONDITION_BEGIN CONDITION CONDITION_END REGEX_TRANSITION' |
- old_condition = p[2] |
- regex = self.lexer.lexer.lexmatch.group('regex').strip() |
- new_condition = self.lexer.lexer.lexmatch.group('new') |
- if old_condition not in self.transitions: |
- self.transitions[old_condition] = dict() |
- self.transitions[old_condition][regex] = ('condition', new_condition) |
- |
- def p_statement_condition_body(self, p): |
- 'statement : CONDITION_BEGIN CONDITION CONDITION_END REGEX_BODY' |
- old_condition = p[2] |
- regex = self.lexer.lexer.lexmatch.group('regex').strip() |
- body = self.lexer.lexer.lexmatch.group('body').strip() |
- if old_condition not in self.transitions: |
- self.transitions[old_condition] = dict() |
- self.transitions[old_condition][regex] = ('body', body) |
- |
- def p_statement_condition_transition_body(self, p): |
- 'statement : CONDITION_BEGIN CONDITION CONDITION_END REGEX_TRANSITION_BODY' |
- old_condition = p[2] |
- regex = self.lexer.lexer.lexmatch.group('regex').strip() |
- new_condition = self.lexer.lexer.lexmatch.group('new').strip() |
- body = self.lexer.lexer.lexmatch.group('body').strip() |
- if old_condition not in self.transitions: |
- self.transitions[old_condition] = dict() |
- self.transitions[old_condition][regex] = ( |
- 'condition_and_body', new_condition, body) |
+ self.aliases = {} |
+ self.current_transition = None |
+ self.rules = {} |
+ |
+ # A statement list: one statement followed by an optional tail |
+ # (maybe_statements). The docstring is the grammar production; no |
+ # semantic action is attached. |
+ def p_statements(self, p): |
+ 'statements : statement maybe_statements' |
+ |
+ # Tail of a statement list: either further statements or nothing. |
+ # Grammar-only production; no semantic action is attached. |
+ def p_maybe_statement(self, p): |
+ '''maybe_statements : statements |
+ | empty''' |
+ |
+ # A statement is either an alias definition or a transition rule. |
+ # Grammar-only production; the work happens in p_alias_rule and |
+ # p_transition_rule. |
+ def p_statement(self, p): |
+ '''statement : alias_rule |
+ | transition_rule''' |
+ |
+  # Bind an alias name (p[1]) to its composite regex (p[3]) in |
+  # self.aliases. A name may be bound only once; rebinding trips the |
+  # assertion. |
+  def p_alias_rule(self, p): |
+    'alias_rule : IDENTIFIER EQUALS composite_regex SEMICOLON' |
+    name, regex = p[1], p[3] |
+    assert name not in self.aliases |
+    self.aliases[name] = regex |
+ |
+  # Append one rule to the list kept for the rule's condition. |
+  # |
+  # The production length selects the rule shape: |
+  #   len(p) == 4 -> ('simple', regex, code) |
+  #   len(p) == 5 -> ('transition', regex, new_condition) |
+  #   len(p) == 6 -> ('transition_with_code', regex, new_condition, code) |
+  # Any other length raises Exception. |
+  def p_transition_rule(self, p): |
+    '''transition_rule : transition composite_regex code |
+                       | transition composite_regex TRANSITION IDENTIFIER |
+                       | transition composite_regex TRANSITION_WITH_CODE IDENTIFIER code''' |
+    # p[1] carries the condition name that p_transition stored in its |
+    # p[0]. This rule's own p[0] is only the (still unset) result slot, |
+    # so it must not be read as the condition. |
+    rules = self.rules[p[1]] |
+    regex = p[2] |
+    if len(p) == 4: |
+      rules.append(('simple', regex, p[3])) |
+    elif len(p) == 5: |
+      rules.append(('transition', regex, p[4])) |
+    elif len(p) == 6: |
+      rules.append(('transition_with_code', regex, p[4], p[5])) |
+    else: |
+      raise Exception() |
+ |
+  # Record the condition named between '<' and '>' as the current one, |
+  # make sure it has a rule list in self.rules, and hand the name on |
+  # through p[0]. |
+  def p_transition(self, p): |
+    '''transition : LESS_THAN IDENTIFIER GREATER_THAN''' |
+    # | empty''' TODO skipping transition without sr conflict |
+    # The p[1] guard looks like preparation for the disabled 'empty' |
+    # alternative above, where the previous condition would be reused. |
+    if p[1]: |
+      self.current_transition = p[2] |
+    current = self.current_transition |
+    assert current |
+    self.rules.setdefault(current, []) |
+    p[0] = current |
+ |
+  # Collect the matched symbol values into a list in p[0], dropping the |
+  # final one when it is falsy (an absent maybe_regex_parts). |
+  def p_composite_regex(self, p): |
+    '''composite_regex : regex_part OR regex_part maybe_regex_parts |
+                       | regex_part maybe_regex_parts''' |
+    # The last symbol is addressed as len(p) - 1 on purpose. |
+    # NOTE(review): PLY gives negative indices on p a different meaning |
+    # (parser stack access), so p[-1] is not a substitute - confirm. |
+    last = len(p) - 1 |
+    p[0] = p[1:] if p[last] else p[1:-1] |
+ |
+ # Optional continuation of a composite regex: forwards the value of |
+ # the single matched symbol (composite_regex or empty) unchanged. |
+ def p_maybe_regex_part(self, p): |
+ '''maybe_regex_parts : composite_regex |
+ | empty''' |
+ p[0] = p[1] |
+ |
+  # One regex atom (parenthesised group, string, character class or |
+  # identifier) with an optional modifier. p[0] becomes the list of |
+  # matched values, without the modifier slot when no modifier is given. |
+  def p_regex_part(self, p): |
+    '''regex_part : LEFT_PARENTHESIS composite_regex RIGHT_PARENTHESIS modifier |
+                  | STRING_REGEX modifier |
+                  | CHARACTER_CLASS_REGEX modifier |
+                  | IDENTIFIER modifier''' |
+    # The modifier is always the last symbol; it is indexed as |
+    # len(p) - 1, not with a negative index. |
+    modifier = p[len(p) - 1] |
+    p[0] = p[1:] if modifier else p[1:-1] |
+ |
+ # Optional repetition modifier (PLUS, QUESTION_MARK or STAR). |
+ # Forwards the value of the single matched symbol, which is that of |
+ # 'empty' when no modifier is written. |
+ def p_modifier(self, p): |
+ '''modifier : PLUS |
+ | QUESTION_MARK |
+ | STAR |
+ | empty''' |
+ p[0] = p[1] |
+ |
+  # Bracketed code block: p[0] becomes the text of the fragments with |
+  # surrounding whitespace stripped. |
+  def p_code(self, p): |
+    'code : LEFT_BRACKET code_fragments RIGHT_BRACKET' |
+    # code_fragments can match 'empty', in which case its value is not |
+    # a string (None); fall back to '' so an empty block does not raise |
+    # AttributeError on .strip(). |
+    p[0] = (p[2] or '').strip() |
+ |
+  # Right-recursive list of code fragments. p[0] is this fragment |
+  # joined with the tail when a non-empty tail exists, otherwise just |
+  # the first symbol's value. |
+  def p_code_fragments(self, p): |
+    '''code_fragments : CODE_FRAGMENT code_fragments |
+                      | empty''' |
+    has_tail = len(p) == 3 and p[2] |
+    p[0] = p[1] + p[2] if has_tail else p[1] |
+ |
+ # Empty production, used by the optional parts of the grammar. It has |
+ # no body, so it assigns nothing to p[0]. |
+ def p_empty(self, p): |
+ 'empty :' |
+ # Error hook: raises a generic Exception whose message embeds p |
+ # formatted with %s. |
def p_error(self, p): |
raise Exception("Syntax error in input '%s'" % p) |
def build(self, **kwargs): |
- self.parser = yacc.yacc(module=self, **kwargs) |
+ self.parser = yacc.yacc(module=self, debug=0, write_tables=0, **kwargs) |
self.lexer = RuleLexer() |
self.lexer.build(**kwargs) |