| Index: tools/lexer_generator/rule_parser.py
|
| diff --git a/tools/lexer_generator/rule_parser.py b/tools/lexer_generator/rule_parser.py
|
| index a733801baaa66f485413871b0a0f97146e1f7107..46cce1378c01a6710728f89f464f02cec7857a70 100644
|
| --- a/tools/lexer_generator/rule_parser.py
|
| +++ b/tools/lexer_generator/rule_parser.py
|
| @@ -33,48 +33,101 @@ class RuleParser:
|
| tokens = RuleLexer.tokens
|
|
|
| def __init__(self):
|
| - self.aliases = dict()
|
| - self.transitions = dict()
|
| -
|
| - def p_statement_alias(self, p):
|
| - 'statement : ALIAS EQUALS REGEX'
|
| - regex = self.lexer.lexer.lexmatch.group('regex')
|
| - self.aliases[p[1]] = regex
|
| -
|
| - def p_statement_condition_transition(self, p):
|
| - 'statement : CONDITION_BEGIN CONDITION CONDITION_END REGEX_TRANSITION'
|
| - old_condition = p[2]
|
| - regex = self.lexer.lexer.lexmatch.group('regex').strip()
|
| - new_condition = self.lexer.lexer.lexmatch.group('new')
|
| - if old_condition not in self.transitions:
|
| - self.transitions[old_condition] = dict()
|
| - self.transitions[old_condition][regex] = ('condition', new_condition)
|
| -
|
| - def p_statement_condition_body(self, p):
|
| - 'statement : CONDITION_BEGIN CONDITION CONDITION_END REGEX_BODY'
|
| - old_condition = p[2]
|
| - regex = self.lexer.lexer.lexmatch.group('regex').strip()
|
| - body = self.lexer.lexer.lexmatch.group('body').strip()
|
| - if old_condition not in self.transitions:
|
| - self.transitions[old_condition] = dict()
|
| - self.transitions[old_condition][regex] = ('body', body)
|
| -
|
| - def p_statement_condition_transition_body(self, p):
|
| - 'statement : CONDITION_BEGIN CONDITION CONDITION_END REGEX_TRANSITION_BODY'
|
| - old_condition = p[2]
|
| - regex = self.lexer.lexer.lexmatch.group('regex').strip()
|
| - new_condition = self.lexer.lexer.lexmatch.group('new').strip()
|
| - body = self.lexer.lexer.lexmatch.group('body').strip()
|
| - if old_condition not in self.transitions:
|
| - self.transitions[old_condition] = dict()
|
| - self.transitions[old_condition][regex] = (
|
| - 'condition_and_body', new_condition, body)
|
| + self.aliases = {}
|
| + self.current_transition = None
|
| + self.rules = {}
|
| +
|
| + def p_statements(self, p):
|
| + 'statements : statement maybe_statements'
|
| +
|
| + def p_maybe_statement(self, p):
|
| + '''maybe_statements : statements
|
| + | empty'''
|
| +
|
| + def p_statement(self, p):
|
| + '''statement : alias_rule
|
| + | transition_rule'''
|
| +
|
| + def p_alias_rule(self, p):
|
| + 'alias_rule : IDENTIFIER EQUALS composite_regex SEMICOLON'
|
| + assert not p[1] in self.aliases
|
| + self.aliases[p[1]] = p[3]
|
| +
|
| + def p_transition_rule(self, p):
|
| + '''transition_rule : transition composite_regex code
|
| + | transition composite_regex TRANSITION IDENTIFIER
|
| + | transition composite_regex TRANSITION_WITH_CODE IDENTIFIER code'''
|
| + transition = p[0]
|
| + regex = p[2]
|
| + rules = self.rules[self.current_transition]
|
| + if len(p) == 4:
|
| + rules.append(('simple', regex, p[3]))
|
| + elif len(p) == 5:
|
| + rules.append(('transition', regex, p[4]))
|
| + elif len(p) == 6:
|
| + rules.append(('transition_with_code', regex, p[4], p[5]))
|
| + else:
|
| + raise Exception()
|
| +
|
| + def p_transition(self, p):
|
| + '''transition : LESS_THAN IDENTIFIER GREATER_THAN'''
|
| + # | empty''' TODO skipping transition without sr conflict
|
| + if p[1]:
|
| + self.current_transition = p[2]
|
| + assert self.current_transition
|
| + if not self.current_transition in self.rules:
|
| + self.rules[self.current_transition] = []
|
| + p[0] = self.current_transition
|
| +
|
| + def p_composite_regex(self, p):
|
| + '''composite_regex : regex_part OR regex_part maybe_regex_parts
|
| + | regex_part maybe_regex_parts'''
|
| + if p[len(p)-1]:
|
| + p[0] = p[1:]
|
| + else:
|
| + p[0] = p[1:-1]
|
| +
|
| + def p_maybe_regex_part(self, p):
|
| + '''maybe_regex_parts : composite_regex
|
| + | empty'''
|
| + p[0] = p[1]
|
| +
|
| + def p_regex_part(self, p):
|
| + '''regex_part : LEFT_PARENTHESIS composite_regex RIGHT_PARENTHESIS modifier
|
| + | STRING_REGEX modifier
|
| + | CHARACTER_CLASS_REGEX modifier
|
| + | IDENTIFIER modifier'''
|
| + if p[len(p)-1]:
|
| + p[0] = p[1:]
|
| + else:
|
| + p[0] = p[1:-1]
|
| +
|
| + def p_modifier(self, p):
|
| + '''modifier : PLUS
|
| + | QUESTION_MARK
|
| + | STAR
|
| + | empty'''
|
| + p[0] = p[1]
|
| +
|
| + def p_code(self, p):
|
| + 'code : LEFT_BRACKET code_fragments RIGHT_BRACKET'
|
| + p[0] = p[2].strip()
|
| +
|
| + def p_code_fragments(self, p):
|
| + '''code_fragments : CODE_FRAGMENT code_fragments
|
| + | empty'''
|
| + p[0] = p[1]
|
| + if len(p) == 3 and p[2]:
|
| + p[0] = p[1] + p[2]
|
| +
|
| + def p_empty(self, p):
|
| + 'empty :'
|
|
|
| def p_error(self, p):
|
| raise Exception("Syntax error in input '%s'" % p)
|
|
|
| def build(self, **kwargs):
|
| - self.parser = yacc.yacc(module=self, **kwargs)
|
| + self.parser = yacc.yacc(module=self, debug=0, write_tables=0, **kwargs)
|
| self.lexer = RuleLexer()
|
| self.lexer.build(**kwargs)
|
|
|
|
|