| Index: tools/lexer_generator/rule_parser.py
|
| diff --git a/tools/lexer_generator/regex_lexer.py b/tools/lexer_generator/rule_parser.py
|
| similarity index 52%
|
| copy from tools/lexer_generator/regex_lexer.py
|
| copy to tools/lexer_generator/rule_parser.py
|
| index deabd84c5d6ab42850ec384e8179ae4ceb9e0419..0ac92ab970c6a099f260e3ca2eb7a8f08a0bc8f2 100644
|
| --- a/tools/lexer_generator/regex_lexer.py
|
| +++ b/tools/lexer_generator/rule_parser.py
|
| @@ -25,78 +25,50 @@
|
| # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
| # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
| -import ply.lex as lex
|
| -
|
| -class RegexLexer:
|
| -
|
| - tokens = (
|
| -
|
| - 'GROUP_BEGIN',
|
| - 'GROUP_END',
|
| -
|
| - 'CLASS_BEGIN',
|
| - 'CLASS_END',
|
| -
|
| - 'OR',
|
| - 'ONE_OR_MORE',
|
| - 'ZERO_OR_MORE',
|
| - 'ZERO_OR_ONE',
|
| - 'ANY',
|
| -
|
| - 'LITERAL',
|
| -
|
| - 'RANGE',
|
| - 'NOT',
|
| - 'CLASS_LITERAL',
|
| - )
|
| -
|
| - states = (
|
| - ('class','exclusive'),
|
| - )
|
| -
|
| - def t_ESCAPED_LITERAL(self, t):
|
| - r'\\\(|\\\)|\\\[|\\\]|\\\||\\\+|\\\*|\\\?|\\\.|\\\\'
|
| - t.type = 'LITERAL'
|
| - t.value = t.value[1:]
|
| - return t
|
| -
|
| - t_GROUP_BEGIN = r'\('
|
| - t_GROUP_END = r'\)'
|
| -
|
| - t_OR = r'\|'
|
| - t_ONE_OR_MORE = r'\+'
|
| - t_ZERO_OR_MORE = r'\*'
|
| - t_ZERO_OR_ONE = r'\?'
|
| -
|
| - t_ANY = r'\.'
|
| -
|
| - t_LITERAL = r'.'
|
| -
|
| - def t_CLASS_BEGIN(self, t):
|
| - r'\['
|
| - self.lexer.push_state('class')
|
| - return t
|
| -
|
| - def t_class_CLASS_END(self, t):
|
| - r'\]'
|
| - self.lexer.pop_state()
|
| - return t
|
| -
|
| - t_class_RANGE = '-'
|
| - t_class_NOT = '\^'
|
| -
|
| - def t_class_ESCAPED_CLASS_LITERAL(self, t):
|
| - r'\\\^|\\-'
|
| - t.type = 'CLASS_LITERAL'
|
| - t.value = t.value[1:]
|
| - return t
|
| -
|
| - t_class_CLASS_LITERAL = r'[a-zA-Z]' # fix this
|
| -
|
| - t_ANY_ignore = '\n'
|
| -
|
| - def t_ANY_error(self, t):
|
| - raise Exception("Illegal character '%s'" % t.value[0])
|
| +import ply.yacc as yacc
|
| +from rule_lexer import RuleLexer
|
| +
|
class RuleParser:
    """PLY-based parser for lexer-generator rule files.

    Recognizes three kinds of statements (tokenized by RuleLexer):
    aliases (name -> regex), condition transitions (old condition +
    regex -> new condition), and conditions (old condition + regex ->
    action body).  Results are collected into ``self.aliases`` and
    ``self.transitions``.

    Call ``build()`` once to construct the yacc parser and its
    companion RuleLexer, then ``parse(data)`` per input.
    """

    # Token list must be a class attribute for PLY's introspection.
    tokens = RuleLexer.tokens

    def __init__(self):
        # Per-instance result state.  These were previously mutable
        # CLASS attributes, which made every RuleParser instance share
        # (and accumulate into) the same two dicts — a second parser
        # would silently inherit the first one's parsed rules.
        self.aliases = dict()
        self.transitions = dict()

    def _record_transition(self, old_condition, regex, target):
        # Shared bookkeeping for both condition productions: append
        # (regex, target) under old_condition, creating the list on
        # first use.
        self.transitions.setdefault(old_condition, []).append(
            (regex, target))

    def p_statement_alias(self, p):
        'statement : ALIAS'
        # NOTE(review): the productions read named groups off the
        # lexer's last regex match rather than token values — this
        # relies on RuleLexer defining (?P<name>...) groups; confirm
        # against rule_lexer.py.
        name = self.lexer.lexer.lexmatch.group('name')
        regex = self.lexer.lexer.lexmatch.group('regex')
        self.aliases[name] = regex

    def p_statement_condition_transition(self, p):
        'statement : CONDITION_TRANSITION'
        old_condition = self.lexer.lexer.lexmatch.group('old')
        regex = self.lexer.lexer.lexmatch.group('regex')
        new_condition = self.lexer.lexer.lexmatch.group('new')
        self._record_transition(old_condition, regex, new_condition)

    def p_statement_condition(self, p):
        'statement : CONDITION'
        old_condition = self.lexer.lexer.lexmatch.group('old')
        regex = self.lexer.lexer.lexmatch.group('regex')
        body = self.lexer.lexer.lexmatch.group('body')
        self._record_transition(old_condition, regex, body)

    def p_empty(self, p):
        # Empty production; currently unreferenced by the grammar but
        # kept for grammar extensions.
        'empty :'

    def p_error(self, p):
        # Fail fast on any syntax error; p is the offending token (or
        # None at EOF).
        raise Exception("Syntax error in input '%s'" % p)

    def build(self, **kwargs):
        """Construct the yacc parser and its companion RuleLexer."""
        self.parser = yacc.yacc(module=self, **kwargs)
        self.lexer = RuleLexer()
        self.lexer.build(**kwargs)

    def parse(self, data):
        """Parse *data*, populating self.aliases / self.transitions."""
        return self.parser.parse(data, lexer=self.lexer.lexer)
|
|
|