| Index: tools/lexer_generator/rule_parser.py
|
| diff --git a/tools/lexer_generator/regex_lexer.py b/tools/lexer_generator/rule_parser.py
|
| similarity index 52%
|
| copy from tools/lexer_generator/regex_lexer.py
|
| copy to tools/lexer_generator/rule_parser.py
|
| index deabd84c5d6ab42850ec384e8179ae4ceb9e0419..0ac92ab970c6a099f260e3ca2eb7a8f08a0bc8f2 100644
|
| --- a/tools/lexer_generator/regex_lexer.py
|
| +++ b/tools/lexer_generator/rule_parser.py
|
| @@ -25,78 +25,50 @@
|
| # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
| # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
| -import ply.lex as lex
|
| -
|
| -class RegexLexer:
|
| -
|
| - tokens = (
|
| -
|
| - 'GROUP_BEGIN',
|
| - 'GROUP_END',
|
| -
|
| - 'CLASS_BEGIN',
|
| - 'CLASS_END',
|
| -
|
| - 'OR',
|
| - 'ONE_OR_MORE',
|
| - 'ZERO_OR_MORE',
|
| - 'ZERO_OR_ONE',
|
| - 'ANY',
|
| -
|
| - 'LITERAL',
|
| -
|
| - 'RANGE',
|
| - 'NOT',
|
| - 'CLASS_LITERAL',
|
| - )
|
| -
|
| - states = (
|
| - ('class','exclusive'),
|
| - )
|
| -
|
| - def t_ESCAPED_LITERAL(self, t):
|
| - r'\\\(|\\\)|\\\[|\\\]|\\\||\\\+|\\\*|\\\?|\\\.|\\\\'
|
| - t.type = 'LITERAL'
|
| - t.value = t.value[1:]
|
| - return t
|
| -
|
| - t_GROUP_BEGIN = r'\('
|
| - t_GROUP_END = r'\)'
|
| -
|
| - t_OR = r'\|'
|
| - t_ONE_OR_MORE = r'\+'
|
| - t_ZERO_OR_MORE = r'\*'
|
| - t_ZERO_OR_ONE = r'\?'
|
| -
|
| - t_ANY = r'\.'
|
| -
|
| - t_LITERAL = r'.'
|
| -
|
| - def t_CLASS_BEGIN(self, t):
|
| - r'\['
|
| - self.lexer.push_state('class')
|
| - return t
|
| -
|
| - def t_class_CLASS_END(self, t):
|
| - r'\]'
|
| - self.lexer.pop_state()
|
| - return t
|
| -
|
| - t_class_RANGE = '-'
|
| - t_class_NOT = '\^'
|
| -
|
| - def t_class_ESCAPED_CLASS_LITERAL(self, t):
|
| - r'\\\^|\\-'
|
| - t.type = 'CLASS_LITERAL'
|
| - t.value = t.value[1:]
|
| - return t
|
| -
|
| - t_class_CLASS_LITERAL = r'[a-zA-Z]' # fix this
|
| -
|
| - t_ANY_ignore = '\n'
|
| -
|
| - def t_ANY_error(self, t):
|
| - raise Exception("Illegal character '%s'" % t.value[0])
|
| +import ply.yacc as yacc
|
| +from rule_lexer import RuleLexer
|
| +
|
class RuleParser:
    """PLY-based parser for lexer-generator rule files.

    Recognizes three kinds of statements (tokenized by RuleLexer):
    aliases (name -> regex), condition transitions (old condition +
    regex -> new condition), and conditions (old condition + regex ->
    action body).  Results are collected into ``self.aliases`` and
    ``self.transitions``.

    Call ``build()`` once to construct the yacc parser and its
    companion RuleLexer, then ``parse(data)`` per input.
    """

    # Token list must be a class attribute for PLY's introspection.
    tokens = RuleLexer.tokens

    def __init__(self):
        # Per-instance result state.  These were previously mutable
        # CLASS attributes, which made every RuleParser instance share
        # (and accumulate into) the same two dicts — a second parser
        # would silently inherit the first one's parsed rules.
        self.aliases = dict()
        self.transitions = dict()

    def _record_transition(self, old_condition, regex, target):
        # Shared bookkeeping for both condition productions: append
        # (regex, target) under old_condition, creating the list on
        # first use.
        self.transitions.setdefault(old_condition, []).append(
            (regex, target))

    def p_statement_alias(self, p):
        'statement : ALIAS'
        # NOTE(review): the productions read named groups off the
        # lexer's last regex match rather than token values — this
        # relies on RuleLexer defining (?P<name>...) groups; confirm
        # against rule_lexer.py.
        name = self.lexer.lexer.lexmatch.group('name')
        regex = self.lexer.lexer.lexmatch.group('regex')
        self.aliases[name] = regex

    def p_statement_condition_transition(self, p):
        'statement : CONDITION_TRANSITION'
        old_condition = self.lexer.lexer.lexmatch.group('old')
        regex = self.lexer.lexer.lexmatch.group('regex')
        new_condition = self.lexer.lexer.lexmatch.group('new')
        self._record_transition(old_condition, regex, new_condition)

    def p_statement_condition(self, p):
        'statement : CONDITION'
        old_condition = self.lexer.lexer.lexmatch.group('old')
        regex = self.lexer.lexer.lexmatch.group('regex')
        body = self.lexer.lexer.lexmatch.group('body')
        self._record_transition(old_condition, regex, body)

    def p_empty(self, p):
        # Empty production; currently unreferenced by the grammar but
        # kept for grammar extensions.
        'empty :'

    def p_error(self, p):
        # Fail fast on any syntax error; p is the offending token (or
        # None at EOF).
        raise Exception("Syntax error in input '%s'" % p)

    def build(self, **kwargs):
        """Construct the yacc parser and its companion RuleLexer."""
        self.parser = yacc.yacc(module=self, **kwargs)
        self.lexer = RuleLexer()
        self.lexer.build(**kwargs)

    def parse(self, data):
        """Parse *data*, populating self.aliases / self.transitions."""
        return self.parser.parse(data, lexer=self.lexer.lexer)
|
|
|