tools/lexer_generator/rule_parser.py - Issue 149113010: Experimental parser: unify parser construction

Side by Side Diff: tools/lexer_generator/rule_parser.py

Issue 149113010: Experimental parser: unify parser construction (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser

Patch Set: Created 6 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 # Copyright 2013 the V8 project authors. All rights reserved.	1 # Copyright 2013 the V8 project authors. All rights reserved.

2 # Redistribution and use in source and binary forms, with or without	2 # Redistribution and use in source and binary forms, with or without

3 # modification, are permitted provided that the following conditions are	3 # modification, are permitted provided that the following conditions are

4 # met:	4 # met:

5 #	5 #

6 # * Redistributions of source code must retain the above copyright	6 # * Redistributions of source code must retain the above copyright

7 # notice, this list of conditions and the following disclaimer.	7 # notice, this list of conditions and the following disclaimer.

8 # * Redistributions in binary form must reproduce the above	8 # * Redistributions in binary form must reproduce the above

9 # copyright notice, this list of conditions and the following	9 # copyright notice, this list of conditions and the following

10 # disclaimer in the documentation and/or other materials provided	10 # disclaimer in the documentation and/or other materials provided

(...skipping 10 matching lines...) Expand all Loading...
21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT	21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,	22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY	23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT	24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE	25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.	26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

27	27

28 import ply.lex as lex	28 import ply.lex as lex

29 import ply.yacc as yacc	29 import ply.yacc as yacc

30 from action import Term, Action	30 from action import Term, Action

31 from regex_parser import RegexParser	31 from regex_parser import RegexParser, ParserBuilder

32 from nfa_builder import NfaBuilder	32 from nfa_builder import NfaBuilder

33 from dfa import Dfa	33 from dfa import Dfa

34 from dfa_optimizer import DfaOptimizer	34 from dfa_optimizer import DfaOptimizer

35 from transition_keys import TransitionKey, KeyEncoding	35 from transition_keys import TransitionKey, KeyEncoding

36	36

37 class RuleLexer:	37 class RuleLexer:

38	38

39 tokens = (	39 tokens = (

40 'DEFAULT_ACTION',	40 'DEFAULT_ACTION',

41 'EPSILON',	41 'EPSILON',

(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
89 t_EQUALS = '='	89 t_EQUALS = '='

90 t_LEFT_PARENTHESIS = r'\('	90 t_LEFT_PARENTHESIS = r'\('

91 t_RIGHT_PARENTHESIS = r'\)'	91 t_RIGHT_PARENTHESIS = r'\)'

92 t_GRAPH_OPEN = '<<'	92 t_GRAPH_OPEN = '<<'

93 t_GRAPH_CLOSE = '>>'	93 t_GRAPH_CLOSE = '>>'

94 t_SEMICOLON = ';'	94 t_SEMICOLON = ';'

95 t_ACTION_OPEN = '<'	95 t_ACTION_OPEN = '<'

96 t_ACTION_CLOSE = '>'	96 t_ACTION_CLOSE = '>'

97 t_COMMA = ','	97 t_COMMA = ','

98	98

99 def t_LEFT_BRACKET(self, t):

100 r'{'

101 self.lexer.push_state('code')

102 self.nesting = 1

103 return t

104

105 def t_ANY_error(self, t):	99 def t_ANY_error(self, t):

106 raise Exception("Illegal character '%s'" % t.value[0])	100 raise Exception("Illegal character '%s'" % t.value[0])

107	101

108 def build(self, **kwargs):

109 self.lexer = lex.lex(module=self, **kwargs)

110

111 class RuleParserState:	102 class RuleParserState:

112	103

113 def __init__(self, encoding):	104 def __init__(self, encoding):

114 self.aliases = {}	105 self.aliases = {}

115 self.character_classes = {}	106 self.character_classes = {}

116 self.current_state = None	107 self.current_state = None

117 self.rules = {}	108 self.rules = {}

118 self.transitions = set()	109 self.transitions = set()

119 self.encoding = encoding	110 self.encoding = encoding

120	111

(...skipping 169 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
290 p[0] = NfaBuilder.apply_modifier(modifier, term)	281 p[0] = NfaBuilder.apply_modifier(modifier, term)

291 else:	282 else:

292 p[0] = term	283 p[0] = term

293	284

294 def p_regex_string_literal(self, p):	285 def p_regex_string_literal(self, p):

295 'regex_string_literal : STRING'	286 'regex_string_literal : STRING'

296 string = p[1][1:-1]	287 string = p[1][1:-1]

297 escape_char = lambda string, char: string.replace(char, "\\" + char)	288 escape_char = lambda string, char: string.replace(char, "\\" + char)

298 string = reduce(escape_char, "+?*|.[](){}", string).replace("\\\"", "\"")	289 string = reduce(escape_char, "+?*|.[](){}", string).replace("\\\"", "\"")

299 p[0] = RegexParser.parse(string)	290 p[0] = RegexParser.parse(string)

	291

300 def p_regex(self, p):	292 def p_regex(self, p):

301 'regex : REGEX'	293 'regex : REGEX'

302 string = p[1][1:-1].replace("\\/", "/")	294 string = p[1][1:-1].replace("\\/", "/")

303 p[0] = RegexParser.parse(string)	295 p[0] = RegexParser.parse(string)

304	296

305 def p_regex_class(self, p):	297 def p_regex_class(self, p):

306 'regex_class : CHARACTER_CLASS_REGEX'	298 'regex_class : CHARACTER_CLASS_REGEX'

307 p[0] = RegexParser.parse(p[1])	299 p[0] = RegexParser.parse(p[1])

308	300

309 def p_regex_alias(self, p):	301 def p_regex_alias(self, p):

310 'regex_alias : IDENTIFIER'	302 'regex_alias : IDENTIFIER'

311 p[0] = self.__state.aliases[p[1]]	303 p[0] = self.__state.aliases[p[1]]

312	304

313 def p_modifier(self, p):	305 def p_modifier(self, p):

314 '''modifier : PLUS	306 '''modifier : PLUS

315 | QUESTION_MARK	307 | QUESTION_MARK

316 | STAR	308 | STAR

317 | empty'''	309 | empty'''

318 p[0] = p[1]	310 p[0] = p[1]

319	311

320 def p_empty(self, p):	312 def p_empty(self, p):

321 'empty :'	313 'empty :'

322	314

323 def p_error(self, p):	315 def p_error(self, p):

324 raise Exception("Syntax error in input '%s'" % str(p))	316 raise Exception("Syntax error in input '%s'" % str(p))

325	317

326 def build(self, **kwargs):

327 self.parser = yacc.yacc(module=self, debug=0, write_tables=0, **kwargs)

328 self.lexer = RuleLexer()

329 self.lexer.build(**kwargs)

330

331 __static_instance = None

332 @staticmethod	318 @staticmethod

333 def parse(data, parser_state):	319 def parse(string, parser_state):

334 parser = RuleParser.__static_instance	320 new_lexer = lambda: RuleLexer()

335 if not parser:	321 new_parser = lambda: RuleParser()

336 parser = RuleParser()	322 def preparse(parser):

337 parser.build()	323 parser.__state = parser_state

338 RuleParser.__static_instance = parser	324 def postparse(parser):

339 parser.__state = parser_state	325 parser.__state = None

340 try:	326 return ParserBuilder.parse(

341 parser.parser.parse(data, lexer=parser.lexer.lexer)	327 string, "RuleParser", new_lexer, new_parser, preparse, postparse)

342 except Exception:

343 RuleParser.__static_instance = None

344 raise

345 parser.__state = None

346	328

347 class RuleProcessor(object):	329 class RuleProcessor(object):

348	330

349 def __init__(self, string, encoding_name):	331 def __init__(self, string, encoding_name):

350 self.__automata = {}	332 self.__automata = {}

351 self.__default_action = None	333 self.__default_action = None

352 self.__parser_state = RuleParserState(KeyEncoding.get(encoding_name))	334 self.__parser_state = RuleParserState(KeyEncoding.get(encoding_name))

353 RuleParser.parse(string, self.__parser_state)	335 RuleParser.parse(string, self.__parser_state)

354 self.__process_parser_state()	336 self.__process_parser_state()

355	337

(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
414 for tree_name, v in parser_state.rules.items():	396 for tree_name, v in parser_state.rules.items():

415 assert v['trees'], "lexer state %s is empty" % tree_name	397 assert v['trees'], "lexer state %s is empty" % tree_name

416 rule_map[tree_name] = NfaBuilder.or_terms(v['trees'])	398 rule_map[tree_name] = NfaBuilder.or_terms(v['trees'])

417 # build the automata	399 # build the automata

418 for name, tree in rule_map.items():	400 for name, tree in rule_map.items():

419 self.__automata[name] = RuleProcessor.Automata(	401 self.__automata[name] = RuleProcessor.Automata(

420 parser_state.encoding, parser_state.character_classes, rule_map, name)	402 parser_state.encoding, parser_state.character_classes, rule_map, name)

421 # process default_action	403 # process default_action

422 default_action = parser_state.rules['default']['default_action']	404 default_action = parser_state.rules['default']['default_action']

423 self.__default_action = Action(Term.empty_term(), default_action)	405 self.__default_action = Action(Term.empty_term(), default_action)

OLD	NEW

« no previous file with comments | « tools/lexer_generator/regex_parser.py ('k') | no next file » | no next file with comments »