Chromium Code Reviews

Diff: tools/lexer_generator/rule_parser.py

Issue 59643002: Experimental parser: simplified rule lexer and parser (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 7 years, 1 month ago

Unchanged file header and imports:

# Copyright 2013 the V8 project authors. All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
#       copyright notice, this list of conditions and the following
#       disclaimer in the documentation and/or other materials provided
(...skipping 15 matching license lines...)
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import ply.yacc as yacc
from rule_lexer import RuleLexer

class RuleParser:

Changed portion of the file, shown as a unified diff ('-' = old, '+' = new):

@@ -33,50 +33,103 @@
   tokens = RuleLexer.tokens
 
   def __init__(self):
-    self.aliases = dict()
-    self.transitions = dict()
+    self.aliases = {}
+    self.current_transition = None
+    self.rules = {}
 
-  def p_statement_alias(self, p):
-    'statement : ALIAS EQUALS REGEX'
-    regex = self.lexer.lexer.lexmatch.group('regex')
-    self.aliases[p[1]] = regex
-
-  def p_statement_condition_transition(self, p):
-    'statement : CONDITION_BEGIN CONDITION CONDITION_END REGEX_TRANSITION'
-    old_condition = p[2]
-    regex = self.lexer.lexer.lexmatch.group('regex').strip()
-    new_condition = self.lexer.lexer.lexmatch.group('new')
-    if old_condition not in self.transitions:
-      self.transitions[old_condition] = dict()
-    self.transitions[old_condition][regex] = ('condition', new_condition)
-
-  def p_statement_condition_body(self, p):
-    'statement : CONDITION_BEGIN CONDITION CONDITION_END REGEX_BODY'
-    old_condition = p[2]
-    regex = self.lexer.lexer.lexmatch.group('regex').strip()
-    body = self.lexer.lexer.lexmatch.group('body').strip()
-    if old_condition not in self.transitions:
-      self.transitions[old_condition] = dict()
-    self.transitions[old_condition][regex] = ('body', body)
-
-  def p_statement_condition_transition_body(self, p):
-    'statement : CONDITION_BEGIN CONDITION CONDITION_END REGEX_TRANSITION_BODY'
-    old_condition = p[2]
-    regex = self.lexer.lexer.lexmatch.group('regex').strip()
-    new_condition = self.lexer.lexer.lexmatch.group('new').strip()
-    body = self.lexer.lexer.lexmatch.group('body').strip()
-    if old_condition not in self.transitions:
-      self.transitions[old_condition] = dict()
-    self.transitions[old_condition][regex] = (
-        'condition_and_body', new_condition, body)
+  def p_statements(self, p):
+    'statements : statement maybe_statements'
+
+  def p_maybe_statement(self, p):
+    '''maybe_statements : statements
+                        | empty'''
+
+  def p_statement(self, p):
+    '''statement : alias_rule
+                 | transition_rule'''
+
+  def p_alias_rule(self, p):
+    'alias_rule : IDENTIFIER EQUALS composite_regex SEMICOLON'
+    assert not p[1] in self.aliases
+    self.aliases[p[1]] = p[3]
+
+  def p_transition_rule(self, p):
+    '''transition_rule : transition composite_regex code
+                       | transition composite_regex TRANSITION IDENTIFIER
+                       | transition composite_regex TRANSITION_WITH_CODE IDENTIFIER code'''
+    transition = p[0]
+    regex = p[2]
+    rules = self.rules[self.current_transition]
+    if len(p) == 4:
+      rules.append(('simple', regex, p[3]))
+    elif len(p) == 5:
+      rules.append(('transition', regex, p[4]))
+    elif len(p) == 6:
+      rules.append(('transition_with_code', regex, p[4], p[5]))
+    else:
+      raise Exception()
+
+  def p_transition(self, p):
+    '''transition : LESS_THAN IDENTIFIER GREATER_THAN'''
+    # | empty''' TODO skipping transition without sr conflict
+    if p[1]:
+      self.current_transition = p[2]
+    assert self.current_transition
+    if not self.current_transition in self.rules:
+      self.rules[self.current_transition] = []
+    p[0] = self.current_transition
+
+  def p_composite_regex(self, p):
+    '''composite_regex : regex_part OR regex_part maybe_regex_parts
+                       | regex_part maybe_regex_parts'''
+    if p[len(p)-1]:
+      p[0] = p[1:]
+    else:
+      p[0] = p[1:-1]
+
+  def p_maybe_regex_part(self, p):
+    '''maybe_regex_parts : composite_regex
+                         | empty'''
+    p[0] = p[1]
+
+  def p_regex_part(self, p):
+    '''regex_part : LEFT_PARENTHESIS composite_regex RIGHT_PARENTHESIS modifier
+                  | STRING_REGEX modifier
+                  | CHARACTER_CLASS_REGEX modifier
+                  | IDENTIFIER modifier'''
+    if p[len(p)-1]:
+      p[0] = p[1:]
+    else:
+      p[0] = p[1:-1]
+
+  def p_modifier(self, p):
+    '''modifier : PLUS
+                | QUESTION_MARK
+                | STAR
+                | empty'''
+    p[0] = p[1]
+
+  def p_code(self, p):
+    'code : LEFT_BRACKET code_fragments RIGHT_BRACKET'
+    p[0] = p[2].strip()
+
+  def p_code_fragments(self, p):
+    '''code_fragments : CODE_FRAGMENT code_fragments
+                      | empty'''
+    p[0] = p[1]
+    if len(p) == 3 and p[2]:
+      p[0] = p[1] + p[2]
+
+  def p_empty(self, p):
+    'empty :'
 
   def p_error(self, p):
     raise Exception("Syntax error in input '%s'" % p)
 
   def build(self, **kwargs):
-    self.parser = yacc.yacc(module=self, **kwargs)
+    self.parser = yacc.yacc(module=self, debug=0, write_tables=0, **kwargs)
     self.lexer = RuleLexer()
     self.lexer.build(**kwargs)
 
   def parse(self, data):
     return self.parser.parse(data, lexer=self.lexer.lexer)
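
For readers trying the patch out, the sketch below shows one way the new RuleParser could be driven. Only build(), parse(), and the aliases/rules attributes come from the code above; the file name lexer_rules.txt is a hypothetical placeholder, and the concrete rule syntax is fixed by rule_lexer.py, which is not part of this file's diff.

# Minimal driver sketch, assuming rules are read from a hypothetical file
# 'lexer_rules.txt'; the surface syntax of that file is defined by the tokens
# in rule_lexer.py, so no rule text is spelled out here.
from rule_parser import RuleParser

parser = RuleParser()
parser.build()                      # builds the yacc parser and the RuleLexer

with open('lexer_rules.txt') as f:  # hypothetical input file
  parser.parse(f.read())

# The grammar actions populate these tables as a side effect of parsing:
#   parser.aliases -- alias IDENTIFIER -> parsed composite_regex
#   parser.rules   -- condition name -> list of ('simple' | 'transition' |
#                     'transition_with_code', regex, ...) tuples
print(parser.aliases)
print(parser.rules)

Note that parse() itself returns None with this grammar, since the start rule p_statements does not set p[0]; the useful output is the aliases and rules dictionaries filled in by the semantic actions.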
