tools/lexer_generator/rule_parser.py - Issue 59403010: Experimental parser: easier to read rules and default rule

Side by Side Diff: tools/lexer_generator/rule_parser.py

Issue 59403010: Experimental parser: easier to read rules and default rule (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser

Patch Set: Created 7 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 # Copyright 2013 the V8 project authors. All rights reserved.	1 # Copyright 2013 the V8 project authors. All rights reserved.

2 # Redistribution and use in source and binary forms, with or without	2 # Redistribution and use in source and binary forms, with or without

3 # modification, are permitted provided that the following conditions are	3 # modification, are permitted provided that the following conditions are

4 # met:	4 # met:

5 #	5 #

6 # * Redistributions of source code must retain the above copyright	6 # * Redistributions of source code must retain the above copyright

7 # notice, this list of conditions and the following disclaimer.	7 # notice, this list of conditions and the following disclaimer.

8 # * Redistributions in binary form must reproduce the above	8 # * Redistributions in binary form must reproduce the above

9 # copyright notice, this list of conditions and the following	9 # copyright notice, this list of conditions and the following

10 # disclaimer in the documentation and/or other materials provided	10 # disclaimer in the documentation and/or other materials provided

(...skipping 18 matching lines...) Expand all Loading...
29 from rule_lexer import RuleLexer	29 from rule_lexer import RuleLexer

30 from regex_parser import RegexParser	30 from regex_parser import RegexParser

31 from nfa import NfaBuilder	31 from nfa import NfaBuilder

32 from transition_keys import TransitionKey	32 from transition_keys import TransitionKey

33	33

34 class RuleParserState:	34 class RuleParserState:

35	35

36 def __init__(self):	36 def __init__(self):

37 self.aliases = {	37 self.aliases = {

38 'eof' : RegexParser.parse("[\\0]"),	38 'eof' : RegexParser.parse("[\\0]"),

39 'any' : RegexParser.parse("."),

40 }	39 }

41 self.character_classes = {}	40 self.character_classes = {}

42 self.current_transition = None	41 self.current_state = None

43 self.rules = {}	42 self.rules = {}

44	43

45 def parse(self, string):	44 def parse(self, string):

46 return RuleParser.parse(string, self)	45 return RuleParser.parse(string, self)

47	46

48 class RuleParser:	47 class RuleParser:

49	48

50 tokens = RuleLexer.tokens	49 tokens = RuleLexer.tokens

51	50

52 def __init__(self):	51 def __init__(self):

53 self.__state = None	52 self.__state = None

54	53

55 def p_statements(self, p):	54 def p_statements(self, p):

56 'statements : statement maybe_statements'	55 'statements : aliases rules'

57	56

58 def p_maybe_statement(self, p):	57 def p_aliases(self, p):

59 '''maybe_statements : statements	58 '''aliases : alias_rule aliases

60 | empty'''	59 | empty'''

61

62 def p_statement(self, p):

63 '''statement : alias_rule

64 | transition_rule'''

65	60

66 def p_alias_rule(self, p):	61 def p_alias_rule(self, p):

67 'alias_rule : IDENTIFIER EQUALS composite_regex SEMICOLON'	62 'alias_rule : IDENTIFIER EQUALS composite_regex SEMICOLON'

68 state = self.__state	63 state = self.__state

69 assert not p[1] in state.aliases	64 assert not p[1] in state.aliases

70 graph = p[3]	65 graph = p[3]

71 state.aliases[p[1]] = graph	66 state.aliases[p[1]] = graph

72 if graph[0] == 'CLASS' or graph[0] == 'NOT_CLASS':	67 if graph[0] == 'CLASS' or graph[0] == 'NOT_CLASS':

73 classes = state.character_classes	68 classes = state.character_classes

74 assert not p[1] in classes	69 assert not p[1] in classes

75 classes[p[1]] = TransitionKey.character_class(graph, classes)	70 classes[p[1]] = TransitionKey.character_class(graph, classes)

76	71

	72 def p_rules(self, p):

	73 '''rules : state_change transition_rules rules

	74 | empty'''

	75

	76 def p_state_change(self, p):

	77 '''state_change : LESS_THAN IDENTIFIER GREATER_THAN

	78 | LESS_THAN DEFAULT GREATER_THAN'''

	79 state = self.__state

	80 state.current_state = p[2]

	81 assert state.current_state

	82 if not state.current_state in state.rules:

	83 state.rules[state.current_state] = {

	84 'default': None,

	85 'regex' : []

	86 }

	87 p[0] = state.current_state

	88

	89 def p_transition_rules(self, p):

	90 '''transition_rules : transition_rule transition_rules

	91 | empty'''

	92

77 def p_transition_rule(self, p):	93 def p_transition_rule(self, p):

78 '''transition_rule : transition composite_regex code	94 '''transition_rule : composite_regex_or_default code action

79 | transition composite_regex TRANSITION IDENTIFIER	95 | composite_regex_or_default empty action

80 | transition composite_regex TRANSITION_WITH_CODE IDENTIFIER code'''	96 | composite_regex_or_default code empty'''

81 transition = p[0]	97 rules = self.__state.rules[self.__state.current_state]

82 regex = p[2]	98 rule = (p[1], p[2], p[3])

83 rules = self.__state.rules[self.__state.current_transition]	99 if p[1] == 'default':

84 if len(p) == 4:	100 assert not rules['default']

85 rules.append(('simple', regex, None, p[3]))	101 rules['default'] = rule

86 elif len(p) == 5:

87 rules.append(('transition', regex, p[4], None))

88 elif len(p) == 6:

89 rules.append(('transition_with_code', regex, p[4], p[5]))

90 else:	102 else:

91 raise Exception()	103 rules['regex'].append(rule)

92	104

93 def p_transition(self, p):	105 def p_action(self, p):

94 '''transition : LESS_THAN IDENTIFIER GREATER_THAN'''	106 'action : ACTION_OPEN IDENTIFIER ACTION_CLOSE'

95 # | empty''' TODO skipping transition without sr conflict	107 p[0] = p[2]

96 state = self.__state	108

97 if p[1]:	109 def p_composite_regex_or_default(self, p):

98 state.current_transition = p[2]	110 '''composite_regex_or_default : DEFAULT

99 assert state.current_transition	111 | composite_regex'''

100 if not state.current_transition in state.rules:	112 p[0] = p[1]

101 state.rules[state.current_transition] = []

102 p[0] = state.current_transition

103	113

104 def p_composite_regex(self, p):	114 def p_composite_regex(self, p):

105 '''composite_regex : regex_parts OR regex_parts	115 '''composite_regex : regex_parts OR regex_parts

106 | regex_parts'''	116 | regex_parts'''

107 if len(p) == 2:	117 if len(p) == 2:

108 p[0] = p[1]	118 p[0] = p[1]

109 else:	119 else:

110 p[0] = NfaBuilder.or_graphs([p[1], p[3]])	120 p[0] = NfaBuilder.or_graphs([p[1], p[3]])

111	121

112 def p_regex_parts(self, p):	122 def p_regex_parts(self, p):

(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
177 __static_instance = None	187 __static_instance = None

178 @staticmethod	188 @staticmethod

179 def parse(data, parser_state):	189 def parse(data, parser_state):

180 if not RuleParser.__static_instance:	190 if not RuleParser.__static_instance:

181 RuleParser.__static_instance = RuleParser()	191 RuleParser.__static_instance = RuleParser()

182 RuleParser.__static_instance.build()	192 RuleParser.__static_instance.build()

183 parser = RuleParser.__static_instance	193 parser = RuleParser.__static_instance

184 parser.__state = parser_state	194 parser.__state = parser_state

185 parser.parser.parse(data, lexer=parser.lexer.lexer)	195 parser.parser.parse(data, lexer=parser.lexer.lexer)

186 parser.__state = None	196 parser.__state = None

OLD	NEW

« no previous file with comments | « tools/lexer_generator/rule_lexer.py ('k') | no next file » | no next file with comments »