| OLD | NEW |
| 1 # Copyright 2013 the V8 project authors. All rights reserved. | 1 # Copyright 2013 the V8 project authors. All rights reserved. |
| 2 # Redistribution and use in source and binary forms, with or without | 2 # Redistribution and use in source and binary forms, with or without |
| 3 # modification, are permitted provided that the following conditions are | 3 # modification, are permitted provided that the following conditions are |
| 4 # met: | 4 # met: |
| 5 # | 5 # |
| 6 # * Redistributions of source code must retain the above copyright | 6 # * Redistributions of source code must retain the above copyright |
| 7 # notice, this list of conditions and the following disclaimer. | 7 # notice, this list of conditions and the following disclaimer. |
| 8 # * Redistributions in binary form must reproduce the above | 8 # * Redistributions in binary form must reproduce the above |
| 9 # copyright notice, this list of conditions and the following | 9 # copyright notice, this list of conditions and the following |
| 10 # disclaimer in the documentation and/or other materials provided | 10 # disclaimer in the documentation and/or other materials provided |
| (...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 55 %s | 55 %s |
| 56 </script> | 56 </script> |
| 57 ''' | 57 ''' |
| 58 | 58 |
# One JS call that renders a named automaton into the page; filled with
# (display label, canvas/script id). NOTE(review): interior whitespace
# reconstructed from a diff view — confirm against the original file.
load_template = ''' draw('%s', '%s');'''

# Wraps the concatenated draw() calls in a <script> element for the page body.
load_outer_template = ''' <script>
%s
</script>'''
| 64 | 64 |
class Generator(object):
  """Builds NFA/DFA automata from lexer rule definitions.

  The rule text is parsed in __init__; process_rules() then converts each
  parsed rule set into an (nfa, dfa) pair stored in self.__automata, keyed
  by rule-set name. generate_html() renders the automata for inspection and
  lex() runs the default DFA over a string as a sanity check.
  """

  def __init__(self, rules):
    # 'rules' is the raw rule-file text; RuleParser fills parser_state.
    parser_state = RuleParserState()
    RuleParser.parse(rules, parser_state)
    self.__automata = {}
    self.process_rules(parser_state)

  def generate_html(self):
    """Returns an HTML page (string) that draws every automaton except the
    'Normal' one, as dot graphs rendered client-side."""
    scripts = []
    loads = []
    for i, name in enumerate(self.__automata):
      (nfa, dfa) = self.__automata[name]
      if name == 'Normal': continue
      # Unique element ids for this rule set's nfa/dfa scripts.
      (nfa_i, dfa_i) = ("nfa_%d" % i, "dfa_%d" % i)
      scripts.append(script_template % (nfa_i, nfa.to_dot()))
      scripts.append(script_template % (dfa_i, dfa.to_dot()))
      loads.append(load_template % ("nfa [%s]" % name, nfa_i))
      loads.append(load_template % ("dfa [%s]" % name, dfa_i))
    body = "\n".join(scripts) + (load_outer_template % "\n".join(loads))
    return file_template % body

  def process_rules(self, parser_state):
    """Converts each parsed rule set into an (nfa, dfa) pair and stores it
    in self.__automata under the rule-set's name."""
    rule_map = {}  # rule-set name -> combined NFA graph
    builder = NfaBuilder()
    builder.set_character_classes(parser_state.character_classes)
    assert 'default' in parser_state.rules
    def process(k, v):
      # Builds the combined graph for rule set 'k' from its parsed form 'v'.
      assert 'default' in v
      graphs = []
      for (graph, action) in v['regex']:
        (precedence, code, transition) = action
        if code:
          graph = NfaBuilder.add_action(graph, (precedence, code, None))
        if transition == 'continue':
          if not v['default'][1][2] == 'continue':
            graph = NfaBuilder.add_continue(graph)
          else:
            pass # TODO null key
        elif (transition == 'break' or
              transition == 'terminate' or
              transition == 'terminate_illegal'):
          # Terminal transitions carry a fixed low-priority (10000) action.
          graph = NfaBuilder.add_action(graph, (10000, transition, None))
        else:
          # Any other transition names another rule set; only the default
          # rule set may do this, splicing in the already-built subgraph.
          assert k == 'default'
          graph = NfaBuilder.join_subgraph(graph, transition, rule_map[transition])
        graphs.append(graph)
      graph = NfaBuilder.or_graphs(graphs)
      # merge default action
      (precedence, code, transition) = v['default'][1]
      assert transition == 'continue' or transition == 'break'
      if transition == 'continue':
        assert k != 'default'
        graph = NfaBuilder.add_incoming_action(graph, (10000, k, None))
      if code:
        graph = NfaBuilder.add_incoming_action(graph, (precedence, code, None))
      rule_map[k] = graph
    # Process 'default' last so rule_map already holds every subgraph it
    # may join via named transitions above.
    for k, v in parser_state.rules.items():
      if k == 'default': continue
      process(k, v)
    process('default', parser_state.rules['default'])
    for rule_name, graph in rule_map.items():
      nfa = builder.nfa(graph)
      (start, dfa_nodes) = nfa.compute_dfa()
      dfa = Dfa(start, dfa_nodes)
      self.__automata[rule_name] = (nfa, dfa)

  # Lexes strings with the help of DFAs produced by the grammar. For sanity
  # checking the automata.
  def lex(self, string):
    (nfa, dfa) = self.__automata['default'] # FIXME

    # Each emitted tuple is (code, transition, start, end, text) with
    # start/end expressed as absolute offsets into the original string.
    action_stream = []
    terminate_seen = False
    offset = 0  # absolute offset of the current 'string' slice
    while not terminate_seen and string:
      result = list(dfa.lex(string))
      last_position = 0
      for (action, position) in result:
        action_stream.append((action[1], action[2], last_position + offset, position + 1 + offset, string[last_position:(position + 1)]))
        last_position = position
        if action[2] == 'terminate':
          terminate_seen = True
      # Restart the DFA after the last match; NOTE(review): offset grows by
      # last_position while the slice drops last_position + 1 chars — looks
      # like a potential off-by-one, confirm against dfa.lex() semantics.
      string = string[(last_position + 1):]
      offset += last_position
    return action_stream
| 125 if __name__ == '__main__': | 152 if __name__ == '__main__': |
| 126 | 153 |
| 127 parser = argparse.ArgumentParser() | 154 parser = argparse.ArgumentParser() |
| 128 parser.add_argument('--html') | 155 parser.add_argument('--html') |
| 129 parser.add_argument('--re', default='src/lexer/lexer_py.re') | 156 parser.add_argument('--re', default='src/lexer/lexer_py.re') |
| 157 parser.add_argument('--input') |
| 130 args = parser.parse_args() | 158 args = parser.parse_args() |
| 131 | 159 |
| 132 re_file = args.re | 160 re_file = args.re |
| 133 parser_state = RuleParserState() | 161 parser_state = RuleParserState() |
| 134 print "parsing %s" % re_file | 162 print "parsing %s" % re_file |
| 135 with open(re_file, 'r') as f: | 163 with open(re_file, 'r') as f: |
| 136 RuleParser.parse(f.read(), parser_state) | 164 generator = Generator(f.read()) |
| 137 html_data = process_rules(parser_state) | |
| 138 | 165 |
| 139 html_file = args.html | 166 html_file = args.html |
| 140 if html_file: | 167 if html_file: |
| 141 html = generate_html(html_data) | 168 html = generator.generate_html() |
| 142 with open(args.html, 'w') as f: | 169 with open(args.html, 'w') as f: |
| 143 f.write(html) | 170 f.write(html) |
| 144 print "wrote html to %s" % html_file | 171 print "wrote html to %s" % html_file |
| 172 |
| 173 input_file = args.input |
| 174 if input_file: |
| 175 with open(input_file, 'r') as f: |
| 176 input_text = f.read() + '\0' |
| 177 for t in generator.lex(input_text): |
| 178 print t |
| OLD | NEW |