tools/lexer_generator/lexer.py - Issue 60663007: Experimental lexer generator: First draft of a Python lexer (based on the automata).

Side by Side Diff: tools/lexer_generator/lexer.py

Issue 60663007: Experimental lexer generator: First draft of a Python lexer (based on the automata). (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser

Patch Set: . Created 7 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 # Copyright 2013 the V8 project authors. All rights reserved.	1 # Copyright 2013 the V8 project authors. All rights reserved.

2 # Redistribution and use in source and binary forms, with or without	2 # Redistribution and use in source and binary forms, with or without

3 # modification, are permitted provided that the following conditions are	3 # modification, are permitted provided that the following conditions are

4 # met:	4 # met:

5 #	5 #

6 # * Redistributions of source code must retain the above copyright	6 # * Redistributions of source code must retain the above copyright

7 # notice, this list of conditions and the following disclaimer.	7 # notice, this list of conditions and the following disclaimer.

8 # * Redistributions in binary form must reproduce the above	8 # * Redistributions in binary form must reproduce the above

9 # copyright notice, this list of conditions and the following	9 # copyright notice, this list of conditions and the following

10 # disclaimer in the documentation and/or other materials provided	10 # disclaimer in the documentation and/or other materials provided

(...skipping 12 matching lines...) Expand all Loading...
23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY	23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT	24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE	25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.	26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

27	27

28 import argparse	28 import argparse

29 from nfa import Nfa, NfaBuilder	29 from nfa import Nfa, NfaBuilder

30 from dfa import Dfa	30 from dfa import Dfa

31 from rule_parser import RuleParser, RuleParserState	31 from rule_parser import RuleParser, RuleParserState

32	32

33 file_template = '''	33 # FIXME: We need to move this to a common place!

34 <html>

35 <head>

36 <script src="viz.js"></script>

37 <script>

38 function draw(name, id) {

39 code = document.getElementById(id).innerHTML

40 document.body.innerHTML += "<h1>" + name + "</h1>";

41 try {

42 document.body.innerHTML += Viz(code, 'svg');

43 } catch(e) {

44 document.body.innerHTML += "<h3>error</h3>";

45 }

46 }

47 </script>

48 </head>

49 <body>

50 %s

51 </body>

52 </html>'''

53

54 script_template = ''' <script type="text/vnd.graphviz" id="%s">

55 %s

56 </script>

57 '''

58

59 load_template = ''' draw('%s', '%s');'''

60

61 load_outer_template = ''' <script>

62 %s

63 </script>'''

64

65 def generate_html(data):

66 scripts = []

67 loads = []

68 for i, (name, nfa, dfa) in enumerate(data):

69 if name == 'Normal': continue

70 (nfa_i, dfa_i) = ("nfa_%d" % i, "dfa_%d" % i)

71 scripts.append(script_template % (nfa_i, nfa.to_dot()))

72 scripts.append(script_template % (dfa_i, dfa.to_dot()))

73 loads.append(load_template % ("nfa [%s]" % name, nfa_i))

74 loads.append(load_template % ("dfa [%s]" % name, dfa_i))

75 body = "\n".join(scripts) + (load_outer_template % "\n".join(loads))

76 return file_template % body

77

78 def process_rules(parser_state):	34 def process_rules(parser_state):

79 rule_map = {}	35 dfas = {}

80 builder = NfaBuilder()	36 builder = NfaBuilder()

81 builder.set_character_classes(parser_state.character_classes)	37 builder.set_character_classes(parser_state.character_classes)

82 assert 'default' in parser_state.rules

83 for k, v in parser_state.rules.items():	38 for k, v in parser_state.rules.items():

84 assert 'default' in v

85 graphs = []	39 graphs = []

86 for (graph, action) in v['regex']:	40 for (graph, action) in v['regex']:

87 graphs.append(NfaBuilder.add_action(graph, action))	41 graphs.append(NfaBuilder.add_action(graph, action))

88 rule_map[k] = NfaBuilder.or_graphs(graphs)	42 nfa = builder.nfa(NfaBuilder.or_graphs(graphs))

89 html_data = []	43 (start_name, dfa_nodes) = nfa.compute_dfa()

90 for rule_name, graph in rule_map.items():	44 dfas[k] = Dfa(start_name, dfa_nodes)

91 nfa = builder.nfa(graph)	45 return dfas

92 (start, dfa_nodes) = nfa.compute_dfa()	46

93 dfa = Dfa(start, dfa_nodes)	47 # Lexes strings with the help of DFAs produced by the grammar. For sanity

94 html_data.append((rule_name, nfa, dfa))	48 # checking the automata.

95 return html_data	49 class Lexer(object):

	50

	51 def __init__(self, rules):

	52 parser_state = RuleParserState()

	53 RuleParser.parse(rules, parser_state)

	54 self.dfas = process_rules(parser_state)

	55

	56 def lex(self, string):

	57 dfa = self.dfas['default'] # FIXME

	58

	59 action_stream = []

	60 terminate_seen = False

	61 offset = 0

	62 while not terminate_seen and string:

	63 result = list(dfa.lex(string))

	64 last_position = 0

	65 for (action, position) in result:

	66 action_stream.append((action[1], action[2], last_position + offset, position + 1 + offset, string[last_position:(position + 1)]))

	67 last_position = position

	68 if action[2] == 'terminate':

	69 terminate_seen = True

	70 string = string[(last_position + 1):]

	71 offset += last_position

	72 return action_stream

96	73

97 if __name__ == '__main__':	74 if __name__ == '__main__':

98	75

99 parser = argparse.ArgumentParser()	76 parser = argparse.ArgumentParser()

100 parser.add_argument('--html')	77 parser.add_argument('--rules')

	78 parser.add_argument('--input')

101 args = parser.parse_args()	79 args = parser.parse_args()

102	80

103 re_file = 'src/lexer/lexer_py.re'	81 re_file = args.rules

	82 input_file = args.input

104	83

105 parser_state = RuleParserState()

106 with open(re_file, 'r') as f:	84 with open(re_file, 'r') as f:

107 RuleParser.parse(f.read(), parser_state)	85 rules = f.read()

108 html_data = process_rules(parser_state)	86 with open(input_file, 'r') as f:

	87 input_text = f.read() + '\0'

109	88

110 html_file = args.html	89 lexer = Lexer(rules)

111 if html_file:	90 for t in lexer.lex(input_text):

112 html = generate_html(html_data)	91 print t

113 with open(args.html, 'w') as f:

114 f.write(html)

115 print "wrote html to %s" % html_file

OLD	NEW

« no previous file with comments | « tools/lexer_generator/dfa.py ('k') | tools/lexer_generator/lexer_test.py » ('j') | no next file with comments »