OLD | NEW |
| (Empty) |
1 #!/usr/bin/python | |
2 # | |
3 # Copyright (c) 2012 The Native Client Authors. All rights reserved. | |
4 # Use of this source code is governed by a BSD-style license that can be | |
5 # found in the LICENSE file. | |
6 # | |
7 | |
8 """ | |
9 A simple recursive-descent parser for the table file format. | |
10 | |
11 The grammar implemented here is roughly (taking some liberties with whitespace | |
12 and comment parsing): | |
13 | |
14 table_file ::= table+ eof ; | |
15 | |
16 action ::= decoder_action arch | decoder_method | '"' | |
17 arch ::= '(' word+ ')' | |
18 citation ::= '(' word+ ')' | |
19 decoder_action ::= id (id (word (id)?)?)? | |
20 decoder_method ::= '->' id | |
21 footer ::= '+' '-' '-' | |
22 header ::= "|" (id '(' int (':' int)? ')')+ | |
23 int ::= word (where word is a sequence of digits) | |
24 id ::= word (where word is sequence of letters, digits and _) | |
25 parenthesized_exp ::= '(' (word | punctuation)+ ')' | |
26 pattern ::= 'word' | '-' | '"' | |
27 row ::= '|' pattern+ action | |
28 table ::= table_desc header row+ footer | |
29 table_desc ::= '+' '-' '-' id citation? | |
30 | |
31 If a decoder_action has more than one element, the interpretation is as follows: | |
32 id[0] = action (plus optional architecture) to apply. | |
33 id[1] = Arm rule action corresponds to. | |
34 word = Bit pattern of rule. | |
35 id[3] = Name defining additional constraints for match. | |
36 """ | |
37 | |
38 import re | |
39 import dgen_core | |
40 | |
def parse_tables(input):
  """Entry point for the parser. Input should be a file or file-like."""
  return Parser().parse(input)
45 | |
class Token(object):
  """A single lexical unit of input, tagged with the kind of text it holds."""

  def __init__(self, kind, value=None):
    self.kind = kind
    # With no (truthy) value supplied, the kind doubles as the value —
    # e.g. punctuation tokens such as '+' where kind and text coincide.
    self.value = value or kind
52 | |
class Parser(object):
  """Parses a set of tables from the input file.

  Tokenizes the input lazily (one line at a time, '#' comments stripped)
  and recursively descends the grammar described in the module docstring.
  All errors are reported by raising an Exception via _unexpected.
  """

  def __init__(self):
    self._words = []           # Words left on current line, not yet parsed.
    self._line_no = 0          # The current line being parsed.
    self._token = None         # Cached lookahead token, or None.
    self._reached_eof = False  # True when end of file reached.
    # Punctuation allowed. Must be ordered such that if p1 != p2 are in
    # the list, and p1.startswith(p2), then p1 must appear before p2
    # (e.g. '->' before '-'), so the longest punctuation match wins.
    self._punctuation = ['->', '-', '+', '(', ')', '=', ':', '"', '|']

  def parse(self, input):
    """Parses all tables from input and returns the generated decoder.

    Raises an Exception if the input is malformed, if no primary table
    was defined, or if no tables were defined at all.
    """
    self.input = input             # The remaining input to parse.
    decoder = dgen_core.Decoder()  # The generated decoder of parse tables.
    # Read tables while there are more ('+' starts a table_desc).
    while self._next_token().kind == '+':
      self._table(decoder)

    if not self._next_token().kind == 'eof':
      self._unexpected('unrecognized input found')
    if not decoder.primary:
      self._unexpected('No primary table defined')
    if not decoder.tables():
      self._unexpected('No tables defined')
    return decoder

  def _action(self, last_action, last_arch):
    """ action ::= decoder_action arch | decoder_method | '"'

    A '"' (ditto mark) repeats the action/arch of the previous row.
    """
    if self._next_token().kind == '"':
      self._read_token('"')
      return (last_action, last_arch)
    if self._next_token().kind == '=':
      action = self._decoder_action()
      arch = None
      if self._next_token().kind == '(':
        arch = self._arch()
      return (action, arch)
    elif self._next_token().kind == '->':
      return (self._decoder_method(), None)
    else:
      self._unexpected("Row doesn't define an action")

  def _arch(self):
    """ arch ::= '(' word+ ')' """
    return ' '.join(self._parenthesized_exp())

  def _citation(self):
    """ citation ::= '(' word+ ')' """
    return ' '.join(self._parenthesized_exp())

  def _read_id_or_none(self, read_id):
    """Reads an optional id (read_id=True) or word, returning None if the
    next token ends the entry, or if the literal text is 'None'."""
    if self._next_token().kind in ['|', '+', '(']:
      return None
    ident = self._id() if read_id else self._read_token('word').value
    return None if ident and ident == 'None' else ident

  def _decoder_action(self):
    """ decoder_action ::= '=' id (id (word (id)?)?)?

    (The leading '=' was consumed as lookahead by _action; see there.)
    """
    self._read_token('=')
    name = self._read_id_or_none(True)
    rule = self._read_id_or_none(True)
    pattern = self._read_id_or_none(False)
    constraints = self._read_id_or_none(True)
    return dgen_core.DecoderAction(name, rule, pattern, constraints)

  def _decoder_method(self):
    """ decoder_method ::= '->' id """
    self._read_token('->')
    name = self._id()
    return dgen_core.DecoderMethod(name)

  def _footer(self):
    """ footer ::= '+' '-' '-' """
    self._read_token('+')
    self._read_token('-')
    self._read_token('-')

  def _header(self, table):
    """ header ::= "|" (id '(' int (':' int)? ')')+ """
    self._read_token('|')
    while not self._next_token().kind == '|':
      name = self._read_token('word').value
      self._read_token('(')
      hi_bit = self._int()
      lo_bit = hi_bit  # Single-bit column unless a ':' range follows.
      if self._next_token().kind == ':':
        self._read_token(':')
        lo_bit = self._int()
      self._read_token(')')
      table.add_column(name, hi_bit, lo_bit)

  def _int(self):
    """ int ::= word

    Int is a sequence of digits. Returns the corresponding integer.
    """
    word = self._read_token('word').value
    m = re.match(r'^([0-9]+)$', word)
    if m:
      return int(word)
    else:
      self._unexpected('integer expected but found "%s"' % word)

  def _id(self):
    """ id ::= word

    Word starts with a letter, and is followed by letters, digits,
    and underscores. Returns the corresponding identifier.
    """
    ident = self._read_token('word').value
    # Note: the classes must be [a-zA-Z]; the previous [A-z] range also
    # (incorrectly) matched '[', '\\', ']', '^', '_' and '`'.
    m = re.match(r'^[a-zA-Z][a-zA-Z0-9_]*$', ident)
    if not m:
      self._unexpected('"%s" is not a valid identifier' % ident)
    return ident

  def _parenthesized_exp(self, minlength=1):
    """ parenthesized_exp ::= '(' (word | punctuation)+ ')'

    The punctuation doesn't include ')'.
    Returns the sequence of token values parsed. Raises an Exception
    if fewer than minlength tokens precede the closing ')'.
    """
    self._read_token('(')
    words = []
    while not self._at_eof() and self._next_token().kind != ')':
      words.append(self._read_token().value)
    if len(words) < minlength:
      self._unexpected("len(parenthesized expression) < %s" % minlength)
    self._read_token(')')
    return words

  def _pattern(self, last_pattern):
    """ pattern ::= 'word' | '-' | '"'

    Arguments are:
      last_pattern - The pattern defined in the same column of the
          previous row; reused when a ditto mark ('"') is read.
    """
    if self._next_token().kind == '"':
      self._read_token('"')
      return last_pattern
    if self._next_token().kind in ['-', 'word']:
      return self._read_token().value
    self._unexpected('Malformed pattern')

  def _row(self, table, last_patterns=None,
           last_action=None, last_arch=None):
    """ row ::= '|' pattern+ (decoder_action arch? | decoder_method)?

    Passed in sequence of patterns and action from last row,
    and returns list of patterns and action from this row.
    """
    patterns = []           # Patterns as found on input.
    expanded_patterns = []  # Patterns after being expanded.
    self._read_token('|')
    num_patterns = 0
    num_patterns_last = len(last_patterns) if last_patterns else None
    while self._next_token().kind not in ['=', '->', '|', '+']:
      if not last_patterns or num_patterns < num_patterns_last:
        last_pattern = last_patterns[num_patterns] if last_patterns else None
        pattern = self._pattern(last_pattern)
        patterns.append(pattern)
        expanded_patterns.append(table.define_pattern(pattern, num_patterns))
        num_patterns += 1
      else:
        # Processed patterns in this row, since width is now the
        # same as last row.
        break

    (action, arch) = self._action(last_action, last_arch)
    table.add_row(expanded_patterns, action, arch)
    return (patterns, action, arch)

  def _table(self, decoder):
    """ table ::= table_desc header row+ footer """
    table = self._table_desc()
    # Parenthesized form prints identically under Python 2 and 3.
    print('Reading table %s...' % table.name)
    self._header(table)
    (pattern, action, arch) = self._row(table)
    while not self._next_token().kind == '+':
      (pattern, action, arch) = self._row(table, pattern, action, arch)
    if not decoder.add(table):
      self._unexpected('Multiple tables with name %s' % table.name)
    self._footer()

  def _table_desc(self):
    """ table_desc ::= '+' '-' '-' id citation? """
    self._read_token('+')
    self._read_token('-')
    self._read_token('-')
    name = self._id()
    citation = None
    if self._next_token().kind == '(':
      citation = self._citation()
    return dgen_core.Table(name, citation)

  def _at_eof(self):
    """Returns true if next token is the eof token."""
    return self._next_token().kind == 'eof'

  def _read_token(self, kind=None):
    """Reads and returns the next token from input.

    If kind is given, raises an Exception unless the token matches it.
    """
    token = self._next_token()
    self._token = None  # Consume the cached lookahead.
    if kind and kind != token.kind:
      self._unexpected('Expected "%s" but found "%s"'
                       % (kind, token.kind))
    return token

  def _next_token(self):
    """Returns (without consuming) the next token from the input."""
    # First see if a lookahead token is cached.
    if self._token: return self._token

    # If no more tokens left on the current line, read
    # input till more tokens are found (or eof is reached).
    while not self._reached_eof and not self._words:
      self._words = self._read_line().split()

    if self._words:
      # More tokens found. Convert the first word to a token.
      word = self._words.pop(0)
      # First remove any applicable punctuation.
      for p in self._punctuation:
        index = word.find(p)
        if index == 0:
          # Found punctuation at the start; return it, pushing back
          # whatever follows it in the word.
          self._pushback(word[len(p):])
          self._token = Token(p)
          return self._token
        elif index > 0:
          # Punctuation later in the word; split it off for later.
          self._pushback(word[index:])
          word = word[:index]
      # If reached, word doesn't contain any punctuation, so return it.
      self._token = Token('word', word)
    else:
      # No more tokens found, assume eof.
      self._token = Token('eof')
    return self._token

  def _pushback(self, word):
    """Puts word (if non-empty) back onto the list of words."""
    if word:
      self._words.insert(0, word)

  def _read_line(self):
    """Reads the next line of input and returns it with '#' comments and
    surrounding whitespace stripped. Returns '' once eof is reached."""
    self._line_no += 1
    line = self.input.readline()
    if line:
      return re.sub(r'#.*', '', line).strip()
    else:
      self._reached_eof = True
      return ''

  def _unexpected(self, context='Unexpected line in input'):
    """Reports that we didn't find the expected context."""
    raise Exception('Line %d: %s' % (self._line_no, context))
OLD | NEW |