#!/usr/bin/python
#
# Copyright (c) 2012 The Native Client Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#

"""
A simple recursive-descent parser for the table file format.

The grammar implemented here is roughly (taking some liberties with whitespace
and comment parsing):

table_file ::= table+ eof

action ::= decoder_action arch? | decoder_method | '"'
arch ::= '(' word+ ')'
citation ::= '(' word+ ')'
decoder_action ::= '=' id (id (word (id)?)?)?
decoder_method ::= '->' id
footer ::= '+' '-' '-'
header ::= '|' (id '(' int (':' int)? ')')+
int ::= word (where word is a sequence of digits)
id ::= word (where word is a sequence of letters, digits and _)
parenthesized_exp ::= '(' (word | punctuation)+ ')'
pattern ::= word | '-' | '"'
row ::= '|' pattern+ action
table ::= table_desc header row+ footer
table_desc ::= '+' '-' '-' id citation?

If a decoder_action has more than one element, the interpretation is as
follows:
  id[0] = action (plus optional architecture) to apply.
  id[1] = Arm rule the action corresponds to.
  word  = Bit pattern of the rule.
  id[3] = Name defining additional constraints for the match.
"""
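
# For reference, a hypothetical table file matching this grammar might look
# like the following (the table name, citation, column names, bit patterns,
# and actions below are made up for illustration):
#
#   +-- branch_table (See Section A5.3)
#   | cond(31:28) op(27:20)
#   | 1010        -           = Branch BranchRule
#   | -           01010000    -> other_table
#   +--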

import re
import dgen_core

def parse_tables(input):
  """Entry point for the parser. Input should be a file or file-like."""
  parser = Parser()
  return parser.parse(input)

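
# A typical caller opens the table file and hands it to parse_tables(), then
# works with the returned dgen_core.Decoder. A minimal sketch (the file name
# below is hypothetical):
#
#   input_file = open('armv7.table')
#   decoder = parse_tables(input_file)
#   input_file.close()
#   for table in decoder.tables():
#     print table.name
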
class Token(object):
  """Holds a (characterized) unit of text for the parser."""

  def __init__(self, kind, value=None):
    self.kind = kind
    self.value = value if value else kind

class Parser(object):
  """Parses a set of tables from the input file."""

  def parse(self, input):
    self.input = input  # The remaining input to parse.
    decoder = dgen_core.Decoder()  # The decoder of parsed tables.
    # Read tables while more remain on the input.
    while self._next_token().kind == '+':
      self._table(decoder)

    if not self._next_token().kind == 'eof':
      self._unexpected('unrecognized input found')
    if not decoder.primary:
      self._unexpected('No primary table defined')
    if not decoder.tables():
      self._unexpected('No tables defined')
    return decoder

  def __init__(self):
    self._words = []  # Words left on the current line, not yet parsed.
    self._line_no = 0  # The current line being parsed.
    self._token = None  # The next token from the input.
    self._reached_eof = False  # True when end of file reached.
    # Punctuation allowed. Must be ordered such that if p1 != p2 are in the
    # list, and p1.startswith(p2), then p1 must appear before p2.
    self._punctuation = ['->', '-', '+', '(', ')', '=', ':', '"', '|']
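    # For example, '->' must appear before '-' so that a word like '->foo'
    # tokenizes as '->' followed by 'foo' rather than '-' followed by '>foo'.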

  def _action(self, last_action, last_arch):
    """ action ::= decoder_action arch? | decoder_method | '"' """
    if self._next_token().kind == '"':
      self._read_token('"')
      return (last_action, last_arch)
    if self._next_token().kind == '=':
      action = self._decoder_action()
      arch = None
      if self._next_token().kind == '(':
        arch = self._arch()
      return (action, arch)
    elif self._next_token().kind == '->':
      return (self._decoder_method(), None)
    else:
      self._unexpected("Row doesn't define an action")

  def _arch(self):
    """ arch ::= '(' word+ ')' """
    return ' '.join(self._parenthesized_exp())

  def _citation(self):
    """ citation ::= '(' word+ ')' """
    return ' '.join(self._parenthesized_exp())

  def _read_id_or_none(self, read_id):
    """Reads an id (or word if read_id is False). Returns None if the next
       token starts an arch, row, or footer instead, or if the value read
       is the literal 'None'."""
    if self._next_token().kind in ['|', '+', '(']:
      return None
    id = self._id() if read_id else self._read_token('word').value
    return None if id and id == 'None' else id

  def _decoder_action(self):
    """ decoder_action ::= '=' id (id (word (id)?)?)? """
    self._read_token('=')
    name = self._read_id_or_none(True)
    rule = self._read_id_or_none(True)
    pattern = self._read_id_or_none(False)
    constraints = self._read_id_or_none(True)
    return dgen_core.DecoderAction(name, rule, pattern, constraints)

  def _decoder_method(self):
    """ decoder_method ::= '->' id """
    self._read_token('->')
    name = self._id()
    return dgen_core.DecoderMethod(name)

  def _footer(self):
    """ footer ::= '+' '-' '-' """
    self._read_token('+')
    self._read_token('-')
    self._read_token('-')

  def _header(self, table):
    """ header ::= '|' (id '(' int (':' int)? ')')+ """
    self._read_token('|')
    while not self._next_token().kind == '|':
      name = self._read_token('word').value
      self._read_token('(')
      hi_bit = self._int()
      lo_bit = hi_bit
      if self._next_token().kind == ':':
        self._read_token(':')
        lo_bit = self._int()
      self._read_token(')')
      table.add_column(name, hi_bit, lo_bit)
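    # Note: a column written without a range, e.g. the hypothetical 'S(20)',
    # names a single bit; lo_bit defaults to hi_bit above, so it is recorded
    # as add_column('S', 20, 20).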

  def _int(self):
    """ int ::= word

    Int is a sequence of digits. Returns the corresponding integer.
    """
    word = self._read_token('word').value
    m = re.match(r'^([0-9]+)$', word)
    if m:
      return int(word)
    else:
      self._unexpected('integer expected but found "%s"' % word)

  def _id(self):
    """ id ::= word

    Word starts with a letter, followed by letters, digits,
    and underscores. Returns the corresponding identifier.
    """
    ident = self._read_token('word').value
    m = re.match(r'^[a-zA-Z][a-zA-Z0-9_]*$', ident)
    if not m:
      self._unexpected('"%s" is not a valid identifier' % ident)
    return ident

  def _parenthesized_exp(self, minlength=1):
    """ parenthesized_exp ::= '(' (word | punctuation)+ ')'

    The punctuation doesn't include ')'.
    Returns the sequence of token values parsed.
    """
    self._read_token('(')
    words = []
    while not self._at_eof() and self._next_token().kind != ')':
      words.append(self._read_token().value)
    if len(words) < minlength:
      self._unexpected("len(parenthesized expression) < %s" % minlength)
    self._read_token(')')
    return words

  def _pattern(self, last_pattern):
    """ pattern ::= word | '-' | '"'

    Arguments are:
      last_pattern - The pattern appearing in this column on the last row,
        which is reused when the column contains '"'.
    Returns the pattern for this column.
    """
    if self._next_token().kind == '"':
      self._read_token('"')
      return last_pattern
    if self._next_token().kind in ['-', 'word']:
      return self._read_token().value
    self._unexpected('Malformed pattern')

  def _row(self, table, last_patterns=None,
           last_action=None, last_arch=None):
    """ row ::= '|' pattern+ (decoder_action arch? | decoder_method | '"')

    Passed the sequence of patterns and the action from the last row;
    returns the list of patterns and the action from this row.
    """
    patterns = []           # Patterns as found on input.
    expanded_patterns = []  # Patterns after being expanded.
    self._read_token('|')
    num_patterns = 0
    num_patterns_last = len(last_patterns) if last_patterns else None
    while self._next_token().kind not in ['=', '->', '|', '+']:
      if not last_patterns or num_patterns < num_patterns_last:
        last_pattern = last_patterns[num_patterns] if last_patterns else None
        pattern = self._pattern(last_pattern)
        patterns.append(pattern)
        expanded_patterns.append(table.define_pattern(pattern, num_patterns))
        num_patterns += 1
      else:
        # All patterns for this row have been read, since the width now
        # matches the last row; the remaining tokens belong to the action.
        break

    (action, arch) = self._action(last_action, last_arch)
    table.add_row(expanded_patterns, action, arch)
    return (patterns, action, arch)
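    # Example of row-to-row inheritance (these patterns and the action are
    # hypothetical):
    #   | 1010 0000 = SomeAction
    #   | "    0001 "
    # The '"' entries give the second row the pattern '1010' in the first
    # column and the action SomeAction from the previous row.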

  def _table(self, decoder):
    """ table ::= table_desc header row+ footer """
    table = self._table_desc()
    print 'Reading table %s...' % table.name
    self._header(table)
    (pattern, action, arch) = self._row(table)
    while not self._next_token().kind == '+':
      (pattern, action, arch) = self._row(table, pattern, action, arch)
    if not decoder.add(table):
      self._unexpected('Multiple tables with name %s' % table.name)
    self._footer()

  def _table_desc(self):
    """ table_desc ::= '+' '-' '-' id citation? """
    self._read_token('+')
    self._read_token('-')
    self._read_token('-')
    name = self._id()
    citation = None
    if self._next_token().kind == '(':
      citation = self._citation()
    return dgen_core.Table(name, citation)

  def _at_eof(self):
    """Returns true if the next token is the eof token."""
    return self._next_token().kind == 'eof'

  def _read_token(self, kind=None):
    """Reads and returns the next token from the input, failing if 'kind'
       is given and the token is not of that kind."""
    token = self._next_token()
    self._token = None
    if kind and kind != token.kind:
      self._unexpected('Expected "%s" but found "%s"'
                       % (kind, token.kind))
    return token

  def _next_token(self):
    """Returns the next token from the input (without consuming it)."""
    # First see if a token is already cached.
    if self._token: return self._token

    # If no more tokens are left on the current line, read input until
    # more tokens are found.
    while not self._reached_eof and not self._words:
      self._words = self._read_line().split()

    if self._words:
      # More tokens found. Convert the first word to a token.
      word = self._words.pop(0)
      # First remove any applicable punctuation.
      for p in self._punctuation:
        index = word.find(p)
        if index == 0:
          # Found punctuation at the start; return it as its own token.
          self._pushback(word[len(p):])
          self._token = Token(p)
          return self._token
        elif index > 0:
          # Punctuation later in the word; split it off for a later call.
          self._pushback(word[index:])
          word = word[:index]
      # If reached, word doesn't contain any punctuation, so return it.
      self._token = Token('word', word)
    else:
      # No more tokens found, assume eof.
      self._token = Token('eof')
    return self._token
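    # For example, a word like 'cond(31:28)' read from a header line comes
    # back from successive calls as the tokens 'cond', '(', '31', ':', '28'
    # and ')'.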

  def _pushback(self, word):
    """Puts word back onto the list of words."""
    if word:
      self._words.insert(0, word)

  def _read_line(self):
    """Reads the next line of input (with comments stripped) and returns it.
       Returns the empty string once end of file is reached."""
    self._line_no += 1
    line = self.input.readline()
    if line:
      return re.sub(r'#.*', '', line).strip()
    else:
      self._reached_eof = True
      return ''

  def _unexpected(self, context='Unexpected line in input'):
    """Reports that we didn't find the expected context."""
    raise Exception('Line %d: %s' % (self._line_no, context))