OLD | NEW |
| (Empty) |
1 #!/usr/bin/python | |
2 # | |
3 # Copyright (c) 2012 The Native Client Authors. All rights reserved. | |
4 # Use of this source code is governed by a BSD-style license that can be | |
5 # found in the LICENSE file. | |
6 # | |
7 | |
8 """ | |
9 A simple recursive-descent parser for the table file format. | |
10 | |
11 The grammar implemented here is roughly (taking some liberties with whitespace | |
12 and comment parsing): | |
13 | |
14 table_file ::= table+ eof ; | |
15 | |
16 action ::= decoder_action arch | decoder_method | '"' | |
17 arch ::= '(' word+ ')' | |
18 citation ::= '(' word+ ')' | |
19 decoder_action ::= id (id (word (id)?)?)? | |
20 decoder_method ::= '->' id | |
21 footer ::= '+' '-' '-' | |
22 header ::= "|" (id '(' int (':' int)? ')')+ | |
23 int ::= word (where word is a sequence of digits) | |
24 id ::= word (where word is sequence of letters, digits and _) | |
25 parenthesized_exp ::= '(' (word | punctuation)+ ')' | |
26 pattern ::= 'word' | '-' | '"' | |
27 row ::= '|' pattern+ action | |
28 table ::= table_desc header row+ footer | |
29 table_desc ::= '+' '-' '-' id citation? | |
30 | |
31 If a decoder_action has more than one element, the interpretation is as follows: | |
32 id[0] = action (plus optional architecture) to apply. | |
33 id[1] = Arm rule action corresponds to. | |
34 word = Bit pattern of rule. | |
35 id[3] = Name defining additional constraints for match. | |
36 """ | |
37 | |
38 import re | |
39 import dgen_core | |
40 | |
def parse_tables(input):
  """Entry point for the parser. Input should be a file or file-like."""
  return Parser().parse(input)
45 | |
class Token(object):
  """A single lexical unit of input, tagged with the kind of text it holds."""

  def __init__(self, kind, value=None):
    self.kind = kind
    # With no (truthy) value supplied, the kind doubles as the value —
    # e.g. punctuation tokens such as '+' where kind and text coincide.
    self.value = value or kind
52 | |
class Parser(object):
  """Parses a set of tables from the input file.

  Tokenizes the input lazily (one line at a time, '#' comments stripped)
  and recursively descends the grammar described in the module docstring.
  All errors are reported by raising an Exception via _unexpected.
  """

  def __init__(self):
    self._words = []           # Words left on current line, not yet parsed.
    self._line_no = 0          # The current line being parsed.
    self._token = None         # Cached lookahead token, or None.
    self._reached_eof = False  # True when end of file reached.
    # Punctuation allowed. Must be ordered such that if p1 != p2 are in
    # the list, and p1.startswith(p2), then p1 must appear before p2
    # (e.g. '->' before '-'), so the longest punctuation match wins.
    self._punctuation = ['->', '-', '+', '(', ')', '=', ':', '"', '|']

  def parse(self, input):
    """Parses all tables from input and returns the generated decoder.

    Raises an Exception if the input is malformed, if no primary table
    was defined, or if no tables were defined at all.
    """
    self.input = input             # The remaining input to parse.
    decoder = dgen_core.Decoder()  # The generated decoder of parse tables.
    # Read tables while there are more ('+' starts a table_desc).
    while self._next_token().kind == '+':
      self._table(decoder)

    if not self._next_token().kind == 'eof':
      self._unexpected('unrecognized input found')
    if not decoder.primary:
      self._unexpected('No primary table defined')
    if not decoder.tables():
      self._unexpected('No tables defined')
    return decoder

  def _action(self, last_action, last_arch):
    """ action ::= decoder_action arch | decoder_method | '"'

    A '"' (ditto mark) repeats the action/arch of the previous row.
    """
    if self._next_token().kind == '"':
      self._read_token('"')
      return (last_action, last_arch)
    if self._next_token().kind == '=':
      action = self._decoder_action()
      arch = None
      if self._next_token().kind == '(':
        arch = self._arch()
      return (action, arch)
    elif self._next_token().kind == '->':
      return (self._decoder_method(), None)
    else:
      self._unexpected("Row doesn't define an action")

  def _arch(self):
    """ arch ::= '(' word+ ')' """
    return ' '.join(self._parenthesized_exp())

  def _citation(self):
    """ citation ::= '(' word+ ')' """
    return ' '.join(self._parenthesized_exp())

  def _read_id_or_none(self, read_id):
    """Reads an optional id (read_id=True) or word, returning None if the
    next token ends the entry, or if the literal text is 'None'."""
    if self._next_token().kind in ['|', '+', '(']:
      return None
    ident = self._id() if read_id else self._read_token('word').value
    return None if ident and ident == 'None' else ident

  def _decoder_action(self):
    """ decoder_action ::= '=' id (id (word (id)?)?)?

    (The leading '=' was consumed as lookahead by _action; see there.)
    """
    self._read_token('=')
    name = self._read_id_or_none(True)
    rule = self._read_id_or_none(True)
    pattern = self._read_id_or_none(False)
    constraints = self._read_id_or_none(True)
    return dgen_core.DecoderAction(name, rule, pattern, constraints)

  def _decoder_method(self):
    """ decoder_method ::= '->' id """
    self._read_token('->')
    name = self._id()
    return dgen_core.DecoderMethod(name)

  def _footer(self):
    """ footer ::= '+' '-' '-' """
    self._read_token('+')
    self._read_token('-')
    self._read_token('-')

  def _header(self, table):
    """ header ::= "|" (id '(' int (':' int)? ')')+ """
    self._read_token('|')
    while not self._next_token().kind == '|':
      name = self._read_token('word').value
      self._read_token('(')
      hi_bit = self._int()
      lo_bit = hi_bit  # Single-bit column unless a ':' range follows.
      if self._next_token().kind == ':':
        self._read_token(':')
        lo_bit = self._int()
      self._read_token(')')
      table.add_column(name, hi_bit, lo_bit)

  def _int(self):
    """ int ::= word

    Int is a sequence of digits. Returns the corresponding integer.
    """
    word = self._read_token('word').value
    m = re.match(r'^([0-9]+)$', word)
    if m:
      return int(word)
    else:
      self._unexpected('integer expected but found "%s"' % word)

  def _id(self):
    """ id ::= word

    Word starts with a letter, and is followed by letters, digits,
    and underscores. Returns the corresponding identifier.
    """
    ident = self._read_token('word').value
    # Note: the classes must be [a-zA-Z]; the previous [A-z] range also
    # (incorrectly) matched '[', '\\', ']', '^', '_' and '`'.
    m = re.match(r'^[a-zA-Z][a-zA-Z0-9_]*$', ident)
    if not m:
      self._unexpected('"%s" is not a valid identifier' % ident)
    return ident

  def _parenthesized_exp(self, minlength=1):
    """ parenthesized_exp ::= '(' (word | punctuation)+ ')'

    The punctuation doesn't include ')'.
    Returns the sequence of token values parsed. Raises an Exception
    if fewer than minlength tokens precede the closing ')'.
    """
    self._read_token('(')
    words = []
    while not self._at_eof() and self._next_token().kind != ')':
      words.append(self._read_token().value)
    if len(words) < minlength:
      self._unexpected("len(parenthesized expression) < %s" % minlength)
    self._read_token(')')
    return words

  def _pattern(self, last_pattern):
    """ pattern ::= 'word' | '-' | '"'

    Arguments are:
      last_pattern - The pattern defined in the same column of the
          previous row; reused when a ditto mark ('"') is read.
    """
    if self._next_token().kind == '"':
      self._read_token('"')
      return last_pattern
    if self._next_token().kind in ['-', 'word']:
      return self._read_token().value
    self._unexpected('Malformed pattern')

  def _row(self, table, last_patterns=None,
           last_action=None, last_arch=None):
    """ row ::= '|' pattern+ (decoder_action arch? | decoder_method)?

    Passed in sequence of patterns and action from last row,
    and returns list of patterns and action from this row.
    """
    patterns = []           # Patterns as found on input.
    expanded_patterns = []  # Patterns after being expanded.
    self._read_token('|')
    num_patterns = 0
    num_patterns_last = len(last_patterns) if last_patterns else None
    while self._next_token().kind not in ['=', '->', '|', '+']:
      if not last_patterns or num_patterns < num_patterns_last:
        last_pattern = last_patterns[num_patterns] if last_patterns else None
        pattern = self._pattern(last_pattern)
        patterns.append(pattern)
        expanded_patterns.append(table.define_pattern(pattern, num_patterns))
        num_patterns += 1
      else:
        # Processed patterns in this row, since width is now the
        # same as last row.
        break

    (action, arch) = self._action(last_action, last_arch)
    table.add_row(expanded_patterns, action, arch)
    return (patterns, action, arch)

  def _table(self, decoder):
    """ table ::= table_desc header row+ footer """
    table = self._table_desc()
    # Parenthesized form prints identically under Python 2 and 3.
    print('Reading table %s...' % table.name)
    self._header(table)
    (pattern, action, arch) = self._row(table)
    while not self._next_token().kind == '+':
      (pattern, action, arch) = self._row(table, pattern, action, arch)
    if not decoder.add(table):
      self._unexpected('Multiple tables with name %s' % table.name)
    self._footer()

  def _table_desc(self):
    """ table_desc ::= '+' '-' '-' id citation? """
    self._read_token('+')
    self._read_token('-')
    self._read_token('-')
    name = self._id()
    citation = None
    if self._next_token().kind == '(':
      citation = self._citation()
    return dgen_core.Table(name, citation)

  def _at_eof(self):
    """Returns true if next token is the eof token."""
    return self._next_token().kind == 'eof'

  def _read_token(self, kind=None):
    """Reads and returns the next token from input.

    If kind is given, raises an Exception unless the token matches it.
    """
    token = self._next_token()
    self._token = None  # Consume the cached lookahead.
    if kind and kind != token.kind:
      self._unexpected('Expected "%s" but found "%s"'
                       % (kind, token.kind))
    return token

  def _next_token(self):
    """Returns (without consuming) the next token from the input."""
    # First see if a lookahead token is cached.
    if self._token: return self._token

    # If no more tokens left on the current line, read
    # input till more tokens are found (or eof is reached).
    while not self._reached_eof and not self._words:
      self._words = self._read_line().split()

    if self._words:
      # More tokens found. Convert the first word to a token.
      word = self._words.pop(0)
      # First remove any applicable punctuation.
      for p in self._punctuation:
        index = word.find(p)
        if index == 0:
          # Found punctuation at the start; return it, pushing back
          # whatever follows it in the word.
          self._pushback(word[len(p):])
          self._token = Token(p)
          return self._token
        elif index > 0:
          # Punctuation later in the word; split it off for later.
          self._pushback(word[index:])
          word = word[:index]
      # If reached, word doesn't contain any punctuation, so return it.
      self._token = Token('word', word)
    else:
      # No more tokens found, assume eof.
      self._token = Token('eof')
    return self._token

  def _pushback(self, word):
    """Puts word (if non-empty) back onto the list of words."""
    if word:
      self._words.insert(0, word)

  def _read_line(self):
    """Reads the next line of input and returns it with '#' comments and
    surrounding whitespace stripped. Returns '' once eof is reached."""
    self._line_no += 1
    line = self.input.readline()
    if line:
      return re.sub(r'#.*', '', line).strip()
    else:
      self._reached_eof = True
      return ''

  def _unexpected(self, context='Unexpected line in input'):
    """Reports that we didn't find the expected context."""
    raise Exception('Line %d: %s' % (self._line_no, context))
OLD | NEW |