src/trusted/validator/x86/decoder/generator/ncdecode_forms.h - Issue 625923004: Delete old x86 validator.

Side by Side Diff: src/trusted/validator/x86/decoder/generator/ncdecode_forms.h

Issue 625923004: Delete old x86 validator. (Closed) Base URL: svn://svn.chromium.org/native_client/trunk/src/native_client

Patch Set: rebase master Created 6 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
	(Empty)
1 /*

2 * Copyright (c) 2012 The Native Client Authors. All rights reserved.

3 * Use of this source code is governed by a BSD-style license that can be

4 * found in the LICENSE file.

5 */

6

7 /*

8 * Set of predefined instruction forms (via procedure calls), providing

9 * a more concise way of specifying opcodes.

10 */

11

12 #ifndef NATIVE_CLIENT_SRC_TRUSTED_VALIDATOR_X86_DECODER_GENERATOR_NCDECODE_FORMS _H__

13 #define NATIVE_CLIENT_SRC_TRUSTED_VALIDATOR_X86_DECODER_GENERATOR_NCDECODE_FORMS _H__

14

15 #ifndef NACL_TRUSTED_BUT_NOT_TCB

16 #error("This file is not meant for use in the TCB")

17 #endif

18

19 #include "native_client/src/trusted/validator/x86/decoder/ncopcode_desc.h"

20

21 struct NaClSymbolTable;

22

/* Defines the general category of instruction, and is used to define
 * set/use values for instructions. That is, most X86 instructions
 * have the form:
 *
 *     OP Dest, Source
 *
 * where OP is some mnemonic name, the first argument is the DEST (because
 * side effecting operations change this value), and Source is a second (use)
 * argument.
 *
 * Note: Unary operations assume the form:
 *
 *     OP Dest
 *
 * Reading the text associated with each instruction, one should be able to
 * categorize (most) instructions, into one of the following:
 */
typedef enum NaClInstCat {
  /* The following are for categorizing operations with a single operand. */
  UnarySet,     /* The value of Dest is set to a predetermined value. */
  UnaryUpdate,  /* Dest := f(Dest) for some f. */
  /* The following are for categorizing operations with 2 or more operands. */
  Move,         /* Dest := f(Source) for some f. */
  O2Move,       /* Dest1, Dest2 = f(source) for some f. */
  Binary,       /* Dest := f(Dest, Source) for some f. */
  O2Binary,     /* Dest1,Dest2 = f(Dest2, Source) for some f. */
  Nary,         /* Dest := f(dest, source1, ..., sourcen) for some f. */
  O1Nary,       /* Dest := f(source1, ..., sourcen) for some f. */
  O3Nary,       /* Dest1,Dest2,Dest3 = f(source1, ..., sourcen) for some f. */
  Compare,      /* Sets flag using f(Dest, Source). The value of Dest is not
                 * modified.
                 */
  Exchange,     /* Dest := f(Dest, Source) for some f, and
                 * Source := g(Dest, Source) for some g.
                 */
  Push,         /* Implicit first (stack) argument is updated, and the
                 * value of the Dest is not modified.
                 */
  Pop,          /* Implicit first (stack) argument is updated, and
                 * dest := f() for some f (i.e. f gets the value on
                 * top of the stack).
                 */
  Call,         /* Implicit ip first argument that is updated. Stack second
                 * argument that is updated. Third argument is used.
                 */
  SysCall,      /* Implicit ip first argument that is updated. Implicit
                 * register second argument that is set.
                 */
  SysJump,      /* First four arguments are set (eip, esp, cs, ss). Remaining
                 * (if any) are used.
                 */
  Return,       /* Implicit ip first argument that is set. Stack second
                 * argument that is updated. Third argument, if given, is used.
                 */
  SysRet,       /* Implicit ip first argument that is set. Implicit register
                 * second argument that is used.
                 */
  Jump,         /* Implicit first (IP) argument is updated to the
                 * value of the Dest argument.
                 */
  Uses,         /* All arguments are uses. */
  Sets,         /* All arguments are set. */
  Lea,          /* Address calculation, and hence, operand 2 is neither used
                 * nor set.
                 */
  Cpuid,        /* Sets all, uses starting with third. */
  Other,        /* No implicit set/use implications. */
} NaClInstCat;

91

/* Defines the maximum length (in entries) of an opcode sequence descriptor
 * (see comment for typedef NaClOpcodeSeq). Note: two extra entries have been
 * added to hold the SL(x) and END_OPCODE_SEQ values.
 */
#define NACL_OPCODE_SEQ_SIZE (NACL_MAX_OPCODE_BYTES + 2)

97

98 /* Models an opcode sequence. Used by NaClInInstructionSet to describe

99 * an instruction implemented by a sequence of bytes. Macro SL(N) is used

100 * to describe an additional value N, which appears in the modrm mod field.

101 * Macro END_OPCODE_SEQ is an placeholder, ignore value, defining the end of the

102 * opcode sequence.

103 *

104 * 0..256 => Opcode byte.

105 * PR(N) => prefix byte value N.

106 * SL(N) => /N

107 * END_OPCODE_SEQ => Not part of prefix.

108 */

109 typedef int16_t NaClOpcodeSeq[NACL_OPCODE_SEQ_SIZE];

110

/* Value denoting the end of an opcode sequence (descriptor). */
#define END_OPCODE_SEQ 512

/* Define value in modrm (i.e. /n in opcode sequence). Encoded as the
 * negative value -(n + 1), so it cannot collide with an opcode byte.
 */
#define SL(n) (-((n) + 1))

/* Define prefix value for opcode sequence. Encoded as SL(n) shifted down by
 * END_OPCODE_SEQ. The whole expansion is parenthesized so that the macro can
 * be used safely inside a larger expression.
 */
#define PR(n) (SL(n) - END_OPCODE_SEQ)

119

120 /* Model an instruction by its mnemonic and opcode sequence. */

121 typedef struct NaClNameOpcodeSeq {

122 NaClMnemonic name;

123 NaClOpcodeSeq opcode_seq;

124 } NaClNameOpcodeSeq;

125

126 /* Returns true iff the current instruction has one of the given mnemonic names,

127 * or is defined by one of the name and opcode sequences. Note: It is safe to

128 * pass NULL for names or name_and_opcode_seq, if the corresponding size

129 * parameter is zero.

130 */

131 Bool NaClInInstructionSet(const NaClMnemonic* names,

132 size_t names_size,

133 const NaClNameOpcodeSeq* name_and_opcode_seq,

134 size_t name_and_opcode_seq_size);

135

/* Model of an operand processing function: takes no arguments and returns
 * no value.
 */
typedef void (*NaClDefOperand)(void);

138

139 /***************************************************************************

140 * This section is the new API for defining instructions. It uses a string,

141 * describing the instruction to model. In addition, a symbol table is passed

142 * in to define possible substitutions.

143 *

144 * The string defining the instruction is called an "opcode description string".

145 *

146 * Examples: The following are some examples of opcode description strings.

147 *

148 * "06: Push {%@sp}, %es" - Defines (opcode 06) that pushes register es

149 * "07: Pop {%@sp}, %es" - Defines (opcode 07) that pops into register es.

150 * "69/r: Imul $Gv, $Ev, $Iz" - Defines (opcode 69) a signed multiply.

151 * "0fba/7: Btc $Ev, $Ib" - Defines (opcode 0f ba, with opcode extension

152 * 7 in the modrm reg field) a bit test and complement.

153 * "90+@i: Xchg $r8v, $rAX" - Defines (opcode 90+i) exchange register/memory

154 * with register.

155 *

156 * A (symbol table) substitution is defined as follows:

157 *

158 * (1) It begins with the character '@';

159 * (2) Its name is an alphanumeric sequence; and

160 * (3) The name is terminated by a character in the charset ' :+/{}'.

161 *

162 * The general form of an opcode description string is a sequence of

163 * hex values defining the opcode prefix, and the opcode byte. This

164 * sequence of values must be terminated with a colon (:). No spaces

165 * are allowed in this sequence.

166 *

167 * If the instruction uses the modrm byte, a '/r' must immediately follow

168 * the sequence of hex values (and must appear before the colon).

169 *

170 * If the instruction is continued in the modrm reg field (i.e. a value 0..7),

171 * the characters /N (where N is in 0..7) must immediately follow the sequence

172 * of hex values (and must appear before the colon).

173 *

174 * If the instruction encodes a register value as part of the opcode byte,

175 * the value of the register defined is the string '+R' (where R is in 0..7),

176 * and must immediately follow the sequence of hex values (and must appear

177 * before the colon).

178 *

179 * Note: If the instruction uses an operand print form that uses the modrm

180 * value (such as $E or $G), then it is not necessary to add the

181 * /r suffix to the sequence of hex values.

182 *

183 * After the colon, the mnemonic name of the instruction must appear. An

184 * arbitrary number of spaces can appear between the colon, and the mnemonic

185 * name. The mnemonic name is then followed by zero or more operands.

186 * Each operand can be separated by an arbitrary sequence of spaces and/or

187 * commas.

188 *

189 * Each operand specifies a register and/or memory address. An operand

190 * may not contain spaces.

191 *

192 * If the operand is implicit (i.e. should not appear when printing a

193 * decoded instruction), it should be enclosed in curly braces. In general,

194 * we put implicit operands first, but there are no rules defining where an

195 * implicit operand may appear.

196 *

197 * A register begins with the character '%', and is followed by its name.

198 * Register names are case insensitive. Legal values are any operand kind

199 * defined in ncopcode_operand_kind.enum that begins with the text 'Reg'.

200 *

201 * A print form begins with the character '$', and is followed by a name.

202 * Print forms are, in general, defined by Appendix section A.1 - Opcode-Syntax

203 * Notation in AMD document 24594-Rev.3.14-September 2007, "AMD64 Architecture

204 * Programmer's manual Volume 3: General-Purpose and System Instructions".

205 * Exceptions are made for descriptions used in that appendix, but are

206 * not documented in this section. For clarity, the rules are explicitly

207 * defined as follows: A print form consists of a FORM, followed by

208 * a SIZE specification.

209 *

210 * Valid FORMs are:

211 * A - Far pointer is encoded in the instruction.

212 * C - Control register specified by the ModRM reg field.

213 * D - Debug register specified by the ModRM reg field.

214 * E - General purpose register or memory operand specified by the ModRm

215 * byte. Memory addresses can be computed from a segment register,

216 * SIB byte, and/or displacement.

217 * F - rFLAGS register.

218 * G - General purpose register specified by the ModRm reg field.

219 * I - Immediate value.

220 * J - The instruction includes a relative offset that is added to the rIP

221 * register.

222 * M - A memory operand specified by the ModRM byte.

223 * O - The offset of an operand is encoded in the instruction. There is no

224 * ModRm byte in the instruction. Complex addressing using the SIB byte

225 * cannot be done.

226 * P - 64-bit MMX register specified by the ModRM reg field.

227 * PR - 64 bit MMX register specified by the ModRM r/m field. The ModRM mod

228 * field must be 11b.

229 * Q - 64 bit MMX register or memory operand specified by the ModRM byte.

230 * Memory addresses can be computed from a segment register, SIB byte,

231 * and/or displacement.

232 * R - General purpose register specified by the ModRM r/m field. The ModRM

233 * mod field must be 11b.

234 * S - Segment register specified by the ModRM reg field.

235 * U - The R/M field of the ModR/M byte selects a 128-bit XMM register.

236 * V - 128-bit XMM register specified by the ModRM reg field.

237 * VR - 128-bit XMM register specified by the ModRM r/m field. The ModRM mod

238 * field must be 11b.

239 * W - 128 Xmm register or memory operand specified by the ModRm Byte. Memory

240 * addresses can be computed from a segment register, SIB byte, and/or

241 * displacement.

242 * X - A memory operand addressed by the DS.rSI registers. Used in string

243 * instructions.

244 * Y - A memory operand addressed by the ES.rDI registers. Used in string

245 * instructions.

246 * Z - A memory operand addressed by the DS.rDI registers. Used in maskmov

247 * instructions.

248 * r8 - The 8 registers rAX, rCX, rDX, rBX, rSP, rBP, rSI, rDI, and the

249 * optional registers r8-r15 if REX.b is set, based on the register value

250 * embedded in the opcode.

251 * SG - segment address defined by a G expression and the segment register in

252 * the corresponding mnemonic (lds, les, lfs, lgs, lss).

253 * rAX - The register AX, EAX, or RAX, depending on SIZE.

254 * rBP - The register BP, EBP, or RBP, depending on SIZE.

255 * rBX - The register BX, EBX, or RBX, depending on SIZE.

256 * rCX - The register CX, ECX, or RCX, depending on SIZE.

257 * rDI - The register DI, EDI, or RDI, depending on SIZE.

258 * rDX - The register DX, EDX, or RDX, depending on SIZE.

259 * rSI - The register SI, ESI, or RSI, depending on SIZE.

260 * rSP - The register SP, ESP, or RSP, depending on SIZE.

261 *

262 * Note: r8 is not in the manual cited above. It has been added to deal with

263 * instructions with an embedded register in the opcode. In such cases, this

264 * value allows a single defining call to be used (within a for loop),

265 * rather than writing eight separate rules (one for each possible register

266 * value).

267 *

268 * Note: SG is also not in the manual cited above. It has been added to deal

269 * with the instructions lds, les, lfs, lgs, and lss, which generate a

270 * segment address from a General purpose register specified in the ModRm reg

271 * field.

272 *

273 * Note: Z is also not in the manual cited above. It has been added to deal with

274 * the implicit argument of maskmov instructions.

275 *

276 * Valid SIZEs are:

277 * a - Two 16-bit or 32-bit memory operands, depending on the effective

278 * operand size. Used in the BOUND instruction.

279 * b - A byte, irrespective of the effective operand size.

280 * d - A doubleword (32-bits), irrespective of the effective operand size.

281 * dq - A double-quadword (128 bits), irrespective of the effective operand

282 * size.

283 * p - A 32-bit or 48-bit far pointer, depending on the effective operand

284 * size.

285 * pd - A 128-bit double-precision floating point vector operand (packed

286 * double).

287 * pi - A 64-bit MMX operand (packed integer).

288 * ps - A 128-bit single precision floating point vector operand (packed

289 * single).

290 * q - A quadword, irrespective of the effective operand size.

291 * s - A 6-byte or 10-byte pseudo-descriptor.

292 * sd - A scalar double-precision floating point operand (scalar double).

293 * si - A scalar doubleword (32-bit) integer operand (scalar integer).

294 * ss - A scalar single-precision floating-point operand (scalar single).

295 * w - A word, irrespective of the effective operand size.

296 * v - A word, doubleword, or quadword, depending on the effective operand

297 * size.

298 * va - A word, doubleword, or quadword, depending on the effective address

299 * size.

300 * vw - A word only when the effective operand size matches.

301 * vd - A doubleword only when the effective operand size matches.

302 * vq - A quadword only when the effective operand size matches.

303 * w - A word, irrespective of the effective operand size.

304 * z - A word if the effective operand size is 16 bits, or a doubleword

305 * if the effective operand size is 32 or 64 bits.

306 * zw - A word only when the effective operand size matches.

307 * zd - A doubleword only when the effective operand size is 32 or 64 bits.

308 * f - A memory access (of small size, i.e. less than 100 bytes),

309 * irrespective of the operand size (as modified by the prefix 66,

310 * and the Rex.w prefix). Should only be used with $M arguments.

311 * When this size modifier $Mf is used (unlike $M which allows

312 * prefix 66), prefix 66 is illegal.

313 * Note: When $Mf is used, the (small) size differences are not

314 * important for the validator. Hence, it doesn't matter if we are

315 * more accurate.

316 *

317 * Note: vw, vd, vq, zw, and zd are not in the manual cited above. However,

318 * they have been added so that sub-variants of an v/z instruction (not

319 * specified in the manual) can be specified.

320 *

321 * In addition, this code adds the following special print forms:

322 * One - The literal constant 1.

323 *

324 * Note: The AMD manual uses some slash notations (such as d/q) which aren't

325 * explicitly defined. In general, we allow such notation as specified in

326 * the AMD manual. Depending on the use, it can mean any of the following:

327 * (1) In 32-bit mode, d is used. In 64-bit mode, q is used.

328 * (2) only 32-bit or 64-bit values are allowed.

329 * In addition, when the mnemonic name changes based on which value is chosen

330 * in d/q, we use d/q/d to denote the 32-bit case, and d/q/q to denote the

331 * 64 bit case.

332 *

333 * Because some instructions may need to add flags and/or additional operands

334 * outside the string context, instructions are modeled using a pair of calls

335 * (i.e. a Begin and End form). The Begin form starts defining the instruction,

336 * and the End form completes and installs the modeled instruction. Any

337 * additional model changes for the instruction being defined should

338 * appear between these call pairs.

339 *

340 * For instructions not needing to do special touchups, a simpler Define form

341 * exists that simply dispatches calls to the corresponding Begin and End forms.

342 ***************************************************************************/

343

/* Defines the target machine(s) an instruction definition applies to. */
typedef enum {
  T32,   /* 32 only. */
  T64,   /* 64 only. */
  Tall,  /* both 32 and 64. */
} NaClTargetPlatform;

350

351 /* Defines the beginning of the modeling of a platform instruction.

352 * Parameters are:

353 * platform - The platform(s) the instruction applies to.

354 * desc - the opcode description string.

355 * insttype - The category of the instruction (defines the effects of CPUID).

356 * st - The symbol table to use while defining the instruction.

357 */

358 void NaClBegDefPlatform(NaClTargetPlatform platform,

359 const char* desc, NaClInstType insttype,

360 struct NaClSymbolTable* st);

361

362 /* Defines the beginning of the modeling of both a x86-32 and x86-64

363 * instruction.

364 * Parameters are:

365 * desc - the opcode description string.

366 * insttype - The category of the instruction (defines the effects of CPUID).

367 * st - The symbol table to use while defining the instruction.

368 */

369 void NaClBegDef(const char* desc, NaClInstType insttype,

370 struct NaClSymbolTable* st);

371

372 /* Defines the beginning of the modeling of a x86-32 instruction without

373 * an equivalent x86-64 version.

374 * Parameters are:

375 * desc - the opcode description string.

376 * insttype - The category of the instruction (defines the effects of CPUID).

377 * st - The symbol table to use while defining the instruction.

378 */

379 void NaClBegD32(const char* desc, NaClInstType insttype,

380 struct NaClSymbolTable* st);

381

382 /* Defines the beginning of the modeling of a x86-64 instruction without

383 * an equivalent x86-32 version.

384 * Parameters are:

385 * desc - the opcode description string.

386 * insttype - The category of the instruction (defines the effects of CPUID).

387 * st - The symbol table to use while defining the instruction.

388 */

389 void NaClBegD64(const char* desc, NaClInstType insttype,

390 struct NaClSymbolTable* st);

391

392 /* Defines the end of the modeling of an instruction. Must be paired with

393 * a call to NaClBegDef, NaClBegD32, or NaClBegD64.

394 * Parameters are:

395 * icat - The set/use categorization for the instruction being defined.

396 */

397 void NaClEndDef(NaClInstCat icat);

398

399 /* Defines a platform instruction, using dispatching

400 * calls to NaClBegDefPlatform and NaClEndDef.

401 * Parameters are:

402 * platform - the platform(s) the instruction applies to.

403 * desc - the opcode description string.

404 * insttype - The category of the instruction (defines the effects of CPUID).

405 * st - The symbol table to use while defining the instruction.

406 * icat - The set/use categorization for the instruction being defined.

407 */

408 void NaClDefinePlatform(NaClTargetPlatform platform,

409 const char* desc, NaClInstType insttype,

410 struct NaClSymbolTable* st, NaClInstCat cat);

411

412 /* Defines both a x86-32 and x86-64 instruction, using dispatching

413 * calls to NaClBegDef and NaClEndDef.

414 * Parameters are:

415 * desc - the opcode description string.

416 * insttype - The category of the instruction (defines the effects of CPUID).

417 * st - The symbol table to use while defining the instruction.

418 * icat - The set/use categorization for the instruction being defined.

419 */

420 void NaClDefine(const char* desc, NaClInstType insttype,

421 struct NaClSymbolTable* st, NaClInstCat cat);

422

423 /* Defines a x86-32 instruction without an equivalent x86-64 version, using

424 * dispatching calls to NaClBegD32 and NaClEndDef.

425 * Parameters are:

426 * desc - the opcode description string.

427 * insttype - The category of the instruction (defines the effects of CPUID).

428 * st - The symbol table to use while defining the instruction.

429 * icat - The set/use categorization for the instruction being defined.

430 */

431 void NaClDef_32(const char* desc, NaClInstType insttype,

432 struct NaClSymbolTable* st, NaClInstCat cat);

433

434 /* Defines a x86-64 instruction without an equivalent x86-32 version, using

435 * dispatching calls to NaClBegD32 and NaClEndDef.

436 * Parameters are:

437 * desc - the opcode description string.

438 * insttype - The category of the instruction (defines the effects of CPUID).

439 * st - The symbol table to use while defining the instruction.

440 * icat - The set/use categorization for the instruction being defined.

441 */

442 void NaClDef_64(const char* desc, NaClInstType insttype,

443 struct NaClSymbolTable* st, NaClInstCat cat);

444

445

446 /* Defines a set of instructions, for all values of min <= i <= max (bound

447 * in a local symbol table), using calls to NaClDefine on the remaining

448 * arguments. In addition, opcodes of the form "xx+@i:", within the description

449 * string are automatically added to generate the opcode value xx+i.

450 * In addition, the value of min and max must be between 0 and 7.

451 * Parameters are:

452 * desc - the opcode description string.

453 * min - The starting value to iterate i on.

454 * max - The ending value to iterate i on.

455 * insttype - The category of the instruction (defines the effects of CPUID).

456 * st - The symbol table to use while defining the instruction.

457 * icat - The set/use categorization for the instruction being defined.

458 */

459 void NaClDefIter(const char* desc, int min, int max,

460 NaClInstType insttype, struct NaClSymbolTable* st,

461 NaClInstCat cat);

462

463 /* Defines a set of instructions, for all values of min <= reg <= max (bound

464 * in a local symbol), using calls to NaClDefine on the remaining arguments.

465 * In addition, the value of min and max must be between 0 and 255. Typically

466 * used to generate register values that are part of the opcode.

467 * Parameters are:

468 * desc - the opcode description string.

469 * min - The starting value to iterate reg on.

470 * max - The ending value to iterate reg on.

471 * insttype - The category of the instruction (defines the effects of CPUID).

472 * st - The symbol table to use while defining the instruction.

473 * icat - The set/use categorization for the instruction being defined.

474 */

475 void NaClDefReg(const char* desc, int min, int max,

476 NaClInstType insttype, struct NaClSymbolTable* st,

477 NaClInstCat cat);

478

479

480 #endif /* NATIVE_CLIENT_SRC_TRUSTED_VALIDATOR_X86_DECODER_GENERATOR_NCDECODE_FORMS_H__ */

OLD	NEW