Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(3)

Side by Side Diff: src/trusted/validator/x86/decoder/generator/ncdecode_forms.h

Issue 625923004: Delete old x86 validator. (Closed) Base URL: svn://svn.chromium.org/native_client/trunk/src/native_client
Patch Set: rebase master Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 /*
2 * Copyright (c) 2012 The Native Client Authors. All rights reserved.
3 * Use of this source code is governed by a BSD-style license that can be
4 * found in the LICENSE file.
5 */
6
7 /*
8 * Set of predefined instruction forms (via procedure calls), providing
9 * a more concise way of specifying opcodes.
10 */
11
12 #ifndef NATIVE_CLIENT_SRC_TRUSTED_VALIDATOR_X86_DECODER_GENERATOR_NCDECODE_FORMS_H__
13 #define NATIVE_CLIENT_SRC_TRUSTED_VALIDATOR_X86_DECODER_GENERATOR_NCDECODE_FORMS_H__
14
15 #ifndef NACL_TRUSTED_BUT_NOT_TCB /* Refuse to build unless this macro is defined. */
16 #error("This file is not meant for use in the TCB")
17 #endif
18
19 #include "native_client/src/trusted/validator/x86/decoder/ncopcode_desc.h"
20
21 struct NaClSymbolTable; /* Forward declaration; defined elsewhere. */
22
23 /* Defines the general category of instruction, and is used to define
24 * set/use values for instructions. That is, most X86 instructions
25 * have the form:
26 *
27 * OP Dest, Source
28 *
29 * where OP is some mnemonic name, the first argument is the DEST (because
30 * side effecting operations change this value), and Source is a second (use)
31 * argument.
32 *
33 * Note: Unary operations assume the form:
34 *
35 * OP Dest
36 *
37 * Reading the text associated with each instruction, one should be able to
38 * categorize (most) instructions, into one of the following:
39 */
40 typedef enum NaClInstCat {
41 /* The following are for categorizing operations with a single operand. */
42 UnarySet, /* The value of Dest is set to a predetermined value. */
43 UnaryUpdate, /* Dest := f(Dest) for some f. */
44 /* The following are for categorizing operations with 2 or more operands. */
45 Move, /* Dest := f(Source) for some f. */
46 O2Move, /* Dest1, Dest2 := f(Source) for some f. */
47 Binary, /* Dest := f(Dest, Source) for some f. */
48 O2Binary, /* Dest1, Dest2 := f(Dest2, Source) for some f. */
49 Nary, /* Dest := f(Dest, Source1, ..., SourceN) for some f. */
50 O1Nary, /* Dest := f(Source1, ..., SourceN) for some f. */
51 O3Nary, /* Dest1, Dest2, Dest3 := f(Source1, ..., SourceN) for some f. */
52 Compare, /* Sets flag using f(Dest, Source). The value of Dest is not
53 * modified.
54 */
55 Exchange, /* Dest := f(Dest, Source) for some f, and
56 * Source := g(Dest, Source) for some g.
57 */
58 Push, /* Implicit first (stack) argument is updated, and the
59 * value of the Dest is not modified.
60 */
61 Pop, /* Implicit first (stack) argument is updated, and
62 * Dest := f() for some f (i.e. f gets the value on
63 * top of the stack).
64 */
65 Call, /* Implicit ip first argument that is updated. Stack second
66 * argument that is updated. Third argument is used.
67 */
68 SysCall, /* Implicit ip first argument that is updated. Implicit register
69 * second argument that is set.
70 */
71 SysJump, /* First four arguments are set (eip, esp, cs, ss). Remaining
72 * (if any) are used.
73 */
74 Return, /* Implicit ip first argument that is set. Stack second
75 * argument that is updated. Third argument, if given, is used.
76 */
77 SysRet, /* Implicit ip first argument that is set. Implicit register
78 * second argument that is used.
79 */
80 Jump, /* Implicit first (IP) argument is updated to the
81 * value of the Dest argument.
82 */
83 Uses, /* All arguments are uses. */
84 Sets, /* All arguments are set. */
85 Lea, /* Address calculation, and hence, operand 2 is neither used
86 * nor set.
87 */
88 Cpuid, /* Sets all, uses starting with third. */
89 Other, /* No implicit set/use implications. */
90 } NaClInstCat;
91
92 /* Defines the maximum length of an opcode sequence descriptor (see
93 * comment for typedef NaClOpcodeSeq). Note: two extra entries have been added
94 * to NACL_MAX_OPCODE_BYTES for the SL(x) and END_OPCODE_SEQ entries.
95 */
96 #define NACL_OPCODE_SEQ_SIZE (NACL_MAX_OPCODE_BYTES + 2)
97
98 /* Models an opcode sequence. Used by NaClInInstructionSet to describe
99 * an instruction implemented by a sequence of bytes. Macro SL(N) is used
100 * to describe an additional value N, which appears in the modrm mod field.
101 * Macro END_OPCODE_SEQ is a placeholder (its value is ignored) marking the end of the
102 * opcode sequence.
103 *
104 * 0..256 => Opcode byte.
105 * PR(N) => Prefix byte value N.
106 * SL(N) => /N
107 * END_OPCODE_SEQ => Not part of prefix.
108 */
109 typedef int16_t NaClOpcodeSeq[NACL_OPCODE_SEQ_SIZE];
110
111 /* Sentinel value denoting the end of an opcode sequence (descriptor). */
112 #define END_OPCODE_SEQ 512
113
114 /* Encodes modrm value n (i.e. /n in an opcode sequence) as the negative number -(n + 1). */
115 #define SL(n) (-((n) + 1))
116
117 /* Define prefix value n for an opcode sequence, i.e. SL(n) shifted down by END_OPCODE_SEQ. Parenthesized so the expansion is safe inside larger expressions. */
118 #define PR(n) (SL(n) - END_OPCODE_SEQ)
119
120 /* Models an instruction by pairing its mnemonic with its opcode sequence. */
121 typedef struct NaClNameOpcodeSeq {
122 NaClMnemonic name; /* Mnemonic of the instruction. */
123 NaClOpcodeSeq opcode_seq; /* Opcode sequence descriptor of the instruction. */
124 } NaClNameOpcodeSeq;
125
126 /* Returns true iff the current instruction has one of the given mnemonic names,
127 * or is defined by one of the name and opcode sequences. Note: It is safe to
128 * pass NULL for names or name_and_opcode_seq when the corresponding size
129 * parameter (names_size or name_and_opcode_seq_size, respectively) is zero.
130 */
131 Bool NaClInInstructionSet(const NaClMnemonic* names,
132 size_t names_size,
133 const NaClNameOpcodeSeq* name_and_opcode_seq,
134 size_t name_and_opcode_seq_size);
135
136 /* Model of an operand processing function (takes no arguments, returns nothing). */
137 typedef void (*NaClDefOperand)(void);
138
139 /***************************************************************************
140 * This section is the new API for defining instructions. It uses a string,
141 * describing the instruction to model. In addition, a symbol table is passed
142 * in to define possible substitutions.
143 *
144 * The string defining the instruction is called an "opcode description string".
145 *
146 * Examples: The following are some examples of opcode description strings.
147 *
148 * "06: Push {%@sp}, %es" - Defines (opcode 06) that pushes register es
149 * "07: Pop {%@sp}, %es" - Defines (opcode 07) that pops into register es.
150 * "69/r: Imul $Gv, $Ev, $Iz" - Defines (opcode 69) a signed multiply.
151 * "0fba/7: Btc $Ev, $Ib" - Defines (opcode 0f ba, with opcode extension
152 * 7 in the modrm mod field) a bit test and complement.
153 * "90+@i: Xchg $r8v, $rAX" - Defines (opcode 90+i) exchange register/memory
154 * with register.
155 *
156 * A (symbol table) substitution is defined as follows:
157 *
158 * (1) It begins with the character '@';
159 * (2) Its name is an alphanumeric sequence; and
160 * (3) The name is terminated by a character in the charset ' :+/{}'.
161 *
162 * The general form of an opcode description string is a sequence of
163 * hex values defining the opcode prefix, and the opcode byte. This
164 * sequence of values must be terminated with a colon (:). No spaces
165 * are allowed in this sequence.
166 *
167 * If the instruction uses the modrm byte, a '/r' must immediately follow
168 * the sequence of hex values (and must appear before the colon).
169 *
170 * If the instruction is continued in the modrm mod field (i.e. a value 0..7),
171 * the characters /N (where N is in 0..7) must immediately follow the sequence
172 * of hex values (and must appear before the colon).
173 *
174 * If the instruction encodes a register value as part of the opcode byte,
175 * the value of the register defined is the string '+R' (where R is in 0..7),
176 * and must immediately follow the sequence of hex values (and must appear
177 * before the colon).
178 *
179 * Note: If the instruction uses an operand print form that uses the modrm
180 * value (such as $E or $G), then it is not necessary to add the
181 * /r suffix to the sequence of hex values.
182 *
183 * After the colon, the mnemonic name of the instruction must appear. An
184 * arbitrary number of spaces can appear between the colon, and the mnemonic
185 * name. The mnemonic name is then followed by zero or more operands.
186 * Each operand can be separated by an arbitrary sequence of spaces and/or
187 * commas.
188 *
189 * Each operand specifies a register and/or memory address. An operand
190 * may not contain spaces.
191 *
192 * If the operand is implicit (i.e. should not appear when printing a
193 * decoded instruction), it should be enclosed in curly braces. In general,
194 * we put implicit operands first, but there are no rules defining where an
195 * implicit operand may appear.
196 *
197 * A register begins with the character '%', and is followed by its name.
198 * Register names are case insensitive. Legal values are any operand kind
199 * defined in ncopcode_operand_kind.enum that begins with the text 'Reg'.
200 *
201 * A print form begins with the character '$', and is followed by a name.
202 * Print forms are, in general, defined by Appendix section A.1 - Opcode-Syntax
203 * Notation in AMD document 24594-Rev.3.14-September 2007, "AMD64 Architecture
204 * Programmer's manual Volume 3: General-Purpose and System Instructions".
205 * Exceptions are made for descriptions used in that appendix, but are
206 * not documented in this section. For clarity, the rules are explicitly
207 * defined as follows: A print form consists of a FORM, followed by
208 * a SIZE specification.
209 *
210 * Valid FORMs are:
211 * A - Far pointer is encoded in the instruction.
212 * C - Control register specified by the ModRM reg field.
213 * D - Debug register specified by the ModRM reg field.
214 * E - General purpose register or memory operand specified by the ModRm
215 * byte. Memory addresses can be computed from a segment register,
216 * SIB byte, and/or displacement.
217 * F - rFLAGS register.
218 * G - General purpose register specified by the ModRm reg field.
219 * I - Immediate value.
220 * J - The instruction includes a relative offset that is added to the rIP
221 * register.
222 * M - A memory operand specified by the ModRM byte.
223 * O - The offset of an operand is encoded in the instruction. There is no
224 * ModRm byte in the instruction. Complex addressing using the SIB byte
225 * cannot be done.
226 * P - 64-bit MMX register specified by the ModRM reg field.
227 * PR - 64 bit MMX register specified by the ModRM r/m field. The ModRM mod
228 * field must be 11b.
229 * Q - 64 bit MMX register or memory operand specified by the ModRM byte.
230 * Memory addresses can be computed from a segment register, SIB byte,
231 * and/or displacement.
232 * R - General purpose register specified by the ModRM r/m field. The ModRM
233 * mod field must be 11b.
234 * S - Segment register specified by the ModRM reg field.
235 * U - The R/M field of the ModR/M byte selects a 128-bit XMM register.
236 * V - 128-bit XMM register specified by the ModRM reg field.
237 * VR - 128-bit XMM register specified by the ModRM r/m field. The ModRM mod
238 * field must be 11b.
239 * W - 128 Xmm register or memory operand specified by the ModRm Byte. Memory
240 * addresses can be computed from a segment register, SIB byte, and/or
241 * displacement.
242 * X - A memory operand addressed by the DS.rSI registers. Used in string
243 * instructions.
244 * Y - A memory operand addressed by the ES.rDI registers. Used in string
245 * instructions.
246 * Z - A memory operand addressed by the DS.rDI registers. Used in maskmov
247 * instructions.
248 * r8 - The 8 registers rAX, rCX, rDX, rBX, rSP, rBP, rSI, rDI, and the
249 * optional registers r8-r15 if REX.b is set, based on the register value
250 * embedded in the opcode.
251 * SG - segment address defined by a G expression and the segment register in
252 * the corresponding mnemonic (lds, les, lfs, lgs, lss).
253 * rAX - The register AX, EAX, or RAX, depending on SIZE.
254 * rBP - The register BP, EBP, or RBP, depending on SIZE.
255 * rBX - The register BX, EBX, or RBX, depending on SIZE.
256 * rCX - The register CX, ECX, or RCX, depending on SIZE.
257 * rDI - The register DI, EDI, or RDI, depending on SIZE.
258 * rDX - The register DX, EDX, or RDX, depending on SIZE.
259 * rSI - The register SI, ESI, or RSI, depending on SIZE.
260 * rSP - The register SP, ESP, or RSP, depending on SIZE.
261 *
262 * Note: r8 is not in the manual cited above. It has been added to deal with
263 * instructions with an embedded register in the opcode. In such cases, this
264 * value allows a single defining call to be used (within a for loop),
265 * rather than writing eight separate rules (one for each possible register
266 * value).
267 *
268 * Note: SG is also not in the manual cited above. It has been added to deal
269 * with the instructions lds, les, lfs, lgs, and lss, which generate a
270 * segment address from a General purpose register specified in the ModRm reg
271 * field.
272 *
273 * Note: Z is also not in the manual cited above. It has been added to deal with
274 * the implicit argument of maskmov instructions.
275 *
276 * Valid SIZEs are:
277 * a - Two 16-bit or 32-bit memory operands, depending on the effective
278 * operand size. Used in the BOUND instruction.
279 * b - A byte, irrespective of the effective operand size.
280 * d - A doubleword (32-bits), irrespective of the effective operand size.
281 * dq - A double-quadword (128 bits), irrespective of the effective operand
282 * size.
283 * p - A 32-bit or 48-bit far pointer, depending on the effective operand
284 * size.
285 * pd - A 128-bit double-precision floating point vector operand (packed
286 * double).
287 * pi - A 64-bit MMX operand (packed integer).
288 * ps - A 128-bit single precision floating point vector operand (packed
289 * single).
290 * q - A quadword, irrespective of the effective operand size.
291 * s - A 6-byte or 10-byte pseudo-descriptor.
292 * sd - A scalar double-precision floating point operand (scalar double).
293 * si - A scalar doubleword (32-bit) integer operand (scalar integer).
294 * ss - A scalar single-precision floating-point operand (scalar single).
295 * w - A word, irrespective of the effective operand size.
296 * v - A word, doubleword, or quadword, depending on the effective operand
297 * size.
298 * va - A word, doubleword, or quadword, depending on the effective address
299 * size.
300 * vw - A word only when the effective operand size matches.
301 * vd - A doubleword only when the effective operand size matches.
302 * vq - A quadword only when the effective operand size matches.
303 * w - A word, irrespective of the effective operand size.
304 * z - A word if the effective operand size is 16 bits, or a doubleword
305 * if the effective operand size is 32 or 64 bits.
306 * zw - A word only when the effective operand size matches.
307 * zd - A doubleword only when the effective operand size is 32 or 64 bits.
308 * f - A memory access (of small size, i.e. less than 100 bytes),
309 * irrespective of the operand size (as modified by the prefix 66,
310 * and the Rex.w prefix). Should only be used with $M arguments.
311 * When this size modifier $Mf is used (unlike $M which allows
312 * prefix 66), prefix 66 is illegal.
313 * Note: When $Mf is used, the (small) size differences are not
314 * important for the validator. Hence, it doesn't matter if we are
315 * more accurate.
316 *
317 * Note: vw, vd, vq, zw, and zd are not in the manual cited above. However,
318 * they have been added so that sub-variants of an v/z instruction (not
319 * specified in the manual) can be specified.
320 *
321 * In addition, this code adds the following special print forms:
322 * One - The literal constant 1.
323 *
324 * Note: The AMD manual uses some slash notations (such as d/q) which isn't
325 * explicitly defined. In general, we allow such notation as specified in
326 * the AMD manual. Depending on the use, it can mean any of the following:
327 * (1) In 32-bit mode, d is used. In 64-bit mode, q is used.
328 * (2) only 32-bit or 64-bit values are allowed.
329 * In addition, when the mnemonic name changes based on which value is chosen
330 * in d/q, we use d/q/d to denote the 32-bit case, and d/q/q to denote the
331 * 64 bit case.
332 *
333 * Because some instructions may need to add flags and/or additional operands
334 * outside the string context, instructions are modeled using a pair of calls
335 * (i.e. a Begin and End form). The Begin form starts defining the instruction,
336 * and the End form completes and installs the modeled instruction. Any
337 * additional model changes for the instruction being defined should
338 * appear between these call pairs.
339 *
340 * For instructions not needing to do special touchups, a simpler Define form
341 * exists that simply dispatches calls to the corresponding Begin and End forms.
342 ***************************************************************************/
343
344 /* Defines the target platform(s) that an instruction applies to. */
345 typedef enum {
346 T32, /* x86-32 only. */
347 T64, /* x86-64 only. */
348 Tall, /* Both x86-32 and x86-64. */
349 } NaClTargetPlatform;
350
351 /* Begins the modeling of an instruction for the given platform(s); it is completed by NaClEndDef.
352 * Parameters are:
353 * platform - The platform(s) the instruction applies to.
354 * desc - The opcode description string.
355 * insttype - The category of the instruction (defines the effects of CPUID).
356 * st - The symbol table to use while defining the instruction.
357 */
358 void NaClBegDefPlatform(NaClTargetPlatform platform,
359 const char* desc, NaClInstType insttype,
360 struct NaClSymbolTable* st);
361
362 /* Begins the modeling of an instruction that is both an x86-32 and an x86-64
363 * instruction. The definition is completed by a call to NaClEndDef.
364 * Parameters are:
365 * desc - The opcode description string.
366 * insttype - The category of the instruction (defines the effects of CPUID).
367 * st - The symbol table to use while defining the instruction.
368 */
369 void NaClBegDef(const char* desc, NaClInstType insttype,
370 struct NaClSymbolTable* st);
371
372 /* Begins the modeling of an x86-32 instruction that has no equivalent
373 * x86-64 version. The definition is completed by a call to NaClEndDef.
374 * Parameters are:
375 * desc - The opcode description string.
376 * insttype - The category of the instruction (defines the effects of CPUID).
377 * st - The symbol table to use while defining the instruction.
378 */
379 void NaClBegD32(const char* desc, NaClInstType insttype,
380 struct NaClSymbolTable* st);
381
382 /* Begins the modeling of an x86-64 instruction that has no equivalent
383 * x86-32 version. The definition is completed by a call to NaClEndDef.
384 * Parameters are:
385 * desc - The opcode description string.
386 * insttype - The category of the instruction (defines the effects of CPUID).
387 * st - The symbol table to use while defining the instruction.
388 */
389 void NaClBegD64(const char* desc, NaClInstType insttype,
390 struct NaClSymbolTable* st);
391
392 /* Defines the end of the modeling of an instruction. Must be paired with
393 * a call to NaClBegDefPlatform, NaClBegDef, NaClBegD32, or NaClBegD64.
394 * Parameters are:
395 * icat - The set/use categorization for the instruction being defined.
396 */
397 void NaClEndDef(NaClInstCat icat);
398
399 /* Defines an instruction for the given platform(s), using dispatching
400 * calls to NaClBegDefPlatform and NaClEndDef.
401 * Parameters are:
402 * platform - The platform(s) the instruction applies to.
403 * desc - The opcode description string.
404 * insttype - The category of the instruction (defines the effects of CPUID).
405 * st - The symbol table to use while defining the instruction.
406 * cat - The set/use categorization for the instruction being defined.
407 */
408 void NaClDefinePlatform(NaClTargetPlatform platform,
409 const char* desc, NaClInstType insttype,
410 struct NaClSymbolTable* st, NaClInstCat cat);
411
412 /* Defines both an x86-32 and an x86-64 instruction, using dispatching
413 * calls to NaClBegDef and NaClEndDef.
414 * Parameters are:
415 * desc - The opcode description string.
416 * insttype - The category of the instruction (defines the effects of CPUID).
417 * st - The symbol table to use while defining the instruction.
418 * cat - The set/use categorization for the instruction being defined.
419 */
420 void NaClDefine(const char* desc, NaClInstType insttype,
421 struct NaClSymbolTable* st, NaClInstCat cat);
422
423 /* Defines an x86-32 instruction without an equivalent x86-64 version, using
424 * dispatching calls to NaClBegD32 and NaClEndDef.
425 * Parameters are:
426 * desc - The opcode description string.
427 * insttype - The category of the instruction (defines the effects of CPUID).
428 * st - The symbol table to use while defining the instruction.
429 * cat - The set/use categorization for the instruction being defined.
430 */
431 void NaClDef_32(const char* desc, NaClInstType insttype,
432 struct NaClSymbolTable* st, NaClInstCat cat);
433
434 /* Defines an x86-64 instruction without an equivalent x86-32 version, using
435 * dispatching calls to NaClBegD64 and NaClEndDef.
436 * Parameters are:
437 * desc - The opcode description string.
438 * insttype - The category of the instruction (defines the effects of CPUID).
439 * st - The symbol table to use while defining the instruction.
440 * cat - The set/use categorization for the instruction being defined.
441 */
442 void NaClDef_64(const char* desc, NaClInstType insttype,
443 struct NaClSymbolTable* st, NaClInstCat cat);
444
445
446 /* Defines a set of instructions, for all values of min <= i <= max (bound
447 * in a local symbol table), using calls to NaClDefine on the remaining
448 * arguments. In addition, opcodes of the form "xx+@i:", within the description
449 * string are automatically added to generate the opcode value xx+i.
450 * In addition, the values of min and max must be between 0 and 7.
451 * Parameters are:
452 * desc - The opcode description string.
453 * min - The starting value to iterate i on.
454 * max - The ending value to iterate i on.
455 * insttype - The category of the instruction (defines the effects of CPUID).
456 * st - The symbol table to use while defining the instruction.
457 * cat - The set/use categorization for the instruction being defined.
458 */
459 void NaClDefIter(const char* desc, int min, int max,
460 NaClInstType insttype, struct NaClSymbolTable* st,
461 NaClInstCat cat);
462
463 /* Defines a set of instructions, for all values of min <= reg <= max (bound
464 * in a local symbol), using calls to NaClDefine on the remaining arguments.
465 * In addition, the values of min and max must be between 0 and 255. Typically
466 * used to generate register values that are part of the opcode.
467 * Parameters are:
468 * desc - The opcode description string.
469 * min - The starting value to iterate reg on.
470 * max - The ending value to iterate reg on.
471 * insttype - The category of the instruction (defines the effects of CPUID).
472 * st - The symbol table to use while defining the instruction.
473 * cat - The set/use categorization for the instruction being defined.
474 */
475 void NaClDefReg(const char* desc, int min, int max,
476 NaClInstType insttype, struct NaClSymbolTable* st,
477 NaClInstCat cat);
478
479
480 #endif /* NATIVE_CLIENT_SRC_TRUSTED_VALIDATOR_X86_DECODER_GENERATOR_NCDECODE_FORMS_H__ */
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698