| Index: src/trusted/validator_ragel/decoding.h
|
| ===================================================================
|
| --- src/trusted/validator_ragel/decoding.h (revision 11020)
|
| +++ src/trusted/validator_ragel/decoding.h (working copy)
|
| @@ -5,11 +5,12 @@
|
| */
|
|
|
| /*
|
| - * This file contains common parts of x86-32 and x86-64 internals (inline
|
| - * functions and defines).
|
| + * This file contains common parts of ia32 and x86-64 decoder and validator
|
| + * internals (inline functions which are used to pull useful information from
|
| + * "well-known" bytes of the instruction: REX and VEX prefixes, ModR/M byte and
|
| + * so on).
|
| *
|
| - * We only include simple schematic diagrams here. For full description see
|
| - * AMD/Intel manuals.
|
| + * See AMD/Intel manuals for the full description.
|
| */
|
|
|
| #ifndef NATIVE_CLIENT_SRC_TRUSTED_VALIDATOR_RAGEL_DECODING_H_
|
| @@ -25,12 +26,10 @@
|
|
|
|
|
| /*
|
| - * Opcode with register number embedded:
|
| + * Opcode-with-register byte format:
|
| *
|
| - * 7 6 5 4 3 2 1 0
|
| - * ┌───────┬───────┬───────┬───────┬───────┬───────┬───────┬───────┒
|
| - * │ Opcode │ register number ┃
|
| - * ┕━━━━━━━┷━━━━━━━┷━━━━━━━┷━━━━━━━┷━━━━━━━┷━━━━━━━┷━━━━━━━┷━━━━━━━┛
|
| + * bits 0-2: register number
|
| + * bits 3-7: actual opcode
|
| */
|
| static FORCEINLINE uint8_t RegFromOpcode(uint8_t modrm) {
|
| return modrm & 0x07;
|
| @@ -39,17 +38,16 @@
|
| /*
|
| * ModRM byte format:
|
| *
|
| - * 7 6 5 4 3 2 1 0
|
| - * ┌───────┬───────┬───────┬───────┬───────┬───────┬───────┬───────┒
|
| - * │ mod │ reg │ r/m ┃
|
| - * ┕━━━━━━━┷━━━━━━━┷━━━━━━━┷━━━━━━━┷━━━━━━━┷━━━━━━━┷━━━━━━━┷━━━━━━━┛
|
| + * bits 0-2: r/m
|
| + * bits 3-5: reg
|
| + * bits 6-7: mod
|
| */
|
| static FORCEINLINE uint8_t ModFromModRM(uint8_t modrm) {
|
| return modrm >> 6;
|
| }
|
|
|
| static FORCEINLINE uint8_t RegFromModRM(uint8_t modrm) {
|
| - return (modrm & 0x38) >> 3;
|
| + return (modrm >> 3) & 0x07;
|
| }
|
|
|
| static FORCEINLINE uint8_t RMFromModRM(uint8_t modrm) {
|
| @@ -59,17 +57,16 @@
|
| /*
|
| * SIB byte format:
|
| *
|
| - * 7 6 5 4 3 2 1 0
|
| - * ┌───────┬───────┬───────┬───────┬───────┬───────┬───────┬───────┒
|
| - * │ scale │ index │ base ┃
|
| - * ┕━━━━━━━┷━━━━━━━┷━━━━━━━┷━━━━━━━┷━━━━━━━┷━━━━━━━┷━━━━━━━┷━━━━━━━┛
|
| + * bits 0-2: base
|
| + * bits 3-5: index
|
| + * bits 6-7: scale
|
| */
|
| static FORCEINLINE uint8_t ScaleFromSIB(uint8_t sib) {
|
| return sib >> 6;
|
| }
|
|
|
| static FORCEINLINE uint8_t IndexFromSIB(uint8_t sib) {
|
| - return (sib & 0x38) >> 3;
|
| + return (sib >> 3) & 0x07;
|
| }
|
|
|
| static FORCEINLINE uint8_t BaseFromSIB(uint8_t sib) {
|
| @@ -79,10 +76,11 @@
|
| /*
|
| * REX byte format:
|
| *
|
| - * 7 6 5 4 3 2 1 0
|
| - * ┌───────┬───────┬───────┬───────┬───────┬───────┬───────┬───────┒
|
| - * │ 0 │ 1 │ 0 │ 0 │ W │ R │ X │ B ┃
|
| - * ┕━━━━━━━┷━━━━━━━┷━━━━━━━┷━━━━━━━┷━━━━━━━┷━━━━━━━┷━━━━━━━┷━━━━━━━┛
|
| + * bit 0: B (Base)
|
| + * bit 1: X (indeX)
|
| + * bit 2: R (Register)
|
| + * bit 3: W (Wide)
|
| + * bits 4-7: 0x4 (REX signature)
|
| */
|
|
|
| enum {
|
| @@ -92,14 +90,17 @@
|
| REX_W = 8
|
| };
|
|
|
| +/* How much to add to "base register" number: 0 or 8 */
|
| static FORCEINLINE uint8_t BaseExtentionFromREX(uint8_t rex) {
|
| return (rex & REX_B) << 3;
|
| }
|
|
|
| +/* How much to add to "index register" number: 0 or 8 */
|
| static FORCEINLINE uint8_t IndexExtentionFromREX(uint8_t rex) {
|
| return (rex & REX_X) << 2;
|
| }
|
|
|
| +/* How much to add to "register operand" number: 0 or 8 */
|
| static FORCEINLINE uint8_t RegisterExtentionFromREX(uint8_t rex) {
|
| return (rex & REX_R) << 1;
|
| }
|
| @@ -107,10 +108,11 @@
|
| /*
|
| * VEX 2nd byte format:
|
| *
|
| - * 7 6 5 4 3 2 1 0
|
| - * ┌───────┬───────┬───────┬───────┬───────┬───────┬───────┬───────┒
|
| - * │ ¬R │ ¬X │ ¬B │ opcode map selector ┃
|
| - * ┕━━━━━━━┷━━━━━━━┷━━━━━━━┷━━━━━━━┷━━━━━━━┷━━━━━━━┷━━━━━━━┷━━━━━━━┛
|
| + * bits 0-4: opcode map selector
|
| + * bit 5: inverted B (Base)
|
| + * bit 6: inverted X (indeX)
|
| + * bit 7: inverted R (Register)
|
| + *
|
| */
|
|
|
| enum {
|
| @@ -122,18 +124,20 @@
|
| VEX_MAPA = 0x0a,
|
| VEX_B = 0x20,
|
| VEX_X = 0x40,
|
| - VEX_R = 0x80,
|
| - VEX_W = 0x80
|
| + VEX_R = 0x80
|
| };
|
|
|
| +/* How much to add to "base register" number: 0 or 8 */
|
| static FORCEINLINE uint8_t BaseExtentionFromVEX(uint8_t vex2) {
|
| return ((~vex2) & VEX_B) >> 2;
|
| }
|
|
|
| +/* How much to add to "index register" number: 0 or 8 */
|
| static FORCEINLINE uint8_t IndexExtentionFromVEX(uint8_t vex2) {
|
| return ((~vex2) & VEX_X) >> 3;
|
| }
|
|
|
| +/* How much to add to "register operand" number: 0 or 8 */
|
| static FORCEINLINE uint8_t RegisterExtentionFromVEX(uint8_t vex2) {
|
| return ((~vex2) & VEX_R) >> 4;
|
| }
|
| @@ -141,27 +145,37 @@
|
| /*
|
| * VEX 3rd byte format:
|
| *
|
| - * 7 6 5 4 3 2 1 0
|
| - * ┌───────┬───────┬───────┬───────┬───────┬───────┬───────┬───────┒
|
| - * │ W │ ¬vvvv (register number) │ L │ pp ┃
|
| - * ┕━━━━━━━┷━━━━━━━┷━━━━━━━┷━━━━━━━┷━━━━━━━┷━━━━━━━┷━━━━━━━┷━━━━━━━┛
|
| + * bits 0-1: pp (Packed Prefix)
|
| + * bit 2: L (Long)
|
| + * bits 3-6: inverted vvvv (register number)
|
| + * bit 7: W (Wide)
|
| */
|
|
|
| +enum {
|
| + VEX_PP_NONE = 0x00,
|
| + VEX_PP_0X66 = 0x01,
|
| + VEX_PP_0XF3 = 0x02,
|
| + VEX_PP_0XF2 = 0x03,
|
| + VEX_L = 0x04,
|
| + VEX_VVVV = 0x78,
|
| + VEX_W = 0x80
|
| +};
|
| +
|
| +
|
| static FORCEINLINE uint8_t GetOperandFromVexIA32(uint8_t vex3) {
|
| - return ((~vex3) & 0x38) >> 3;
|
| + return ((~vex3) & 0x38) >> 3; /* ia32: only low 3 bits of vvvv are used */
|
| }
|
|
|
| static FORCEINLINE uint8_t GetOperandFromVexAMD64(uint8_t vex3) {
|
| - return ((~vex3) & 0x78) >> 3;
|
| + return ((~vex3) & VEX_VVVV) >> 3;
|
| }
|
|
|
| /*
|
| - * is4 byte format:
|
| + * is4/is5 byte format:
|
| *
|
| - * 7 6 5 4 3 2 1 0
|
| - * ┌───────┬───────┬───────┬───────┬───────┬───────┬───────┬───────┒
|
| - * │ vvvv (register number) │ 0 │ 0 │ imm2 or zero ┃
|
| - * ┕━━━━━━━┷━━━━━━━┷━━━━━━━┷━━━━━━━┷━━━━━━━┷━━━━━━━┷━━━━━━━┷━━━━━━━┛
|
| + * bits 0-1: imm2 or zero
|
| + * bits 2-3: 0
|
| + * bits 4-7: register number
|
| */
|
| static FORCEINLINE uint8_t RegisterFromIS4(uint8_t is4) {
|
| return is4 >> 4;
|
| @@ -174,26 +188,30 @@
|
| * to this function: it will be converted to signed value and then
|
| * sign-extended to become 64-bit value.
|
| *
|
| - * Smaller values can be obtained by restricting this value further (which is
|
| - * safe according to the C language specification: see 6.2.1.2 in C90 and
|
| + * Return values can be restricted to a smaller unsigned type when needed (which
|
| + * is safe according to the C language specification: see 6.2.1.2 in C90 and
|
| * 6.3.1.3.2 in C99 specification).
|
| *
|
| * Note that these operations are safe but slightly unusual: they come very
|
| * close to the edge of what "well-behaved C program is not supposed to do",
|
| * but they stay on the "safe" side of this boundary. Specifically: this
|
| - * behavior triggers "implementation-defined behavior" (see 6.2.1.2 in C90
|
| - * specification and 6.3.1.3.3 in C99 specification) which sounds suspiciously
|
| - * similar to the dreaded "undefined behavior", but in reality these two are
|
| - * quite different: any program which triggers "undefined behavior" is not a
|
| - * valid C program at all, but program which triggers "implementation-defined
|
| - * behavior" is quite valid C program. What this program actually *does*
|
| - * depends on the specification of a given C compiler: each particular
|
| - * implementation must decide for itself what it'll do in this particular case
|
| - * and *stick* *to* *it*. If the implementation uses two's-complement negative
|
| - * numbers (and all the implementation which can compile this code *must*
|
| - * support two's-complement arythmetic - see 7.18.1.1 in C99 specification) then
|
| - * the easiest thing to do is to do what we need here - this is what all known
|
| - * compilers for all known platforms are actually doing.
|
| + * (conversion to intXX_t) behavior triggers "implementation-defined behavior"
|
| + * (see 6.2.1.2 in C90 specification and 6.3.1.3.3 in C99 specification) which
|
| + * sounds suspiciously similar to the dreaded "undefined behavior", but in
|
| + * reality these two are quite different: any program which triggers "undefined
|
| + * behavior" is not a valid C program at all, but a program which triggers
|
| + * "implementation-defined behavior" is quite a valid C program. What this
|
| + * program actually *does* depends on the specification of a given C compiler:
|
| + * each particular implementation must decide for itself what it'll do in this
|
| + * particular case and *stick* *to* *it*. If the implementation actually uses
|
| + * two's-complement negative numbers (and all the implementations which can
|
| + * compile this code *must* support two's-complement arithmetic - see 7.18.1.1
|
| + * in C99 specification) then the easiest thing to do is to do what we need
|
| + * here - this is what all known compilers for all known platforms are actually
|
| + * doing.
|
| + *
|
| + * Conversion from intXX_t to uint64_t is always safe (same as before: see
|
| + * 6.2.1.2 in C90 specification and 6.3.1.3.2 in C99 specification).
|
| */
|
| static FORCEINLINE uint64_t SignExtend8Bit(uint64_t value) {
|
| return (int8_t)value;
|
|
|