| Index: tools/traceline/traceline/sidestep/mini_disassembler.cc
|
| diff --git a/tools/traceline/traceline/sidestep/mini_disassembler.cc b/tools/traceline/traceline/sidestep/mini_disassembler.cc
|
| new file mode 100755
|
| index 0000000000000000000000000000000000000000..a603ebe637acdba06525b0bdb038bab785fdaa03
|
| --- /dev/null
|
| +++ b/tools/traceline/traceline/sidestep/mini_disassembler.cc
|
| @@ -0,0 +1,416 @@
|
| +// Copyright 2008, Google Inc.
|
| +// All rights reserved.
|
| +//
|
| +// Redistribution and use in source and binary forms, with or without
|
| +// modification, are permitted provided that the following conditions are
|
| +// met:
|
| +//
|
| +// * Redistributions of source code must retain the above copyright
|
| +// notice, this list of conditions and the following disclaimer.
|
| +// * Redistributions in binary form must reproduce the above
|
| +// copyright notice, this list of conditions and the following disclaimer
|
| +// in the documentation and/or other materials provided with the
|
| +// distribution.
|
| +// * Neither the name of Google Inc. nor the names of its
|
| +// contributors may be used to endorse or promote products derived from
|
| +// this software without specific prior written permission.
|
| +//
|
| +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
| +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
| +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
| +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
| +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
| +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
| +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
| +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
| +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
| +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
| +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
| +
|
| +// Implementation of MiniDisassembler.
|
| +
|
| +#include "sidestep/mini_disassembler.h"
|
| +
|
| +namespace sidestep {
|
| +// Builds a disassembler whose default operand/address sizes come from the two flags (true selects 32-bit), then resets decode state.
|
| +MiniDisassembler::MiniDisassembler(bool operand_default_is_32_bits,
|
| + bool address_default_is_32_bits)
|
| + : operand_default_is_32_bits_(operand_default_is_32_bits),
|
| + address_default_is_32_bits_(address_default_is_32_bits) {
|
| + Initialize();
|
| +}
|
| +// Default constructor: operand and address sizes both default to 32 bits; decode state is reset via Initialize().
|
| +MiniDisassembler::MiniDisassembler()
|
| + : operand_default_is_32_bits_(true),
|
| + address_default_is_32_bits_(true) {
|
| + Initialize();
|
| +}
|
| +// Decodes the instruction at start_byte, adds its length in bytes to *instruction_bytes and returns its type; on failure returns IT_UNKNOWN and leaves *instruction_bytes untouched.
|
| +InstructionType MiniDisassembler::Disassemble(
|
| + unsigned char* start_byte,
|
| + unsigned int* instruction_bytes) {
|
| + // Clean up any state from previous invocations.
|
| + Initialize();
|
| +
|
| + // Start by processing any prefixes; size receives the prefix byte count.
|
| + unsigned char* current_byte = start_byte;
|
| + unsigned int size = 0;
|
| + InstructionType instruction_type = ProcessPrefixes(current_byte, &size);
|
| +
|
| + if (IT_UNKNOWN == instruction_type)
|
| + return instruction_type;
|
| +
|
| + current_byte += size;
|
| + size = 0;
|
| +
|
| + // Invariant: We have stripped all prefixes, and the operand_is_32_bits_
|
| + // and address_is_32_bits_ flags are correctly set.
|
| +
|
| + instruction_type = ProcessOpcode(current_byte, 0, &size);
|
| +
|
| + // Check for error processing instruction. The check reads the member
|
| + if ((IT_UNKNOWN == instruction_type_) || (IT_UNUSED == instruction_type_)) {
|
| + return IT_UNKNOWN;
|
| + }
|
| +
|
| + current_byte += size;
|
| +
|
| + // Invariant: operand_bytes_ indicates the total size of operands
|
| + // specified by the opcode and/or ModR/M byte and/or SIB byte.
|
| + // current_byte points to the first byte after the ModR/M byte, or after
|
| + // the SIB byte if it is present (i.e. the first byte of any operands
|
| + // encoded in the instruction).
|
| +
|
| + // We get the total length of any prefixes, the opcode, and the ModR/M and
|
| + // SIB bytes if present, by taking the difference of the original starting
|
| + // address and the current byte (which points to the first byte of the
|
| + // operands if present, or to the first byte of the next instruction if
|
| + // they are not). Adding the count of bytes in the operands encoded in
|
| + // the instruction gives us the full length of the instruction in bytes.
|
| + *instruction_bytes += operand_bytes_ + (current_byte - start_byte); // accumulates into the caller's value; it is not overwritten
|
| +
|
| + // Return the instruction type, which was set by ProcessOpcode().
|
| + return instruction_type_;
|
| +}
|
| +// Resets all per-instruction decode state: size flags return to their configured defaults, counters and prefix flags are cleared.
|
| +void MiniDisassembler::Initialize() {
|
| + operand_is_32_bits_ = operand_default_is_32_bits_;
|
| + address_is_32_bits_ = address_default_is_32_bits_;
|
| + operand_bytes_ = 0;
|
| + have_modrm_ = false;
|
| + should_decode_modrm_ = false;
|
| + instruction_type_ = IT_UNKNOWN;
|
| + got_f2_prefix_ = false;
|
| + got_f3_prefix_ = false;
|
| + got_66_prefix_ = false;
|
| +}
|
| +// Consumes prefix bytes starting at start_byte (one recursive call per prefix), adding one to *size for each; returns the first byte's prefix type, or IT_GENERIC if it is not a prefix.
|
| +InstructionType MiniDisassembler::ProcessPrefixes(unsigned char* start_byte,
|
| + unsigned int* size) {
|
| + InstructionType instruction_type = IT_GENERIC;
|
| + const Opcode& opcode = s_ia32_opcode_map_[0].table_[*start_byte];
|
| +
|
| + switch (opcode.type_) {
|
| + case IT_PREFIX_ADDRESS:
|
| + address_is_32_bits_ = !address_default_is_32_bits_;
|
| + goto nochangeoperand; // jump past the operand-size toggle below
|
| + case IT_PREFIX_OPERAND:
|
| + operand_is_32_bits_ = !operand_default_is_32_bits_;
|
| + nochangeoperand:
|
| + case IT_PREFIX:
|
| +
|
| + if (0xF2 == (*start_byte))
|
| + got_f2_prefix_ = true;
|
| + else if (0xF3 == (*start_byte))
|
| + got_f3_prefix_ = true;
|
| + else if (0x66 == (*start_byte))
|
| + got_66_prefix_ = true;
|
| +
|
| + instruction_type = opcode.type_;
|
| + (*size)++;
|
| + // We got a prefix, so count it and check the next byte. The recursive
|
| + ProcessPrefixes(start_byte + 1, size);
|
| + default:
|
| + break; // not a prefix byte (prefix cases also fall through to here)
|
| + }
|
| +
|
| + return instruction_type;
|
| +}
|
| +// Looks up the opcode at start_byte in table table_index (recursing through IT_REFERENCE entries), sets instruction_type_, accumulates operand_bytes_ and adds the opcode/ModR/M/SIB byte count to *size.
|
| +InstructionType MiniDisassembler::ProcessOpcode(unsigned char* start_byte,
|
| + unsigned int table_index,
|
| + unsigned int* size) {
|
| + const OpcodeTable& table = s_ia32_opcode_map_[table_index]; // Get our table
|
| + unsigned char current_byte = (*start_byte) >> table.shift_;
|
| + current_byte = current_byte & table.mask_; // Keep only the bits this table indexes on
|
| +
|
| + // Check whether the byte we have is inside the table we have.
|
| + if (current_byte < table.min_lim_ || current_byte > table.max_lim_) {
|
| + instruction_type_ = IT_UNKNOWN;
|
| + return instruction_type_;
|
| + }
|
| +
|
| + const Opcode& opcode = table.table_[current_byte];
|
| + if (IT_UNUSED == opcode.type_) {
|
| + // This instruction is not used by the IA-32 ISA, so we indicate
|
| + // this to the user. Probably means that we were pointed to
|
| + // a byte in memory that was not the start of an instruction.
|
| + instruction_type_ = IT_UNUSED;
|
| + return instruction_type_;
|
| + } else if (IT_REFERENCE == opcode.type_) {
|
| + // We are looking at an opcode that has more bytes (or is continued
|
| + // in the ModR/M byte). Recursively find the opcode definition in
|
| + // the table for the opcode's next byte.
|
| + (*size)++;
|
| + ProcessOpcode(start_byte + 1, opcode.table_index_, size);
|
| + return instruction_type_;
|
| + }
|
| +
|
| + const SpecificOpcode* specific_opcode = reinterpret_cast<
|
| + const SpecificOpcode*>(&opcode); // NOTE(review): presumably Opcode starts with (or derives from) SpecificOpcode; confirm in the header
|
| + if (opcode.is_prefix_dependent_) {
|
| + if (got_f2_prefix_ && opcode.opcode_if_f2_prefix_.mnemonic_ != 0) {
|
| + specific_opcode = &opcode.opcode_if_f2_prefix_;
|
| + } else if (got_f3_prefix_ && opcode.opcode_if_f3_prefix_.mnemonic_ != 0) {
|
| + specific_opcode = &opcode.opcode_if_f3_prefix_;
|
| + } else if (got_66_prefix_ && opcode.opcode_if_66_prefix_.mnemonic_ != 0) {
|
| + specific_opcode = &opcode.opcode_if_66_prefix_;
|
| + }
|
| + }
|
| +
|
| + // Inv: The opcode type is known.
|
| + instruction_type_ = specific_opcode->type_;
|
| +
|
| + // Let's process the operand types to see if we have any immediate
|
| + // operands, and/or a ModR/M byte. The bool results are not checked.
|
| +
|
| + ProcessOperand(specific_opcode->flag_dest_);
|
| + ProcessOperand(specific_opcode->flag_source_);
|
| + ProcessOperand(specific_opcode->flag_aux_);
|
| +
|
| + // Inv: We have processed the opcode and incremented operand_bytes_
|
| + // by the number of bytes of any operands specified by the opcode
|
| + // that are stored in the instruction (not registers etc.). Now
|
| + // we need to return the total number of bytes for the opcode and
|
| + // for the ModR/M or SIB bytes if they are present.
|
| +
|
| + if (table.mask_ != 0xff) {
|
| + if (have_modrm_) {
|
| + // start_byte is itself the ModR/M byte, so we do not count it
|
| + // here; ProcessModrm() adds it (and any SIB byte) to *size.
|
| + ProcessModrm(start_byte, size);
|
| + return IT_GENERIC;
|
| + } else {
|
| + // need to count the ModR/M byte even if it's just being
|
| + // used for opcode extension
|
| + (*size)++;
|
| + return IT_GENERIC;
|
| + }
|
| + } else {
|
| + if (have_modrm_) {
|
| + // Count the opcode byte; the ModR/M byte is the next byte.
|
| + (*size)++;
|
| + ProcessModrm(start_byte + 1, size);
|
| + return IT_GENERIC;
|
| + } else {
|
| + (*size)++;
|
| + return IT_GENERIC;
|
| + }
|
| + }
|
| +}
|
| +// Updates have_modrm_, should_decode_modrm_ and operand_bytes_ from one operand descriptor (addressing mode under AM_MASK, operand type under OT_MASK); returns false only for an OT_SS immediate/offset operand.
|
| +bool MiniDisassembler::ProcessOperand(int flag_operand) {
|
| + bool succeeded = true;
|
| + if (AM_NOT_USED == flag_operand)
|
| + return succeeded;
|
| +
|
| + // Decide what to do based on the addressing mode.
|
| + switch (flag_operand & AM_MASK) {
|
| + // No ModR/M byte indicated by these addressing modes, and no
|
| + // additional (e.g. immediate) parameters.
|
| + case AM_A: // Direct address
|
| + case AM_F: // EFLAGS register
|
| + case AM_X: // Memory addressed by the DS:SI register pair
|
| + case AM_Y: // Memory addressed by the ES:DI register pair
|
| + case AM_IMPLICIT: // Parameter is implicit, occupies no space in
|
| + // instruction
|
| + break;
|
| +
|
| + // There is a ModR/M byte but it does not necessarily need
|
| + // to be decoded.
|
| + case AM_C: // reg field of ModR/M selects a control register
|
| + case AM_D: // reg field of ModR/M selects a debug register
|
| + case AM_G: // reg field of ModR/M selects a general register
|
| + case AM_P: // reg field of ModR/M selects an MMX register
|
| + case AM_R: // mod field of ModR/M may refer only to a general register
|
| + case AM_S: // reg field of ModR/M selects a segment register
|
| + case AM_T: // reg field of ModR/M selects a test register
|
| + case AM_V: // reg field of ModR/M selects a 128-bit XMM register
|
| + have_modrm_ = true;
|
| + break;
|
| +
|
| + // In these addressing modes, there is a ModR/M byte and it needs to be
|
| + // decoded. No other (e.g. immediate) params than indicated in ModR/M.
|
| + case AM_E: // Operand is either a general-purpose register or memory,
|
| + // specified by ModR/M byte
|
| + case AM_M: // ModR/M byte will refer only to memory
|
| + case AM_Q: // Operand is either an MMX register or memory (complex
|
| + // evaluation), specified by ModR/M byte
|
| + case AM_W: // Operand is either a 128-bit XMM register or memory (complex
|
| + // eval), specified by ModR/M byte
|
| + have_modrm_ = true;
|
| + should_decode_modrm_ = true;
|
| + break;
|
| +
|
| + // These addressing modes specify an immediate or an offset value
|
| + // directly, so we need to look at the operand type to see how many
|
| + // bytes it occupies in the instruction.
|
| + case AM_I: // Immediate data.
|
| + case AM_J: // Jump to offset.
|
| + case AM_O: // Operand is at offset.
|
| + switch (flag_operand & OT_MASK) {
|
| + case OT_B: // Byte regardless of operand-size attribute.
|
| + operand_bytes_ += OS_BYTE;
|
| + break;
|
| + case OT_C: // Byte or word, depending on operand-size attribute.
|
| + if (operand_is_32_bits_)
|
| + operand_bytes_ += OS_WORD;
|
| + else
|
| + operand_bytes_ += OS_BYTE;
|
| + break;
|
| + case OT_D: // Doubleword, regardless of operand-size attribute.
|
| + operand_bytes_ += OS_DOUBLE_WORD;
|
| + break;
|
| + case OT_DQ: // Double-quadword, regardless of operand-size attribute.
|
| + operand_bytes_ += OS_DOUBLE_QUAD_WORD;
|
| + break;
|
| + case OT_P: // 32-bit or 48-bit pointer, depending on operand-size
|
| + // attribute.
|
| + if (operand_is_32_bits_)
|
| + operand_bytes_ += OS_48_BIT_POINTER;
|
| + else
|
| + operand_bytes_ += OS_32_BIT_POINTER;
|
| + break;
|
| + case OT_PS: // 128-bit packed single-precision floating-point data.
|
| + operand_bytes_ += OS_128_BIT_PACKED_SINGLE_PRECISION_FLOATING;
|
| + break;
|
| + case OT_Q: // Quadword, regardless of operand-size attribute.
|
| + operand_bytes_ += OS_QUAD_WORD;
|
| + break;
|
| + case OT_S: // 6-byte pseudo-descriptor.
|
| + operand_bytes_ += OS_PSEUDO_DESCRIPTOR;
|
| + break;
|
| + case OT_SD: // Scalar Double-Precision Floating-Point Value
|
| + case OT_PD: // Unaligned packed double-precision floating point value
|
| + operand_bytes_ += OS_DOUBLE_PRECISION_FLOATING;
|
| + break;
|
| + case OT_SS:
|
| + // Scalar element of a 128-bit packed single-precision
|
| + // floating data.
|
| + // We simply report failure (succeeded = false) and add no operand
|
| + // bytes, since we don't have to support floating point.
|
| + succeeded = false;
|
| + break;
|
| + case OT_V: // Word or doubleword, depending on operand-size attribute.
|
| + if (operand_is_32_bits_)
|
| + operand_bytes_ += OS_DOUBLE_WORD;
|
| + else
|
| + operand_bytes_ += OS_WORD;
|
| + break;
|
| + case OT_W: // Word, regardless of operand-size attribute.
|
| + operand_bytes_ += OS_WORD;
|
| + break;
|
| +
|
| + // Can safely ignore these: no operand bytes are added for them.
|
| + case OT_A: // Two one-word operands in memory or two double-word
|
| + // operands in memory
|
| + case OT_PI: // Quadword MMX technology register (e.g. mm0)
|
| + case OT_SI: // Doubleword integer register (e.g., eax)
|
| + break;
|
| +
|
| + default:
|
| + break;
|
| + }
|
| + break;
|
| +
|
| + default:
|
| + break;
|
| + }
|
| +
|
| + return succeeded;
|
| +}
|
| +// Accounts for the ModR/M byte at start_byte: adds one to *size, adds any operand size the table entry reports to operand_bytes_, and hands off to ProcessSib() when a SIB byte follows.
|
| +bool MiniDisassembler::ProcessModrm(unsigned char* start_byte,
|
| + unsigned int* size) {
|
| + // If we don't need to decode, we just count the ModR/M byte in *size
|
| + // and return (there is never a SIB byte in this case).
|
| + if (!should_decode_modrm_) {
|
| + (*size)++;
|
| + return true;
|
| + }
|
| +
|
| + // We never care about the reg field, only the combination of the mod
|
| + // and r/m fields, so let's start by packing those fields together into
|
| + // 5 bits.
|
| + unsigned char modrm = (*start_byte);
|
| + unsigned char mod = modrm & 0xC0; // keep only the top two bits (the mod field)
|
| + modrm = modrm & 0x07; // keep only the bottom 3 bits (the r/m field)
|
| + mod = mod >> 3; // shift the mod field down to bits 4..3
|
| + modrm = mod | modrm; // combine the r/m and mod fields as discussed
|
| + mod = mod >> 3; // shift the mod field down to bits 1..0
|
| +
|
| + // Invariant: modrm contains the mod field in bits 4..3 and the r/m field
|
| + // in bits 2..0, and mod contains the mod field in bits 1..0
|
| +
|
| + const ModrmEntry* modrm_entry = 0;
|
| + if (address_is_32_bits_)
|
| + modrm_entry = &s_ia32_modrm_map_[modrm];
|
| + else
|
| + modrm_entry = &s_ia16_modrm_map_[modrm];
|
| +
|
| + // Invariant: modrm_entry points to information that we need to decode
|
| + // the ModR/M byte.
|
| +
|
| + // Add to the count of operand bytes, if the ModR/M byte indicates
|
| + // that some operands are encoded in the instruction.
|
| + if (modrm_entry->is_encoded_in_instruction_)
|
| + operand_bytes_ += modrm_entry->operand_size_;
|
| +
|
| + // Process the SIB byte if necessary; either way the ModR/M byte
|
| + // itself is counted in *size here.
|
| + if (modrm_entry->use_sib_byte_) {
|
| + (*size)++;
|
| + return ProcessSib(start_byte + 1, mod, size);
|
| + } else {
|
| + (*size)++;
|
| + return true;
|
| + }
|
| +}
|
| +// Accounts for the SIB byte at start_byte: when its base field is 101b a displacement size chosen by mod is added to operand_bytes_; always adds one to *size and returns true.
|
| +bool MiniDisassembler::ProcessSib(unsigned char* start_byte,
|
| + unsigned char mod,
|
| + unsigned int* size) {
|
| + // get the base field from bits 2..0 of the SIB byte
|
| + unsigned char sib_base = (*start_byte) & 0x07;
|
| + if (0x05 == sib_base) {
|
| + switch (mod) {
|
| + case 0x00: // mod == 00
|
| + case 0x02: // mod == 10
|
| + operand_bytes_ += OS_DOUBLE_WORD;
|
| + break;
|
| + case 0x01: // mod == 01; NOTE(review): if the ModR/M table entry already counts the displacement for mod 01/10, this double counts; confirm against s_ia32_modrm_map_
|
| + operand_bytes_ += OS_BYTE;
|
| + break;
|
| + case 0x03: // mod == 11
|
| + // According to the IA-32 docs, there does not seem to be a disp
|
| + // value for this value of mod
|
| + default:
|
| + break;
|
| + }
|
| + }
|
| +
|
| + (*size)++;
|
| + return true;
|
| +}
|
| +
|
| +}; // namespace sidestep
|
|
|