| Index: src/arm64/simulator-arm64.cc | 
| diff --git a/src/arm64/simulator-arm64.cc b/src/arm64/simulator-arm64.cc | 
| index b536fd5e9ce8271854c6c8212f12be744519da92..caba8843682455cbe51f9fdadb5ae023665c6e46 100644 | 
| --- a/src/arm64/simulator-arm64.cc | 
| +++ b/src/arm64/simulator-arm64.cc | 
| @@ -43,14 +43,15 @@ namespace internal { | 
| #define MAGENTA "35" | 
| #define CYAN    "36" | 
| #define WHITE   "37" | 
| + | 
| typedef char const * const TEXT_COLOUR; | 
| TEXT_COLOUR clr_normal         = FLAG_log_colour ? COLOUR(NORMAL)       : ""; | 
| TEXT_COLOUR clr_flag_name      = FLAG_log_colour ? COLOUR_BOLD(WHITE)   : ""; | 
| TEXT_COLOUR clr_flag_value     = FLAG_log_colour ? COLOUR(NORMAL)       : ""; | 
| TEXT_COLOUR clr_reg_name       = FLAG_log_colour ? COLOUR_BOLD(CYAN)    : ""; | 
| TEXT_COLOUR clr_reg_value      = FLAG_log_colour ? COLOUR(CYAN)         : ""; | 
| -TEXT_COLOUR clr_fpreg_name     = FLAG_log_colour ? COLOUR_BOLD(MAGENTA) : ""; | 
| -TEXT_COLOUR clr_fpreg_value    = FLAG_log_colour ? COLOUR(MAGENTA)      : ""; | 
| +TEXT_COLOUR clr_vreg_name = FLAG_log_colour ? COLOUR_BOLD(MAGENTA) : ""; | 
| +TEXT_COLOUR clr_vreg_value = FLAG_log_colour ? COLOUR(MAGENTA) : ""; | 
| TEXT_COLOUR clr_memory_address = FLAG_log_colour ? COLOUR_BOLD(BLUE)    : ""; | 
| TEXT_COLOUR clr_debug_number   = FLAG_log_colour ? COLOUR_BOLD(YELLOW)  : ""; | 
| TEXT_COLOUR clr_debug_message  = FLAG_log_colour ? COLOUR(YELLOW)       : ""; | 
| @@ -233,20 +234,20 @@ void Simulator::CheckPCSComplianceAndRun() { | 
|  | 
| #ifdef DEBUG | 
| CHECK_EQ(kNumberOfCalleeSavedRegisters, kCalleeSaved.Count()); | 
| -  CHECK_EQ(kNumberOfCalleeSavedFPRegisters, kCalleeSavedFP.Count()); | 
| +  CHECK_EQ(kNumberOfCalleeSavedVRegisters, kCalleeSavedV.Count()); | 
|  | 
| int64_t saved_registers[kNumberOfCalleeSavedRegisters]; | 
| -  uint64_t saved_fpregisters[kNumberOfCalleeSavedFPRegisters]; | 
| +  uint64_t saved_fpregisters[kNumberOfCalleeSavedVRegisters]; | 
|  | 
| CPURegList register_list = kCalleeSaved; | 
| -  CPURegList fpregister_list = kCalleeSavedFP; | 
| +  CPURegList fpregister_list = kCalleeSavedV; | 
|  | 
| for (int i = 0; i < kNumberOfCalleeSavedRegisters; i++) { | 
| // x31 is not a caller saved register, so no need to specify if we want | 
| // the stack or zero. | 
| saved_registers[i] = xreg(register_list.PopLowestIndex().code()); | 
| } | 
| -  for (int i = 0; i < kNumberOfCalleeSavedFPRegisters; i++) { | 
| +  for (int i = 0; i < kNumberOfCalleeSavedVRegisters; i++) { | 
| saved_fpregisters[i] = | 
| dreg_bits(fpregister_list.PopLowestIndex().code()); | 
| } | 
| @@ -258,11 +259,11 @@ void Simulator::CheckPCSComplianceAndRun() { | 
| CHECK_EQ(original_stack, sp()); | 
| // Check that callee-saved registers have been preserved. | 
| register_list = kCalleeSaved; | 
| -  fpregister_list = kCalleeSavedFP; | 
| +  fpregister_list = kCalleeSavedV; | 
| for (int i = 0; i < kNumberOfCalleeSavedRegisters; i++) { | 
| CHECK_EQ(saved_registers[i], xreg(register_list.PopLowestIndex().code())); | 
| } | 
| -  for (int i = 0; i < kNumberOfCalleeSavedFPRegisters; i++) { | 
| +  for (int i = 0; i < kNumberOfCalleeSavedVRegisters; i++) { | 
| DCHECK(saved_fpregisters[i] == | 
| dreg_bits(fpregister_list.PopLowestIndex().code())); | 
| } | 
| @@ -277,11 +278,11 @@ void Simulator::CheckPCSComplianceAndRun() { | 
|  | 
| // In theory d0 to d7 can be used for return values, but V8 only uses d0 | 
| // for now . | 
| -  fpregister_list = kCallerSavedFP; | 
| +  fpregister_list = kCallerSavedV; | 
| fpregister_list.Remove(d0); | 
|  | 
| CorruptRegisters(®ister_list, kCallerSavedRegisterCorruptionValue); | 
| -  CorruptRegisters(&fpregister_list, kCallerSavedFPRegisterCorruptionValue); | 
| +  CorruptRegisters(&fpregister_list, kCallerSavedVRegisterCorruptionValue); | 
| #endif | 
| } | 
|  | 
| @@ -296,7 +297,7 @@ void Simulator::CorruptRegisters(CPURegList* list, uint64_t value) { | 
| set_xreg(code, value | code); | 
| } | 
| } else { | 
| -    DCHECK(list->type() == CPURegister::kFPRegister); | 
| +    DCHECK_EQ(list->type(), CPURegister::kVRegister); | 
| while (!list->IsEmpty()) { | 
| unsigned code = list->PopLowestIndex().code(); | 
| set_dreg_bits(code, value | code); | 
| @@ -308,10 +309,10 @@ void Simulator::CorruptRegisters(CPURegList* list, uint64_t value) { | 
| void Simulator::CorruptAllCallerSavedCPURegisters() { | 
| // Corrupt alters its parameter so copy them first. | 
| CPURegList register_list = kCallerSaved; | 
| -  CPURegList fpregister_list = kCallerSavedFP; | 
| +  CPURegList fpregister_list = kCallerSavedV; | 
|  | 
| CorruptRegisters(®ister_list, kCallerSavedRegisterCorruptionValue); | 
| -  CorruptRegisters(&fpregister_list, kCallerSavedFPRegisterCorruptionValue); | 
| +  CorruptRegisters(&fpregister_list, kCallerSavedVRegisterCorruptionValue); | 
| } | 
| #endif | 
|  | 
| @@ -419,7 +420,7 @@ void Simulator::ResetState() { | 
| for (unsigned i = 0; i < kNumberOfRegisters; i++) { | 
| set_xreg(i, 0xbadbeef); | 
| } | 
| -  for (unsigned i = 0; i < kNumberOfFPRegisters; i++) { | 
| +  for (unsigned i = 0; i < kNumberOfVRegisters; i++) { | 
| // Set FP registers to a value that is NaN in both 32-bit and 64-bit FP. | 
| set_dreg_bits(i, 0x7ff000007f800001UL); | 
| } | 
| @@ -446,6 +447,10 @@ Simulator::~Simulator() { | 
|  | 
|  | 
| void Simulator::Run() { | 
| +  // Flush any written registers before executing anything, so that | 
| +  // manually-set registers are logged _before_ the first instruction. | 
| +  LogAllWrittenRegisters(); | 
| + | 
| pc_modified_ = false; | 
| while (pc_ != kEndOfSimAddress) { | 
| ExecuteInstruction(); | 
| @@ -823,8 +828,9 @@ const char* Simulator::vreg_names[] = { | 
|  | 
|  | 
| const char* Simulator::WRegNameForCode(unsigned code, Reg31Mode mode) { | 
| -  STATIC_ASSERT(arraysize(Simulator::wreg_names) == (kNumberOfRegisters + 1)); | 
| -  DCHECK(code < kNumberOfRegisters); | 
| +  static_assert(arraysize(Simulator::wreg_names) == (kNumberOfRegisters + 1), | 
| +                "Array must be large enough to hold all register names."); | 
| +  DCHECK_LT(code, static_cast<unsigned>(kNumberOfRegisters)); | 
| // The modulo operator has no effect here, but it silences a broken GCC | 
| // warning about out-of-bounds array accesses. | 
| code %= kNumberOfRegisters; | 
| @@ -838,8 +844,9 @@ const char* Simulator::WRegNameForCode(unsigned code, Reg31Mode mode) { | 
|  | 
|  | 
| const char* Simulator::XRegNameForCode(unsigned code, Reg31Mode mode) { | 
| -  STATIC_ASSERT(arraysize(Simulator::xreg_names) == (kNumberOfRegisters + 1)); | 
| -  DCHECK(code < kNumberOfRegisters); | 
| +  static_assert(arraysize(Simulator::xreg_names) == (kNumberOfRegisters + 1), | 
| +                "Array must be large enough to hold all register names."); | 
| +  DCHECK_LT(code, static_cast<unsigned>(kNumberOfRegisters)); | 
| code %= kNumberOfRegisters; | 
|  | 
| // If the code represents the stack pointer, index the name after zr. | 
| @@ -851,23 +858,70 @@ const char* Simulator::XRegNameForCode(unsigned code, Reg31Mode mode) { | 
|  | 
|  | 
| const char* Simulator::SRegNameForCode(unsigned code) { | 
| -  STATIC_ASSERT(arraysize(Simulator::sreg_names) == kNumberOfFPRegisters); | 
| -  DCHECK(code < kNumberOfFPRegisters); | 
| -  return sreg_names[code % kNumberOfFPRegisters]; | 
| +  static_assert(arraysize(Simulator::sreg_names) == kNumberOfVRegisters, | 
| +                "Array must be large enough to hold all register names."); | 
| +  DCHECK_LT(code, static_cast<unsigned>(kNumberOfVRegisters)); | 
| +  return sreg_names[code % kNumberOfVRegisters]; | 
| } | 
|  | 
|  | 
| const char* Simulator::DRegNameForCode(unsigned code) { | 
| -  STATIC_ASSERT(arraysize(Simulator::dreg_names) == kNumberOfFPRegisters); | 
| -  DCHECK(code < kNumberOfFPRegisters); | 
| -  return dreg_names[code % kNumberOfFPRegisters]; | 
| +  static_assert(arraysize(Simulator::dreg_names) == kNumberOfVRegisters, | 
| +                "Array must be large enough to hold all register names."); | 
| +  DCHECK_LT(code, static_cast<unsigned>(kNumberOfVRegisters)); | 
| +  return dreg_names[code % kNumberOfVRegisters]; | 
| } | 
|  | 
|  | 
| const char* Simulator::VRegNameForCode(unsigned code) { | 
| -  STATIC_ASSERT(arraysize(Simulator::vreg_names) == kNumberOfFPRegisters); | 
| -  DCHECK(code < kNumberOfFPRegisters); | 
| -  return vreg_names[code % kNumberOfFPRegisters]; | 
| +  static_assert(arraysize(Simulator::vreg_names) == kNumberOfVRegisters, | 
| +                "Array must be large enough to hold all register names."); | 
| +  DCHECK_LT(code, static_cast<unsigned>(kNumberOfVRegisters)); | 
| +  return vreg_names[code % kNumberOfVRegisters]; | 
| +} | 
| + | 
| +void LogicVRegister::ReadUintFromMem(VectorFormat vform, int index, | 
| +                                     uint64_t addr) const { | 
| +  switch (LaneSizeInBitsFromFormat(vform)) { | 
| +    case 8: | 
| +      register_.Insert(index, SimMemory::Read<uint8_t>(addr)); | 
| +      break; | 
| +    case 16: | 
| +      register_.Insert(index, SimMemory::Read<uint16_t>(addr)); | 
| +      break; | 
| +    case 32: | 
| +      register_.Insert(index, SimMemory::Read<uint32_t>(addr)); | 
| +      break; | 
| +    case 64: | 
| +      register_.Insert(index, SimMemory::Read<uint64_t>(addr)); | 
| +      break; | 
| +    default: | 
| +      UNREACHABLE(); | 
| +      return; | 
| +  } | 
| +} | 
| + | 
| +void LogicVRegister::WriteUintToMem(VectorFormat vform, int index, | 
| +                                    uint64_t addr) const { | 
| +  switch (LaneSizeInBitsFromFormat(vform)) { | 
| +    case 8: | 
| +      SimMemory::Write<uint8_t>(addr, static_cast<uint8_t>(Uint(vform, index))); | 
| +      break; | 
| +    case 16: | 
| +      SimMemory::Write<uint16_t>(addr, | 
| +                                 static_cast<uint16_t>(Uint(vform, index))); | 
| +      break; | 
| +    case 32: | 
| +      SimMemory::Write<uint32_t>(addr, | 
| +                                 static_cast<uint32_t>(Uint(vform, index))); | 
| +      break; | 
| +    case 64: | 
| +      SimMemory::Write<uint64_t>(addr, Uint(vform, index)); | 
| +      break; | 
| +    default: | 
| +      UNREACHABLE(); | 
| +      return; | 
| +  } | 
| } | 
|  | 
|  | 
| @@ -878,7 +932,7 @@ int Simulator::CodeFromName(const char* name) { | 
| return i; | 
| } | 
| } | 
| -  for (unsigned i = 0; i < kNumberOfFPRegisters; i++) { | 
| +  for (unsigned i = 0; i < kNumberOfVRegisters; i++) { | 
| if ((strcmp(vreg_names[i], name) == 0) || | 
| (strcmp(dreg_names[i], name) == 0) || | 
| (strcmp(sreg_names[i], name) == 0)) { | 
| @@ -1021,16 +1075,6 @@ void Simulator::Extract(Instruction* instr) { | 
| } | 
|  | 
|  | 
| -template<> double Simulator::FPDefaultNaN<double>() const { | 
| -  return kFP64DefaultNaN; | 
| -} | 
| - | 
| - | 
| -template<> float Simulator::FPDefaultNaN<float>() const { | 
| -  return kFP32DefaultNaN; | 
| -} | 
| - | 
| - | 
| void Simulator::FPCompare(double val0, double val1) { | 
| AssertSupportedFPCR(); | 
|  | 
| @@ -1050,6 +1094,111 @@ void Simulator::FPCompare(double val0, double val1) { | 
| LogSystemRegister(NZCV); | 
| } | 
|  | 
| +Simulator::PrintRegisterFormat Simulator::GetPrintRegisterFormatForSize( | 
| +    size_t reg_size, size_t lane_size) { | 
| +  DCHECK_GE(reg_size, lane_size); | 
| + | 
| +  uint32_t format = 0; | 
| +  if (reg_size != lane_size) { | 
| +    switch (reg_size) { | 
| +      default: | 
| +        UNREACHABLE(); | 
| +        break; | 
| +      case kQRegSize: | 
| +        format = kPrintRegAsQVector; | 
| +        break; | 
| +      case kDRegSize: | 
| +        format = kPrintRegAsDVector; | 
| +        break; | 
| +    } | 
| +  } | 
| + | 
| +  switch (lane_size) { | 
| +    default: | 
| +      UNREACHABLE(); | 
| +    case kQRegSize: | 
| +      format |= kPrintReg1Q; | 
| +      break; | 
| +    case kDRegSize: | 
| +      format |= kPrintReg1D; | 
| +      break; | 
| +    case kSRegSize: | 
| +      format |= kPrintReg1S; | 
| +      break; | 
| +    case kHRegSize: | 
| +      format |= kPrintReg1H; | 
| +      break; | 
| +    case kBRegSize: | 
| +      format |= kPrintReg1B; | 
| +      break; | 
| +  } | 
| + | 
| +  // These sizes would be duplicate case labels. | 
| +  static_assert(kXRegSize == kDRegSize, "X and D registers must be same size."); | 
| +  static_assert(kWRegSize == kSRegSize, "W and S registers must be same size."); | 
| +  static_assert(kPrintXReg == kPrintReg1D, | 
| +                "X and D register printing code is shared."); | 
| +  static_assert(kPrintWReg == kPrintReg1S, | 
| +                "W and S register printing code is shared."); | 
| + | 
| +  return static_cast<PrintRegisterFormat>(format); | 
| +} | 
| + | 
| +Simulator::PrintRegisterFormat Simulator::GetPrintRegisterFormat( | 
| +    VectorFormat vform) { | 
| +  switch (vform) { | 
| +    default: | 
| +      UNREACHABLE(); | 
| +      return kPrintReg16B; | 
| +    case kFormat16B: | 
| +      return kPrintReg16B; | 
| +    case kFormat8B: | 
| +      return kPrintReg8B; | 
| +    case kFormat8H: | 
| +      return kPrintReg8H; | 
| +    case kFormat4H: | 
| +      return kPrintReg4H; | 
| +    case kFormat4S: | 
| +      return kPrintReg4S; | 
| +    case kFormat2S: | 
| +      return kPrintReg2S; | 
| +    case kFormat2D: | 
| +      return kPrintReg2D; | 
| +    case kFormat1D: | 
| +      return kPrintReg1D; | 
| + | 
| +    case kFormatB: | 
| +      return kPrintReg1B; | 
| +    case kFormatH: | 
| +      return kPrintReg1H; | 
| +    case kFormatS: | 
| +      return kPrintReg1S; | 
| +    case kFormatD: | 
| +      return kPrintReg1D; | 
| +  } | 
| +} | 
| + | 
| +Simulator::PrintRegisterFormat Simulator::GetPrintRegisterFormatFP( | 
| +    VectorFormat vform) { | 
| +  switch (vform) { | 
| +    default: | 
| +      UNREACHABLE(); | 
| +      return kPrintReg16B; | 
| +    case kFormat4S: | 
| +      return kPrintReg4SFP; | 
| +    case kFormat2S: | 
| +      return kPrintReg2SFP; | 
| +    case kFormat2D: | 
| +      return kPrintReg2DFP; | 
| +    case kFormat1D: | 
| +      return kPrintReg1DFP; | 
| + | 
| +    case kFormatS: | 
| +      return kPrintReg1SFP; | 
| +    case kFormatD: | 
| +      return kPrintReg1DFP; | 
| +  } | 
| +} | 
|  | 
| void Simulator::SetBreakpoint(Instruction* location) { | 
| for (unsigned i = 0; i < breakpoints_.size(); i++) { | 
| @@ -1113,6 +1262,18 @@ void Simulator::PrintInstructionsAt(Instruction* start, uint64_t count) { | 
| } | 
| } | 
|  | 
| +void Simulator::PrintWrittenRegisters() { | 
| +  for (unsigned i = 0; i < kNumberOfRegisters; i++) { | 
| +    if (registers_[i].WrittenSinceLastLog()) PrintRegister(i); | 
| +  } | 
| +} | 
| + | 
| +void Simulator::PrintWrittenVRegisters() { | 
| +  for (unsigned i = 0; i < kNumberOfVRegisters; i++) { | 
| +    // At this point there is no type information, so print as a raw 1Q. | 
| +    if (vregisters_[i].WrittenSinceLastLog()) PrintVRegister(i, kPrintReg1Q); | 
| +  } | 
| +} | 
|  | 
| void Simulator::PrintSystemRegisters() { | 
| PrintSystemRegister(NZCV); | 
| @@ -1126,58 +1287,217 @@ void Simulator::PrintRegisters() { | 
| } | 
| } | 
|  | 
| - | 
| -void Simulator::PrintFPRegisters() { | 
| -  for (unsigned i = 0; i < kNumberOfFPRegisters; i++) { | 
| -    PrintFPRegister(i); | 
| +void Simulator::PrintVRegisters() { | 
| +  for (unsigned i = 0; i < kNumberOfVRegisters; i++) { | 
| +    // At this point there is no type information, so print as a raw 1Q. | 
| +    PrintVRegister(i, kPrintReg1Q); | 
| } | 
| } | 
|  | 
|  | 
| void Simulator::PrintRegister(unsigned code, Reg31Mode r31mode) { | 
| +  registers_[code].NotifyRegisterLogged(); | 
| + | 
| // Don't print writes into xzr. | 
| if ((code == kZeroRegCode) && (r31mode == Reg31IsZeroRegister)) { | 
| return; | 
| } | 
|  | 
| -  // The template is "# x<code>:value". | 
| -  fprintf(stream_, "# %s%5s: %s0x%016" PRIx64 "%s\n", | 
| -          clr_reg_name, XRegNameForCode(code, r31mode), | 
| -          clr_reg_value, reg<uint64_t>(code, r31mode), clr_normal); | 
| +  // The template for all x and w registers: | 
| +  //   "# x{code}: 0x{value}" | 
| +  //   "# w{code}: 0x{value}" | 
| + | 
| +  PrintRegisterRawHelper(code, r31mode); | 
| +  fprintf(stream_, "\n"); | 
| +} | 
| + | 
| +// Print a register's name and raw value. | 
| +// | 
| +// The `bytes` and `lsb` arguments can be used to limit the bytes that are | 
| +// printed. These arguments are intended for use in cases where register hasn't | 
| +// actually been updated (such as in PrintVWrite). | 
| +// | 
| +// No newline is printed. This allows the caller to print more details (such as | 
| +// a floating-point interpretation or a memory access annotation). | 
| +void Simulator::PrintVRegisterRawHelper(unsigned code, int bytes, int lsb) { | 
| +  // The template for vector types: | 
| +  //   "# v{code}: 0xffeeddccbbaa99887766554433221100". | 
| +  // An example with bytes=4 and lsb=8: | 
| +  //   "# v{code}:         0xbbaa9988                ". | 
| +  fprintf(stream_, "# %s%5s: %s", clr_vreg_name, VRegNameForCode(code), | 
| +          clr_vreg_value); | 
| + | 
| +  int msb = lsb + bytes - 1; | 
| +  int byte = kQRegSize - 1; | 
| + | 
| +  // Print leading padding spaces. (Two spaces per byte.) | 
| +  while (byte > msb) { | 
| +    fprintf(stream_, "  "); | 
| +    byte--; | 
| +  } | 
| + | 
| +  // Print the specified part of the value, byte by byte. | 
| +  qreg_t rawbits = qreg(code); | 
| +  fprintf(stream_, "0x"); | 
| +  while (byte >= lsb) { | 
| +    fprintf(stream_, "%02x", rawbits.val[byte]); | 
| +    byte--; | 
| +  } | 
| + | 
| +  // Print trailing padding spaces. | 
| +  while (byte >= 0) { | 
| +    fprintf(stream_, "  "); | 
| +    byte--; | 
| +  } | 
| +  fprintf(stream_, "%s", clr_normal); | 
| +} | 
| + | 
| +// Print each of the specified lanes of a register as a float or double value. | 
| +// | 
| +// The `lane_count` and `lslane` arguments can be used to limit the lanes that | 
| +// are printed. These arguments are intended for use in cases where register | 
| +// hasn't actually been updated (such as in PrintVWrite). | 
| +// | 
| +// No newline is printed. This allows the caller to print more details (such as | 
| +// a memory access annotation). | 
| +void Simulator::PrintVRegisterFPHelper(unsigned code, | 
| +                                       unsigned lane_size_in_bytes, | 
| +                                       int lane_count, int rightmost_lane) { | 
| +  DCHECK((lane_size_in_bytes == kSRegSize) || | 
| +         (lane_size_in_bytes == kDRegSize)); | 
| + | 
| +  unsigned msb = (lane_count + rightmost_lane) * lane_size_in_bytes; | 
| +  DCHECK_LE(msb, static_cast<unsigned>(kQRegSize)); | 
| + | 
| +  // For scalar types ((lane_count == 1) && (rightmost_lane == 0)), a register | 
| +  // name is used: | 
| +  //   " (s{code}: {value})" | 
| +  //   " (d{code}: {value})" | 
| +  // For vector types, "..." is used to represent one or more omitted lanes. | 
| +  //   " (..., {value}, {value}, ...)" | 
| +  if ((lane_count == 1) && (rightmost_lane == 0)) { | 
| +    const char* name = (lane_size_in_bytes == kSRegSize) | 
| +                           ? SRegNameForCode(code) | 
| +                           : DRegNameForCode(code); | 
| +    fprintf(stream_, " (%s%s: ", clr_vreg_name, name); | 
| +  } else { | 
| +    if (msb < (kQRegSize - 1)) { | 
| +      fprintf(stream_, " (..., "); | 
| +    } else { | 
| +      fprintf(stream_, " ("); | 
| +    } | 
| +  } | 
| + | 
| +  // Print the list of values. | 
| +  const char* separator = ""; | 
| +  int leftmost_lane = rightmost_lane + lane_count - 1; | 
| +  for (int lane = leftmost_lane; lane >= rightmost_lane; lane--) { | 
| +    double value = (lane_size_in_bytes == kSRegSize) | 
| +                       ? vreg(code).Get<float>(lane) | 
| +                       : vreg(code).Get<double>(lane); | 
| +    fprintf(stream_, "%s%s%#g%s", separator, clr_vreg_value, value, clr_normal); | 
| +    separator = ", "; | 
| +  } | 
| + | 
| +  if (rightmost_lane > 0) { | 
| +    fprintf(stream_, ", ..."); | 
| +  } | 
| +  fprintf(stream_, ")"); | 
| } | 
|  | 
| +// Print a register's name and raw value. | 
| +// | 
| +// Only the least-significant `size_in_bytes` bytes of the register are printed, | 
| +// but the value is aligned as if the whole register had been printed. | 
| +// | 
| +// For typical register updates, size_in_bytes should be set to kXRegSize | 
| +// -- the default -- so that the whole register is printed. Other values of | 
| +// size_in_bytes are intended for use when the register hasn't actually been | 
| +// updated (such as in PrintWrite). | 
| +// | 
| +// No newline is printed. This allows the caller to print more details (such as | 
| +// a memory access annotation). | 
| +void Simulator::PrintRegisterRawHelper(unsigned code, Reg31Mode r31mode, | 
| +                                       int size_in_bytes) { | 
| +  // The template for all supported sizes. | 
| +  //   "# x{code}: 0xffeeddccbbaa9988" | 
| +  //   "# w{code}:         0xbbaa9988" | 
| +  //   "# w{code}<15:0>:       0x9988" | 
| +  //   "# w{code}<7:0>:          0x88" | 
| +  unsigned padding_chars = (kXRegSize - size_in_bytes) * 2; | 
| + | 
| +  const char* name = ""; | 
| +  const char* suffix = ""; | 
| +  switch (size_in_bytes) { | 
| +    case kXRegSize: | 
| +      name = XRegNameForCode(code, r31mode); | 
| +      break; | 
| +    case kWRegSize: | 
| +      name = WRegNameForCode(code, r31mode); | 
| +      break; | 
| +    case 2: | 
| +      name = WRegNameForCode(code, r31mode); | 
| +      suffix = "<15:0>"; | 
| +      padding_chars -= strlen(suffix); | 
| +      break; | 
| +    case 1: | 
| +      name = WRegNameForCode(code, r31mode); | 
| +      suffix = "<7:0>"; | 
| +      padding_chars -= strlen(suffix); | 
| +      break; | 
| +    default: | 
| +      UNREACHABLE(); | 
| +  } | 
| +  fprintf(stream_, "# %s%5s%s: ", clr_reg_name, name, suffix); | 
| + | 
| +  // Print leading padding spaces. | 
| +  DCHECK_LT(padding_chars, kXRegSize * 2U); | 
| +  for (unsigned i = 0; i < padding_chars; i++) { | 
| +    putc(' ', stream_); | 
| +  } | 
| + | 
| +  // Print the specified bits in hexadecimal format. | 
| +  uint64_t bits = reg<uint64_t>(code, r31mode); | 
| +  bits &= kXRegMask >> ((kXRegSize - size_in_bytes) * 8); | 
| +  static_assert(sizeof(bits) == kXRegSize, | 
| +                "X registers and uint64_t must be the same size."); | 
|  | 
| -void Simulator::PrintFPRegister(unsigned code, PrintFPRegisterSizes sizes) { | 
| -  // The template is "# v<code>:bits (d<code>:value, ...)". | 
| +  int chars = size_in_bytes * 2; | 
| +  fprintf(stream_, "%s0x%0*" PRIx64 "%s", clr_reg_value, chars, bits, | 
| +          clr_normal); | 
| +} | 
|  | 
| -  DCHECK(sizes != 0); | 
| -  DCHECK((sizes & kPrintAllFPRegValues) == sizes); | 
| +void Simulator::PrintVRegister(unsigned code, PrintRegisterFormat format) { | 
| +  vregisters_[code].NotifyRegisterLogged(); | 
|  | 
| -  // Print the raw bits. | 
| -  fprintf(stream_, "# %s%5s: %s0x%016" PRIx64 "%s (", | 
| -          clr_fpreg_name, VRegNameForCode(code), | 
| -          clr_fpreg_value, fpreg<uint64_t>(code), clr_normal); | 
| +  int lane_size_log2 = format & kPrintRegLaneSizeMask; | 
|  | 
| -  // Print all requested value interpretations. | 
| -  bool need_separator = false; | 
| -  if (sizes & kPrintDRegValue) { | 
| -    fprintf(stream_, "%s%s%s: %s%g%s", | 
| -            need_separator ? ", " : "", | 
| -            clr_fpreg_name, DRegNameForCode(code), | 
| -            clr_fpreg_value, fpreg<double>(code), clr_normal); | 
| -    need_separator = true; | 
| +  int reg_size_log2; | 
| +  if (format & kPrintRegAsQVector) { | 
| +    reg_size_log2 = kQRegSizeLog2; | 
| +  } else if (format & kPrintRegAsDVector) { | 
| +    reg_size_log2 = kDRegSizeLog2; | 
| +  } else { | 
| +    // Scalar types. | 
| +    reg_size_log2 = lane_size_log2; | 
| } | 
|  | 
| -  if (sizes & kPrintSRegValue) { | 
| -    fprintf(stream_, "%s%s%s: %s%g%s", | 
| -            need_separator ? ", " : "", | 
| -            clr_fpreg_name, SRegNameForCode(code), | 
| -            clr_fpreg_value, fpreg<float>(code), clr_normal); | 
| -    need_separator = true; | 
| +  int lane_count = 1 << (reg_size_log2 - lane_size_log2); | 
| +  int lane_size = 1 << lane_size_log2; | 
| + | 
| +  // The template for vector types: | 
| +  //   "# v{code}: 0x{rawbits} (..., {value}, ...)". | 
| +  // The template for scalar types: | 
| +  //   "# v{code}: 0x{rawbits} ({reg}:{value})". | 
| +  // The values in parentheses after the bit representations are floating-point | 
| +  // interpretations. They are displayed only if the kPrintVRegAsFP bit is set. | 
| + | 
| +  PrintVRegisterRawHelper(code); | 
| +  if (format & kPrintRegAsFP) { | 
| +    PrintVRegisterFPHelper(code, lane_size, lane_count); | 
| } | 
|  | 
| -  // End the value list. | 
| -  fprintf(stream_, ")\n"); | 
| +  fprintf(stream_, "\n"); | 
| } | 
|  | 
|  | 
| @@ -1209,109 +1529,61 @@ void Simulator::PrintSystemRegister(SystemRegister id) { | 
| } | 
| } | 
|  | 
| +void Simulator::PrintRead(uintptr_t address, unsigned reg_code, | 
| +                          PrintRegisterFormat format) { | 
| +  registers_[reg_code].NotifyRegisterLogged(); | 
|  | 
| -void Simulator::PrintRead(uintptr_t address, | 
| -                          size_t size, | 
| -                          unsigned reg_code) { | 
| -  USE(size);  // Size is unused here. | 
| - | 
| -  // The template is "# x<code>:value <- address". | 
| -  fprintf(stream_, "# %s%5s: %s0x%016" PRIx64 "%s", | 
| -          clr_reg_name, XRegNameForCode(reg_code), | 
| -          clr_reg_value, reg<uint64_t>(reg_code), clr_normal); | 
| +  USE(format); | 
|  | 
| +  // The template is "# {reg}: 0x{value} <- {address}". | 
| +  PrintRegisterRawHelper(reg_code, Reg31IsZeroRegister); | 
| fprintf(stream_, " <- %s0x%016" PRIxPTR "%s\n", | 
| clr_memory_address, address, clr_normal); | 
| } | 
|  | 
| +void Simulator::PrintVRead(uintptr_t address, unsigned reg_code, | 
| +                           PrintRegisterFormat format, unsigned lane) { | 
| +  vregisters_[reg_code].NotifyRegisterLogged(); | 
|  | 
| -void Simulator::PrintReadFP(uintptr_t address, | 
| -                            size_t size, | 
| -                            unsigned reg_code) { | 
| -  // The template is "# reg:bits (reg:value) <- address". | 
| -  switch (size) { | 
| -    case kSRegSize: | 
| -      fprintf(stream_, "# %s%5s: %s0x%016" PRIx64 "%s (%s%s: %s%gf%s)", | 
| -              clr_fpreg_name, VRegNameForCode(reg_code), | 
| -              clr_fpreg_value, fpreg<uint64_t>(reg_code), clr_normal, | 
| -              clr_fpreg_name, SRegNameForCode(reg_code), | 
| -              clr_fpreg_value, fpreg<float>(reg_code), clr_normal); | 
| -      break; | 
| -    case kDRegSize: | 
| -      fprintf(stream_, "# %s%5s: %s0x%016" PRIx64 "%s (%s%s: %s%g%s)", | 
| -              clr_fpreg_name, VRegNameForCode(reg_code), | 
| -              clr_fpreg_value, fpreg<uint64_t>(reg_code), clr_normal, | 
| -              clr_fpreg_name, DRegNameForCode(reg_code), | 
| -              clr_fpreg_value, fpreg<double>(reg_code), clr_normal); | 
| -      break; | 
| -    default: | 
| -      UNREACHABLE(); | 
| +  // The template is "# v{code}: 0x{rawbits} <- address". | 
| +  PrintVRegisterRawHelper(reg_code); | 
| +  if (format & kPrintRegAsFP) { | 
| +    PrintVRegisterFPHelper(reg_code, GetPrintRegLaneSizeInBytes(format), | 
| +                           GetPrintRegLaneCount(format), lane); | 
| } | 
| - | 
| fprintf(stream_, " <- %s0x%016" PRIxPTR "%s\n", | 
| clr_memory_address, address, clr_normal); | 
| } | 
|  | 
| +void Simulator::PrintWrite(uintptr_t address, unsigned reg_code, | 
| +                           PrintRegisterFormat format) { | 
| +  DCHECK_EQ(GetPrintRegLaneCount(format), 1U); | 
|  | 
| -void Simulator::PrintWrite(uintptr_t address, | 
| -                           size_t size, | 
| -                           unsigned reg_code) { | 
| -  // The template is "# reg:value -> address". To keep the trace tidy and | 
| -  // readable, the value is aligned with the values in the register trace. | 
| -  switch (size) { | 
| -    case kByteSizeInBytes: | 
| -      fprintf(stream_, "# %s%5s<7:0>:          %s0x%02" PRIx8 "%s", | 
| -              clr_reg_name, WRegNameForCode(reg_code), | 
| -              clr_reg_value, reg<uint8_t>(reg_code), clr_normal); | 
| -      break; | 
| -    case kHalfWordSizeInBytes: | 
| -      fprintf(stream_, "# %s%5s<15:0>:       %s0x%04" PRIx16 "%s", | 
| -              clr_reg_name, WRegNameForCode(reg_code), | 
| -              clr_reg_value, reg<uint16_t>(reg_code), clr_normal); | 
| -      break; | 
| -    case kWRegSize: | 
| -      fprintf(stream_, "# %s%5s:         %s0x%08" PRIx32 "%s", | 
| -              clr_reg_name, WRegNameForCode(reg_code), | 
| -              clr_reg_value, reg<uint32_t>(reg_code), clr_normal); | 
| -      break; | 
| -    case kXRegSize: | 
| -      fprintf(stream_, "# %s%5s: %s0x%016" PRIx64 "%s", | 
| -              clr_reg_name, XRegNameForCode(reg_code), | 
| -              clr_reg_value, reg<uint64_t>(reg_code), clr_normal); | 
| -      break; | 
| -    default: | 
| -      UNREACHABLE(); | 
| -  } | 
| - | 
| +  // The template is "# v{code}: 0x{value} -> {address}". To keep the trace tidy | 
| +  // and readable, the value is aligned with the values in the register trace. | 
| +  PrintRegisterRawHelper(reg_code, Reg31IsZeroRegister, | 
| +                         GetPrintRegSizeInBytes(format)); | 
| fprintf(stream_, " -> %s0x%016" PRIxPTR "%s\n", | 
| clr_memory_address, address, clr_normal); | 
| } | 
|  | 
| - | 
| -void Simulator::PrintWriteFP(uintptr_t address, | 
| -                             size_t size, | 
| -                             unsigned reg_code) { | 
| -  // The template is "# reg:bits (reg:value) -> address". To keep the trace tidy | 
| -  // and readable, the value is aligned with the values in the register trace. | 
| -  switch (size) { | 
| -    case kSRegSize: | 
| -      fprintf(stream_, "# %s%5s<31:0>:   %s0x%08" PRIx32 "%s (%s%s: %s%gf%s)", | 
| -              clr_fpreg_name, VRegNameForCode(reg_code), | 
| -              clr_fpreg_value, fpreg<uint32_t>(reg_code), clr_normal, | 
| -              clr_fpreg_name, SRegNameForCode(reg_code), | 
| -              clr_fpreg_value, fpreg<float>(reg_code), clr_normal); | 
| -      break; | 
| -    case kDRegSize: | 
| -      fprintf(stream_, "# %s%5s: %s0x%016" PRIx64 "%s (%s%s: %s%g%s)", | 
| -              clr_fpreg_name, VRegNameForCode(reg_code), | 
| -              clr_fpreg_value, fpreg<uint64_t>(reg_code), clr_normal, | 
| -              clr_fpreg_name, DRegNameForCode(reg_code), | 
| -              clr_fpreg_value, fpreg<double>(reg_code), clr_normal); | 
| -      break; | 
| -    default: | 
| -      UNREACHABLE(); | 
| +void Simulator::PrintVWrite(uintptr_t address, unsigned reg_code, | 
| +                            PrintRegisterFormat format, unsigned lane) { | 
| +  // The templates: | 
| +  //   "# v{code}: 0x{rawbits} -> {address}" | 
| +  //   "# v{code}: 0x{rawbits} (..., {value}, ...) -> {address}". | 
| +  //   "# v{code}: 0x{rawbits} ({reg}:{value}) -> {address}" | 
| +  // Because this trace doesn't represent a change to the source register's | 
| +  // value, only the relevant part of the value is printed. To keep the trace | 
| +  // tidy and readable, the raw value is aligned with the other values in the | 
| +  // register trace. | 
| +  int lane_count = GetPrintRegLaneCount(format); | 
| +  int lane_size = GetPrintRegLaneSizeInBytes(format); | 
| +  int reg_size = GetPrintRegSizeInBytes(format); | 
| +  PrintVRegisterRawHelper(reg_code, reg_size, lane_size * lane); | 
| +  if (format & kPrintRegAsFP) { | 
| +    PrintVRegisterFPHelper(reg_code, lane_size, lane_count, lane); | 
| } | 
| - | 
| fprintf(stream_, " -> %s0x%016" PRIxPTR "%s\n", | 
| clr_memory_address, address, clr_normal); | 
| } | 
| @@ -1657,10 +1929,10 @@ void Simulator::LoadStoreHelper(Instruction* instr, | 
| stack = sp(); | 
| } | 
|  | 
| -  LoadStoreOp op = static_cast<LoadStoreOp>(instr->Mask(LoadStoreOpMask)); | 
| +  LoadStoreOp op = static_cast<LoadStoreOp>(instr->Mask(LoadStoreMask)); | 
| switch (op) { | 
| // Use _no_log variants to suppress the register trace (LOG_REGS, | 
| -    // LOG_FP_REGS). We will print a more detailed log. | 
| +    // LOG_VREGS). We will print a more detailed log. | 
| case LDRB_w:  set_wreg_no_log(srcdst, MemoryRead<uint8_t>(address)); break; | 
| case LDRH_w:  set_wreg_no_log(srcdst, MemoryRead<uint16_t>(address)); break; | 
| case LDR_w:   set_wreg_no_log(srcdst, MemoryRead<uint32_t>(address)); break; | 
| @@ -1670,33 +1942,55 @@ void Simulator::LoadStoreHelper(Instruction* instr, | 
| case LDRSB_x: set_xreg_no_log(srcdst, MemoryRead<int8_t>(address)); break; | 
| case LDRSH_x: set_xreg_no_log(srcdst, MemoryRead<int16_t>(address)); break; | 
| case LDRSW_x: set_xreg_no_log(srcdst, MemoryRead<int32_t>(address)); break; | 
| +    case LDR_b: | 
| +      set_breg_no_log(srcdst, MemoryRead<uint8_t>(address)); | 
| +      break; | 
| +    case LDR_h: | 
| +      set_hreg_no_log(srcdst, MemoryRead<uint16_t>(address)); | 
| +      break; | 
| case LDR_s:   set_sreg_no_log(srcdst, MemoryRead<float>(address)); break; | 
| case LDR_d:   set_dreg_no_log(srcdst, MemoryRead<double>(address)); break; | 
| +    case LDR_q: | 
| +      set_qreg_no_log(srcdst, MemoryRead<qreg_t>(address)); | 
| +      break; | 
|  | 
| case STRB_w:  MemoryWrite<uint8_t>(address, wreg(srcdst)); break; | 
| case STRH_w:  MemoryWrite<uint16_t>(address, wreg(srcdst)); break; | 
| case STR_w:   MemoryWrite<uint32_t>(address, wreg(srcdst)); break; | 
| case STR_x:   MemoryWrite<uint64_t>(address, xreg(srcdst)); break; | 
| +    case STR_b: | 
| +      MemoryWrite<uint8_t>(address, breg(srcdst)); | 
| +      break; | 
| +    case STR_h: | 
| +      MemoryWrite<uint16_t>(address, hreg(srcdst)); | 
| +      break; | 
| case STR_s:   MemoryWrite<float>(address, sreg(srcdst)); break; | 
| case STR_d:   MemoryWrite<double>(address, dreg(srcdst)); break; | 
| +    case STR_q: | 
| +      MemoryWrite<qreg_t>(address, qreg(srcdst)); | 
| +      break; | 
|  | 
| default: UNIMPLEMENTED(); | 
| } | 
|  | 
| // Print a detailed trace (including the memory address) instead of the basic | 
| // register:value trace generated by set_*reg(). | 
| -  size_t access_size = 1 << instr->SizeLS(); | 
| +  unsigned access_size = 1 << instr->SizeLS(); | 
| if (instr->IsLoad()) { | 
| if ((op == LDR_s) || (op == LDR_d)) { | 
| -      LogReadFP(address, access_size, srcdst); | 
| +      LogVRead(address, srcdst, GetPrintRegisterFormatForSizeFP(access_size)); | 
| +    } else if ((op == LDR_b) || (op == LDR_h) || (op == LDR_q)) { | 
| +      LogVRead(address, srcdst, GetPrintRegisterFormatForSize(access_size)); | 
| } else { | 
| -      LogRead(address, access_size, srcdst); | 
| +      LogRead(address, srcdst, GetPrintRegisterFormatForSize(access_size)); | 
| } | 
| } else { | 
| if ((op == STR_s) || (op == STR_d)) { | 
| -      LogWriteFP(address, access_size, srcdst); | 
| +      LogVWrite(address, srcdst, GetPrintRegisterFormatForSizeFP(access_size)); | 
| +    } else if ((op == STR_b) || (op == STR_h) || (op == STR_q)) { | 
| +      LogVWrite(address, srcdst, GetPrintRegisterFormatForSize(access_size)); | 
| } else { | 
| -      LogWrite(address, access_size, srcdst); | 
| +      LogWrite(address, srcdst, GetPrintRegisterFormatForSize(access_size)); | 
| } | 
| } | 
|  | 
| @@ -1780,61 +2074,73 @@ void Simulator::LoadStorePairHelper(Instruction* instr, | 
|  | 
| switch (op) { | 
| // Use _no_log variants to suppress the register trace (LOG_REGS, | 
| -    // LOG_FP_REGS). We will print a more detailed log. | 
| +    // LOG_VREGS). We will print a more detailed log. | 
| case LDP_w: { | 
| -      DCHECK(access_size == kWRegSize); | 
| +      DCHECK_EQ(access_size, static_cast<unsigned>(kWRegSize)); | 
| set_wreg_no_log(rt, MemoryRead<uint32_t>(address)); | 
| set_wreg_no_log(rt2, MemoryRead<uint32_t>(address2)); | 
| break; | 
| } | 
| case LDP_s: { | 
| -      DCHECK(access_size == kSRegSize); | 
| +      DCHECK_EQ(access_size, static_cast<unsigned>(kSRegSize)); | 
| set_sreg_no_log(rt, MemoryRead<float>(address)); | 
| set_sreg_no_log(rt2, MemoryRead<float>(address2)); | 
| break; | 
| } | 
| case LDP_x: { | 
| -      DCHECK(access_size == kXRegSize); | 
| +      DCHECK_EQ(access_size, static_cast<unsigned>(kXRegSize)); | 
| set_xreg_no_log(rt, MemoryRead<uint64_t>(address)); | 
| set_xreg_no_log(rt2, MemoryRead<uint64_t>(address2)); | 
| break; | 
| } | 
| case LDP_d: { | 
| -      DCHECK(access_size == kDRegSize); | 
| +      DCHECK_EQ(access_size, static_cast<unsigned>(kDRegSize)); | 
| set_dreg_no_log(rt, MemoryRead<double>(address)); | 
| set_dreg_no_log(rt2, MemoryRead<double>(address2)); | 
| break; | 
| } | 
| +    case LDP_q: { | 
| +      DCHECK_EQ(access_size, static_cast<unsigned>(kQRegSize)); | 
| +      set_qreg(rt, MemoryRead<qreg_t>(address), NoRegLog); | 
| +      set_qreg(rt2, MemoryRead<qreg_t>(address2), NoRegLog); | 
| +      break; | 
| +    } | 
| case LDPSW_x: { | 
| -      DCHECK(access_size == kWRegSize); | 
| +      DCHECK_EQ(access_size, static_cast<unsigned>(kWRegSize)); | 
| set_xreg_no_log(rt, MemoryRead<int32_t>(address)); | 
| set_xreg_no_log(rt2, MemoryRead<int32_t>(address2)); | 
| break; | 
| } | 
| case STP_w: { | 
| -      DCHECK(access_size == kWRegSize); | 
| +      DCHECK_EQ(access_size, static_cast<unsigned>(kWRegSize)); | 
| MemoryWrite<uint32_t>(address, wreg(rt)); | 
| MemoryWrite<uint32_t>(address2, wreg(rt2)); | 
| break; | 
| } | 
| case STP_s: { | 
| -      DCHECK(access_size == kSRegSize); | 
| +      DCHECK_EQ(access_size, static_cast<unsigned>(kSRegSize)); | 
| MemoryWrite<float>(address, sreg(rt)); | 
| MemoryWrite<float>(address2, sreg(rt2)); | 
| break; | 
| } | 
| case STP_x: { | 
| -      DCHECK(access_size == kXRegSize); | 
| +      DCHECK_EQ(access_size, static_cast<unsigned>(kXRegSize)); | 
| MemoryWrite<uint64_t>(address, xreg(rt)); | 
| MemoryWrite<uint64_t>(address2, xreg(rt2)); | 
| break; | 
| } | 
| case STP_d: { | 
| -      DCHECK(access_size == kDRegSize); | 
| +      DCHECK_EQ(access_size, static_cast<unsigned>(kDRegSize)); | 
| MemoryWrite<double>(address, dreg(rt)); | 
| MemoryWrite<double>(address2, dreg(rt2)); | 
| break; | 
| } | 
| +    case STP_q: { | 
| +      DCHECK_EQ(access_size, static_cast<unsigned>(kQRegSize)); | 
| +      MemoryWrite<qreg_t>(address, qreg(rt)); | 
| +      MemoryWrite<qreg_t>(address2, qreg(rt2)); | 
| +      break; | 
| +    } | 
| default: UNREACHABLE(); | 
| } | 
|  | 
| @@ -1842,19 +2148,25 @@ void Simulator::LoadStorePairHelper(Instruction* instr, | 
| // register:value trace generated by set_*reg(). | 
| if (instr->IsLoad()) { | 
| if ((op == LDP_s) || (op == LDP_d)) { | 
| -      LogReadFP(address, access_size, rt); | 
| -      LogReadFP(address2, access_size, rt2); | 
| +      LogVRead(address, rt, GetPrintRegisterFormatForSizeFP(access_size)); | 
| +      LogVRead(address2, rt2, GetPrintRegisterFormatForSizeFP(access_size)); | 
| +    } else if (op == LDP_q) { | 
| +      LogVRead(address, rt, GetPrintRegisterFormatForSize(access_size)); | 
| +      LogVRead(address2, rt2, GetPrintRegisterFormatForSize(access_size)); | 
| } else { | 
| -      LogRead(address, access_size, rt); | 
| -      LogRead(address2, access_size, rt2); | 
| +      LogRead(address, rt, GetPrintRegisterFormatForSize(access_size)); | 
| +      LogRead(address2, rt2, GetPrintRegisterFormatForSize(access_size)); | 
| } | 
| } else { | 
| if ((op == STP_s) || (op == STP_d)) { | 
| -      LogWriteFP(address, access_size, rt); | 
| -      LogWriteFP(address2, access_size, rt2); | 
| +      LogVWrite(address, rt, GetPrintRegisterFormatForSizeFP(access_size)); | 
| +      LogVWrite(address2, rt2, GetPrintRegisterFormatForSizeFP(access_size)); | 
| +    } else if (op == STP_q) { | 
| +      LogVWrite(address, rt, GetPrintRegisterFormatForSize(access_size)); | 
| +      LogVWrite(address2, rt2, GetPrintRegisterFormatForSize(access_size)); | 
| } else { | 
| -      LogWrite(address, access_size, rt); | 
| -      LogWrite(address2, access_size, rt2); | 
| +      LogWrite(address, rt, GetPrintRegisterFormatForSize(access_size)); | 
| +      LogWrite(address2, rt2, GetPrintRegisterFormatForSize(access_size)); | 
| } | 
| } | 
|  | 
| @@ -1885,22 +2197,22 @@ void Simulator::VisitLoadLiteral(Instruction* instr) { | 
|  | 
| switch (instr->Mask(LoadLiteralMask)) { | 
| // Use _no_log variants to suppress the register trace (LOG_REGS, | 
| -    // LOG_FP_REGS), then print a more detailed log. | 
| +    // LOG_VREGS), then print a more detailed log. | 
| case LDR_w_lit: | 
| set_wreg_no_log(rt, MemoryRead<uint32_t>(address)); | 
| -      LogRead(address, kWRegSize, rt); | 
| +      LogRead(address, rt, kPrintWReg); | 
| break; | 
| case LDR_x_lit: | 
| set_xreg_no_log(rt, MemoryRead<uint64_t>(address)); | 
| -      LogRead(address, kXRegSize, rt); | 
| +      LogRead(address, rt, kPrintXReg); | 
| break; | 
| case LDR_s_lit: | 
| set_sreg_no_log(rt, MemoryRead<float>(address)); | 
| -      LogReadFP(address, kSRegSize, rt); | 
| +      LogVRead(address, rt, kPrintSReg); | 
| break; | 
| case LDR_d_lit: | 
| set_dreg_no_log(rt, MemoryRead<double>(address)); | 
| -      LogReadFP(address, kDRegSize, rt); | 
| +      LogVRead(address, rt, kPrintDReg); | 
| break; | 
| default: UNREACHABLE(); | 
| } | 
| @@ -1993,7 +2305,7 @@ void Simulator::VisitLoadStoreAcquireRelease(Instruction* instr) { | 
| default: | 
| UNIMPLEMENTED(); | 
| } | 
| -    LogRead(address, access_size, rt); | 
| +    LogRead(address, rt, GetPrintRegisterFormatForSize(access_size)); | 
| } else { | 
| if (is_exclusive) { | 
| unsigned rs = instr->Rs(); | 
| @@ -2014,7 +2326,7 @@ void Simulator::VisitLoadStoreAcquireRelease(Instruction* instr) { | 
| default: | 
| UNIMPLEMENTED(); | 
| } | 
| -        LogWrite(address, access_size, rt); | 
| +        LogWrite(address, rt, GetPrintRegisterFormatForSize(access_size)); | 
| set_wreg(rs, 0); | 
| } else { | 
| set_wreg(rs, 1); | 
| @@ -2511,62 +2823,22 @@ void Simulator::VisitFPFixedPointConvert(Instruction* instr) { | 
| } | 
|  | 
|  | 
| -int32_t Simulator::FPToInt32(double value, FPRounding rmode) { | 
| -  value = FPRoundInt(value, rmode); | 
| -  if (value >= kWMaxInt) { | 
| -    return kWMaxInt; | 
| -  } else if (value < kWMinInt) { | 
| -    return kWMinInt; | 
| -  } | 
| -  return std::isnan(value) ? 0 : static_cast<int32_t>(value); | 
| -} | 
| - | 
| - | 
| -int64_t Simulator::FPToInt64(double value, FPRounding rmode) { | 
| -  value = FPRoundInt(value, rmode); | 
| -  if (value >= kXMaxInt) { | 
| -    return kXMaxInt; | 
| -  } else if (value < kXMinInt) { | 
| -    return kXMinInt; | 
| -  } | 
| -  return std::isnan(value) ? 0 : static_cast<int64_t>(value); | 
| -} | 
| - | 
| - | 
| -uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) { | 
| -  value = FPRoundInt(value, rmode); | 
| -  if (value >= kWMaxUInt) { | 
| -    return kWMaxUInt; | 
| -  } else if (value < 0.0) { | 
| -    return 0; | 
| -  } | 
| -  return std::isnan(value) ? 0 : static_cast<uint32_t>(value); | 
| -} | 
| - | 
| - | 
| -uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) { | 
| -  value = FPRoundInt(value, rmode); | 
| -  if (value >= kXMaxUInt) { | 
| -    return kXMaxUInt; | 
| -  } else if (value < 0.0) { | 
| -    return 0; | 
| -  } | 
| -  return std::isnan(value) ? 0 : static_cast<uint64_t>(value); | 
| -} | 
| - | 
| - | 
| void Simulator::VisitFPCompare(Instruction* instr) { | 
| AssertSupportedFPCR(); | 
|  | 
| -  unsigned reg_size = (instr->Mask(FP64) == FP64) ? kDRegSizeInBits | 
| -                                                  : kSRegSizeInBits; | 
| -  double fn_val = fpreg(reg_size, instr->Rn()); | 
| - | 
| switch (instr->Mask(FPCompareMask)) { | 
| case FCMP_s: | 
| -    case FCMP_d: FPCompare(fn_val, fpreg(reg_size, instr->Rm())); break; | 
| +      FPCompare(sreg(instr->Rn()), sreg(instr->Rm())); | 
| +      break; | 
| +    case FCMP_d: | 
| +      FPCompare(dreg(instr->Rn()), dreg(instr->Rm())); | 
| +      break; | 
| case FCMP_s_zero: | 
| -    case FCMP_d_zero: FPCompare(fn_val, 0.0); break; | 
| +      FPCompare(sreg(instr->Rn()), 0.0f); | 
| +      break; | 
| +    case FCMP_d_zero: | 
| +      FPCompare(dreg(instr->Rn()), 0.0); | 
| +      break; | 
| default: UNIMPLEMENTED(); | 
| } | 
| } | 
| @@ -2577,13 +2849,16 @@ void Simulator::VisitFPConditionalCompare(Instruction* instr) { | 
|  | 
| switch (instr->Mask(FPConditionalCompareMask)) { | 
| case FCCMP_s: | 
| +      if (ConditionPassed(static_cast<Condition>(instr->Condition()))) { | 
| +        FPCompare(sreg(instr->Rn()), sreg(instr->Rm())); | 
| +      } else { | 
| +        nzcv().SetFlags(instr->Nzcv()); | 
| +        LogSystemRegister(NZCV); | 
| +      } | 
| +      break; | 
| case FCCMP_d: { | 
| if (ConditionPassed(static_cast<Condition>(instr->Condition()))) { | 
| -        // If the condition passes, set the status flags to the result of | 
| -        // comparing the operands. | 
| -        unsigned reg_size = (instr->Mask(FP64) == FP64) ? kDRegSizeInBits | 
| -                                                        : kSRegSizeInBits; | 
| -        FPCompare(fpreg(reg_size, instr->Rn()), fpreg(reg_size, instr->Rm())); | 
| +        FPCompare(dreg(instr->Rn()), dreg(instr->Rm())); | 
| } else { | 
| // If the condition fails, set the status flags to the nzcv immediate. | 
| nzcv().SetFlags(instr->Nzcv()); | 
| @@ -2617,481 +2892,149 @@ void Simulator::VisitFPConditionalSelect(Instruction* instr) { | 
| void Simulator::VisitFPDataProcessing1Source(Instruction* instr) { | 
| AssertSupportedFPCR(); | 
|  | 
| +  FPRounding fpcr_rounding = static_cast<FPRounding>(fpcr().RMode()); | 
| +  VectorFormat vform = (instr->Mask(FP64) == FP64) ? kFormatD : kFormatS; | 
| +  SimVRegister& rd = vreg(instr->Rd()); | 
| +  SimVRegister& rn = vreg(instr->Rn()); | 
| +  bool inexact_exception = false; | 
| + | 
| unsigned fd = instr->Rd(); | 
| unsigned fn = instr->Rn(); | 
|  | 
| switch (instr->Mask(FPDataProcessing1SourceMask)) { | 
| -    case FMOV_s: set_sreg(fd, sreg(fn)); break; | 
| -    case FMOV_d: set_dreg(fd, dreg(fn)); break; | 
| -    case FABS_s: set_sreg(fd, std::fabs(sreg(fn))); break; | 
| -    case FABS_d: set_dreg(fd, std::fabs(dreg(fn))); break; | 
| -    case FNEG_s: set_sreg(fd, -sreg(fn)); break; | 
| -    case FNEG_d: set_dreg(fd, -dreg(fn)); break; | 
| -    case FSQRT_s: set_sreg(fd, FPSqrt(sreg(fn))); break; | 
| -    case FSQRT_d: set_dreg(fd, FPSqrt(dreg(fn))); break; | 
| -    case FRINTA_s: set_sreg(fd, FPRoundInt(sreg(fn), FPTieAway)); break; | 
| -    case FRINTA_d: set_dreg(fd, FPRoundInt(dreg(fn), FPTieAway)); break; | 
| +    case FMOV_s: | 
| +      set_sreg(fd, sreg(fn)); | 
| +      return; | 
| +    case FMOV_d: | 
| +      set_dreg(fd, dreg(fn)); | 
| +      return; | 
| +    case FABS_s: | 
| +    case FABS_d: | 
| +      fabs_(vform, vreg(fd), vreg(fn)); | 
| +      // Explicitly log the register update whilst we have type information. | 
| +      LogVRegister(fd, GetPrintRegisterFormatFP(vform)); | 
| +      return; | 
| +    case FNEG_s: | 
| +    case FNEG_d: | 
| +      fneg(vform, vreg(fd), vreg(fn)); | 
| +      // Explicitly log the register update whilst we have type information. | 
| +      LogVRegister(fd, GetPrintRegisterFormatFP(vform)); | 
| +      return; | 
| +    case FCVT_ds: | 
| +      set_dreg(fd, FPToDouble(sreg(fn))); | 
| +      return; | 
| +    case FCVT_sd: | 
| +      set_sreg(fd, FPToFloat(dreg(fn), FPTieEven)); | 
| +      return; | 
| +    case FCVT_hs: | 
| +      set_hreg(fd, FPToFloat16(sreg(fn), FPTieEven)); | 
| +      return; | 
| +    case FCVT_sh: | 
| +      set_sreg(fd, FPToFloat(hreg(fn))); | 
| +      return; | 
| +    case FCVT_dh: | 
| +      set_dreg(fd, FPToDouble(FPToFloat(hreg(fn)))); | 
| +      return; | 
| +    case FCVT_hd: | 
| +      set_hreg(fd, FPToFloat16(dreg(fn), FPTieEven)); | 
| +      return; | 
| +    case FSQRT_s: | 
| +    case FSQRT_d: | 
| +      fsqrt(vform, rd, rn); | 
| +      // Explicitly log the register update whilst we have type information. | 
| +      LogVRegister(fd, GetPrintRegisterFormatFP(vform)); | 
| +      return; | 
| +    case FRINTI_s: | 
| +    case FRINTI_d: | 
| +      break;  // Use FPCR rounding mode. | 
| +    case FRINTX_s: | 
| +    case FRINTX_d: | 
| +      inexact_exception = true; | 
| +      break; | 
| +    case FRINTA_s: | 
| +    case FRINTA_d: | 
| +      fpcr_rounding = FPTieAway; | 
| +      break; | 
| case FRINTM_s: | 
| -        set_sreg(fd, FPRoundInt(sreg(fn), FPNegativeInfinity)); break; | 
| case FRINTM_d: | 
| -        set_dreg(fd, FPRoundInt(dreg(fn), FPNegativeInfinity)); break; | 
| -    case FRINTP_s: | 
| -      set_sreg(fd, FPRoundInt(sreg(fn), FPPositiveInfinity)); | 
| +      fpcr_rounding = FPNegativeInfinity; | 
| break; | 
| +    case FRINTN_s: | 
| +    case FRINTN_d: | 
| +      fpcr_rounding = FPTieEven; | 
| +      break; | 
| +    case FRINTP_s: | 
| case FRINTP_d: | 
| -      set_dreg(fd, FPRoundInt(dreg(fn), FPPositiveInfinity)); | 
| -      break; | 
| -    case FRINTN_s: set_sreg(fd, FPRoundInt(sreg(fn), FPTieEven)); break; | 
| -    case FRINTN_d: set_dreg(fd, FPRoundInt(dreg(fn), FPTieEven)); break; | 
| -    case FRINTZ_s: set_sreg(fd, FPRoundInt(sreg(fn), FPZero)); break; | 
| -    case FRINTZ_d: set_dreg(fd, FPRoundInt(dreg(fn), FPZero)); break; | 
| -    case FCVT_ds: set_dreg(fd, FPToDouble(sreg(fn))); break; | 
| -    case FCVT_sd: set_sreg(fd, FPToFloat(dreg(fn), FPTieEven)); break; | 
| -    default: UNIMPLEMENTED(); | 
| -  } | 
| -} | 
| - | 
| - | 
| -// Assemble the specified IEEE-754 components into the target type and apply | 
| -// appropriate rounding. | 
| -//  sign:     0 = positive, 1 = negative | 
| -//  exponent: Unbiased IEEE-754 exponent. | 
| -//  mantissa: The mantissa of the input. The top bit (which is not encoded for | 
| -//            normal IEEE-754 values) must not be omitted. This bit has the | 
| -//            value 'pow(2, exponent)'. | 
| -// | 
| -// The input value is assumed to be a normalized value. That is, the input may | 
| -// not be infinity or NaN. If the source value is subnormal, it must be | 
| -// normalized before calling this function such that the highest set bit in the | 
| -// mantissa has the value 'pow(2, exponent)'. | 
| -// | 
| -// Callers should use FPRoundToFloat or FPRoundToDouble directly, rather than | 
| -// calling a templated FPRound. | 
| -template <class T, int ebits, int mbits> | 
| -static T FPRound(int64_t sign, int64_t exponent, uint64_t mantissa, | 
| -                 FPRounding round_mode) { | 
| -  DCHECK((sign == 0) || (sign == 1)); | 
| - | 
| -  // Only the FPTieEven rounding mode is implemented. | 
| -  DCHECK(round_mode == FPTieEven); | 
| -  USE(round_mode); | 
| - | 
| -  // Rounding can promote subnormals to normals, and normals to infinities. For | 
| -  // example, a double with exponent 127 (FLT_MAX_EXP) would appear to be | 
| -  // encodable as a float, but rounding based on the low-order mantissa bits | 
| -  // could make it overflow. With ties-to-even rounding, this value would become | 
| -  // an infinity. | 
| - | 
| -  // ---- Rounding Method ---- | 
| -  // | 
| -  // The exponent is irrelevant in the rounding operation, so we treat the | 
| -  // lowest-order bit that will fit into the result ('onebit') as having | 
| -  // the value '1'. Similarly, the highest-order bit that won't fit into | 
| -  // the result ('halfbit') has the value '0.5'. The 'point' sits between | 
| -  // 'onebit' and 'halfbit': | 
| -  // | 
| -  //            These bits fit into the result. | 
| -  //               |---------------------| | 
| -  //  mantissa = 0bxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx | 
| -  //                                     || | 
| -  //                                    / | | 
| -  //                                   /  halfbit | 
| -  //                               onebit | 
| -  // | 
| -  // For subnormal outputs, the range of representable bits is smaller and | 
| -  // the position of onebit and halfbit depends on the exponent of the | 
| -  // input, but the method is otherwise similar. | 
| -  // | 
| -  //   onebit(frac) | 
| -  //     | | 
| -  //     | halfbit(frac)          halfbit(adjusted) | 
| -  //     | /                      / | 
| -  //     | |                      | | 
| -  //  0b00.0 (exact)      -> 0b00.0 (exact)                    -> 0b00 | 
| -  //  0b00.0...           -> 0b00.0...                         -> 0b00 | 
| -  //  0b00.1 (exact)      -> 0b00.0111..111                    -> 0b00 | 
| -  //  0b00.1...           -> 0b00.1...                         -> 0b01 | 
| -  //  0b01.0 (exact)      -> 0b01.0 (exact)                    -> 0b01 | 
| -  //  0b01.0...           -> 0b01.0...                         -> 0b01 | 
| -  //  0b01.1 (exact)      -> 0b01.1 (exact)                    -> 0b10 | 
| -  //  0b01.1...           -> 0b01.1...                         -> 0b10 | 
| -  //  0b10.0 (exact)      -> 0b10.0 (exact)                    -> 0b10 | 
| -  //  0b10.0...           -> 0b10.0...                         -> 0b10 | 
| -  //  0b10.1 (exact)      -> 0b10.0111..111                    -> 0b10 | 
| -  //  0b10.1...           -> 0b10.1...                         -> 0b11 | 
| -  //  0b11.0 (exact)      -> 0b11.0 (exact)                    -> 0b11 | 
| -  //  ...                   /             |                      /   | | 
| -  //                       /              |                     /    | | 
| -  //                                                           /     | | 
| -  // adjusted = frac - (halfbit(mantissa) & ~onebit(frac));   /      | | 
| -  // | 
| -  //                   mantissa = (mantissa >> shift) + halfbit(adjusted); | 
| - | 
| -  static const int mantissa_offset = 0; | 
| -  static const int exponent_offset = mantissa_offset + mbits; | 
| -  static const int sign_offset = exponent_offset + ebits; | 
| -  STATIC_ASSERT(sign_offset == (sizeof(T) * kByteSize - 1)); | 
| - | 
| -  // Bail out early for zero inputs. | 
| -  if (mantissa == 0) { | 
| -    return static_cast<T>(sign << sign_offset); | 
| -  } | 
| - | 
| -  // If all bits in the exponent are set, the value is infinite or NaN. | 
| -  // This is true for all binary IEEE-754 formats. | 
| -  static const int infinite_exponent = (1 << ebits) - 1; | 
| -  static const int max_normal_exponent = infinite_exponent - 1; | 
| - | 
| -  // Apply the exponent bias to encode it for the result. Doing this early makes | 
| -  // it easy to detect values that will be infinite or subnormal. | 
| -  exponent += max_normal_exponent >> 1; | 
| - | 
| -  if (exponent > max_normal_exponent) { | 
| -    // Overflow: The input is too large for the result type to represent. The | 
| -    // FPTieEven rounding mode handles overflows using infinities. | 
| -    exponent = infinite_exponent; | 
| -    mantissa = 0; | 
| -    return static_cast<T>((sign << sign_offset) | | 
| -                          (exponent << exponent_offset) | | 
| -                          (mantissa << mantissa_offset)); | 
| -  } | 
| - | 
| -  // Calculate the shift required to move the top mantissa bit to the proper | 
| -  // place in the destination type. | 
| -  const int highest_significant_bit = 63 - CountLeadingZeros(mantissa, 64); | 
| -  int shift = highest_significant_bit - mbits; | 
| - | 
| -  if (exponent <= 0) { | 
| -    // The output will be subnormal (before rounding). | 
| - | 
| -    // For subnormal outputs, the shift must be adjusted by the exponent. The +1 | 
| -    // is necessary because the exponent of a subnormal value (encoded as 0) is | 
| -    // the same as the exponent of the smallest normal value (encoded as 1). | 
| -    shift += -exponent + 1; | 
| - | 
| -    // Handle inputs that would produce a zero output. | 
| -    // | 
| -    // Shifts higher than highest_significant_bit+1 will always produce a zero | 
| -    // result. A shift of exactly highest_significant_bit+1 might produce a | 
| -    // non-zero result after rounding. | 
| -    if (shift > (highest_significant_bit + 1)) { | 
| -      // The result will always be +/-0.0. | 
| -      return static_cast<T>(sign << sign_offset); | 
| -    } | 
| - | 
| -    // Properly encode the exponent for a subnormal output. | 
| -    exponent = 0; | 
| -  } else { | 
| -    // Clear the topmost mantissa bit, since this is not encoded in IEEE-754 | 
| -    // normal values. | 
| -    mantissa &= ~(1UL << highest_significant_bit); | 
| -  } | 
| - | 
| -  if (shift > 0) { | 
| -    // We have to shift the mantissa to the right. Some precision is lost, so we | 
| -    // need to apply rounding. | 
| -    uint64_t onebit_mantissa = (mantissa >> (shift)) & 1; | 
| -    uint64_t halfbit_mantissa = (mantissa >> (shift-1)) & 1; | 
| -    uint64_t adjusted = mantissa - (halfbit_mantissa & ~onebit_mantissa); | 
| -    T halfbit_adjusted = (adjusted >> (shift-1)) & 1; | 
| - | 
| -    T result = | 
| -        static_cast<T>((sign << sign_offset) | (exponent << exponent_offset) | | 
| -                       ((mantissa >> shift) << mantissa_offset)); | 
| - | 
| -    // A very large mantissa can overflow during rounding. If this happens, the | 
| -    // exponent should be incremented and the mantissa set to 1.0 (encoded as | 
| -    // 0). Applying halfbit_adjusted after assembling the float has the nice | 
| -    // side-effect that this case is handled for free. | 
| -    // | 
| -    // This also handles cases where a very large finite value overflows to | 
| -    // infinity, or where a very large subnormal value overflows to become | 
| -    // normal. | 
| -    return result + halfbit_adjusted; | 
| -  } else { | 
| -    // We have to shift the mantissa to the left (or not at all). The input | 
| -    // mantissa is exactly representable in the output mantissa, so apply no | 
| -    // rounding correction. | 
| -    return static_cast<T>((sign << sign_offset) | | 
| -                          (exponent << exponent_offset) | | 
| -                          ((mantissa << -shift) << mantissa_offset)); | 
| -  } | 
| -} | 
| - | 
| - | 
| -// See FPRound for a description of this function. | 
| -static inline double FPRoundToDouble(int64_t sign, int64_t exponent, | 
| -                                     uint64_t mantissa, FPRounding round_mode) { | 
| -  int64_t bits = | 
| -      FPRound<int64_t, kDoubleExponentBits, kDoubleMantissaBits>(sign, | 
| -                                                                 exponent, | 
| -                                                                 mantissa, | 
| -                                                                 round_mode); | 
| -  return rawbits_to_double(bits); | 
| -} | 
| - | 
| - | 
| -// See FPRound for a description of this function. | 
| -static inline float FPRoundToFloat(int64_t sign, int64_t exponent, | 
| -                                   uint64_t mantissa, FPRounding round_mode) { | 
| -  int32_t bits = | 
| -      FPRound<int32_t, kFloatExponentBits, kFloatMantissaBits>(sign, | 
| -                                                               exponent, | 
| -                                                               mantissa, | 
| -                                                               round_mode); | 
| -  return rawbits_to_float(bits); | 
| -} | 
| - | 
| - | 
| -double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) { | 
| -  if (src >= 0) { | 
| -    return UFixedToDouble(src, fbits, round); | 
| -  } else { | 
| -    // This works for all negative values, including INT64_MIN. | 
| -    return -UFixedToDouble(-src, fbits, round); | 
| -  } | 
| -} | 
| - | 
| - | 
| -double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) { | 
| -  // An input of 0 is a special case because the result is effectively | 
| -  // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit. | 
| -  if (src == 0) { | 
| -    return 0.0; | 
| +      fpcr_rounding = FPPositiveInfinity; | 
| +      break; | 
| +    case FRINTZ_s: | 
| +    case FRINTZ_d: | 
| +      fpcr_rounding = FPZero; | 
| +      break; | 
| +    default: | 
| +      UNIMPLEMENTED(); | 
| } | 
|  | 
| -  // Calculate the exponent. The highest significant bit will have the value | 
| -  // 2^exponent. | 
| -  const int highest_significant_bit = 63 - CountLeadingZeros(src, 64); | 
| -  const int64_t exponent = highest_significant_bit - fbits; | 
| - | 
| -  return FPRoundToDouble(0, exponent, src, round); | 
| -} | 
| - | 
| - | 
| -float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) { | 
| -  if (src >= 0) { | 
| -    return UFixedToFloat(src, fbits, round); | 
| -  } else { | 
| -    // This works for all negative values, including INT64_MIN. | 
| -    return -UFixedToFloat(-src, fbits, round); | 
| -  } | 
| +  // Only FRINT* instructions fall through the switch above. | 
| +  frint(vform, rd, rn, fpcr_rounding, inexact_exception); | 
| +  // Explicitly log the register update whilst we have type information. | 
| +  LogVRegister(fd, GetPrintRegisterFormatFP(vform)); | 
| } | 
|  | 
| +void Simulator::VisitFPDataProcessing2Source(Instruction* instr) { | 
| +  AssertSupportedFPCR(); | 
|  | 
| -float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) { | 
| -  // An input of 0 is a special case because the result is effectively | 
| -  // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit. | 
| -  if (src == 0) { | 
| -    return 0.0f; | 
| -  } | 
| - | 
| -  // Calculate the exponent. The highest significant bit will have the value | 
| -  // 2^exponent. | 
| -  const int highest_significant_bit = 63 - CountLeadingZeros(src, 64); | 
| -  const int32_t exponent = highest_significant_bit - fbits; | 
| - | 
| -  return FPRoundToFloat(0, exponent, src, round); | 
| -} | 
| - | 
| +  VectorFormat vform = (instr->Mask(FP64) == FP64) ? kFormatD : kFormatS; | 
| +  SimVRegister& rd = vreg(instr->Rd()); | 
| +  SimVRegister& rn = vreg(instr->Rn()); | 
| +  SimVRegister& rm = vreg(instr->Rm()); | 
|  | 
| -double Simulator::FPRoundInt(double value, FPRounding round_mode) { | 
| -  if ((value == 0.0) || (value == kFP64PositiveInfinity) || | 
| -      (value == kFP64NegativeInfinity)) { | 
| -    return value; | 
| -  } else if (std::isnan(value)) { | 
| -    return FPProcessNaN(value); | 
| -  } | 
| - | 
| -  double int_result = floor(value); | 
| -  double error = value - int_result; | 
| -  switch (round_mode) { | 
| -    case FPTieAway: { | 
| -      // Take care of correctly handling the range ]-0.5, -0.0], which must | 
| -      // yield -0.0. | 
| -      if ((-0.5 < value) && (value < 0.0)) { | 
| -        int_result = -0.0; | 
| - | 
| -      } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) { | 
| -        // If the error is greater than 0.5, or is equal to 0.5 and the integer | 
| -        // result is positive, round up. | 
| -        int_result++; | 
| -      } | 
| +  switch (instr->Mask(FPDataProcessing2SourceMask)) { | 
| +    case FADD_s: | 
| +    case FADD_d: | 
| +      fadd(vform, rd, rn, rm); | 
| break; | 
| -    } | 
| -    case FPTieEven: { | 
| -      // Take care of correctly handling the range [-0.5, -0.0], which must | 
| -      // yield -0.0. | 
| -      if ((-0.5 <= value) && (value < 0.0)) { | 
| -        int_result = -0.0; | 
| - | 
| -      // If the error is greater than 0.5, or is equal to 0.5 and the integer | 
| -      // result is odd, round up. | 
| -      } else if ((error > 0.5) || | 
| -                 ((error == 0.5) && (modulo(int_result, 2) != 0))) { | 
| -        int_result++; | 
| -      } | 
| +    case FSUB_s: | 
| +    case FSUB_d: | 
| +      fsub(vform, rd, rn, rm); | 
| break; | 
| -    } | 
| -    case FPZero: { | 
| -      // If value > 0 then we take floor(value) | 
| -      // otherwise, ceil(value) | 
| -      if (value < 0) { | 
| -         int_result = ceil(value); | 
| -      } | 
| +    case FMUL_s: | 
| +    case FMUL_d: | 
| +      fmul(vform, rd, rn, rm); | 
| break; | 
| -    } | 
| -    case FPNegativeInfinity: { | 
| -      // We always use floor(value). | 
| +    case FNMUL_s: | 
| +    case FNMUL_d: | 
| +      fnmul(vform, rd, rn, rm); | 
| break; | 
| -    } | 
| -    case FPPositiveInfinity: { | 
| -      int_result = ceil(value); | 
| +    case FDIV_s: | 
| +    case FDIV_d: | 
| +      fdiv(vform, rd, rn, rm); | 
| break; | 
| -    } | 
| -    default: UNIMPLEMENTED(); | 
| +    case FMAX_s: | 
| +    case FMAX_d: | 
| +      fmax(vform, rd, rn, rm); | 
| +      break; | 
| +    case FMIN_s: | 
| +    case FMIN_d: | 
| +      fmin(vform, rd, rn, rm); | 
| +      break; | 
| +    case FMAXNM_s: | 
| +    case FMAXNM_d: | 
| +      fmaxnm(vform, rd, rn, rm); | 
| +      break; | 
| +    case FMINNM_s: | 
| +    case FMINNM_d: | 
| +      fminnm(vform, rd, rn, rm); | 
| +      break; | 
| +    default: | 
| +      UNREACHABLE(); | 
| } | 
| -  return int_result; | 
| +  // Explicitly log the register update whilst we have type information. | 
| +  LogVRegister(instr->Rd(), GetPrintRegisterFormatFP(vform)); | 
| } | 
|  | 
| - | 
| -double Simulator::FPToDouble(float value) { | 
| -  switch (std::fpclassify(value)) { | 
| -    case FP_NAN: { | 
| -      if (fpcr().DN()) return kFP64DefaultNaN; | 
| - | 
| -      // Convert NaNs as the processor would: | 
| -      //  - The sign is propagated. | 
| -      //  - The payload (mantissa) is transferred entirely, except that the top | 
| -      //    bit is forced to '1', making the result a quiet NaN. The unused | 
| -      //    (low-order) payload bits are set to 0. | 
| -      uint32_t raw = float_to_rawbits(value); | 
| - | 
| -      uint64_t sign = raw >> 31; | 
| -      uint64_t exponent = (1 << 11) - 1; | 
| -      uint64_t payload = unsigned_bitextract_64(21, 0, raw); | 
| -      payload <<= (52 - 23);  // The unused low-order bits should be 0. | 
| -      payload |= (1L << 51);  // Force a quiet NaN. | 
| - | 
| -      return rawbits_to_double((sign << 63) | (exponent << 52) | payload); | 
| -    } | 
| - | 
| -    case FP_ZERO: | 
| -    case FP_NORMAL: | 
| -    case FP_SUBNORMAL: | 
| -    case FP_INFINITE: { | 
| -      // All other inputs are preserved in a standard cast, because every value | 
| -      // representable using an IEEE-754 float is also representable using an | 
| -      // IEEE-754 double. | 
| -      return static_cast<double>(value); | 
| -    } | 
| -  } | 
| - | 
| -  UNREACHABLE(); | 
| -  return static_cast<double>(value); | 
| -} | 
| - | 
| - | 
| -float Simulator::FPToFloat(double value, FPRounding round_mode) { | 
| -  // Only the FPTieEven rounding mode is implemented. | 
| -  DCHECK(round_mode == FPTieEven); | 
| -  USE(round_mode); | 
| - | 
| -  switch (std::fpclassify(value)) { | 
| -    case FP_NAN: { | 
| -      if (fpcr().DN()) return kFP32DefaultNaN; | 
| - | 
| -      // Convert NaNs as the processor would: | 
| -      //  - The sign is propagated. | 
| -      //  - The payload (mantissa) is transferred as much as possible, except | 
| -      //    that the top bit is forced to '1', making the result a quiet NaN. | 
| -      uint64_t raw = double_to_rawbits(value); | 
| - | 
| -      uint32_t sign = raw >> 63; | 
| -      uint32_t exponent = (1 << 8) - 1; | 
| -      uint32_t payload = | 
| -          static_cast<uint32_t>(unsigned_bitextract_64(50, 52 - 23, raw)); | 
| -      payload |= (1 << 22);   // Force a quiet NaN. | 
| - | 
| -      return rawbits_to_float((sign << 31) | (exponent << 23) | payload); | 
| -    } | 
| - | 
| -    case FP_ZERO: | 
| -    case FP_INFINITE: { | 
| -      // In a C++ cast, any value representable in the target type will be | 
| -      // unchanged. This is always the case for +/-0.0 and infinities. | 
| -      return static_cast<float>(value); | 
| -    } | 
| - | 
| -    case FP_NORMAL: | 
| -    case FP_SUBNORMAL: { | 
| -      // Convert double-to-float as the processor would, assuming that FPCR.FZ | 
| -      // (flush-to-zero) is not set. | 
| -      uint64_t raw = double_to_rawbits(value); | 
| -      // Extract the IEEE-754 double components. | 
| -      uint32_t sign = raw >> 63; | 
| -      // Extract the exponent and remove the IEEE-754 encoding bias. | 
| -      int32_t exponent = | 
| -          static_cast<int32_t>(unsigned_bitextract_64(62, 52, raw)) - 1023; | 
| -      // Extract the mantissa and add the implicit '1' bit. | 
| -      uint64_t mantissa = unsigned_bitextract_64(51, 0, raw); | 
| -      if (std::fpclassify(value) == FP_NORMAL) { | 
| -        mantissa |= (1UL << 52); | 
| -      } | 
| -      return FPRoundToFloat(sign, exponent, mantissa, round_mode); | 
| -    } | 
| -  } | 
| - | 
| -  UNREACHABLE(); | 
| -  return value; | 
| -} | 
| - | 
| - | 
| -void Simulator::VisitFPDataProcessing2Source(Instruction* instr) { | 
| -  AssertSupportedFPCR(); | 
| - | 
| -  unsigned fd = instr->Rd(); | 
| -  unsigned fn = instr->Rn(); | 
| -  unsigned fm = instr->Rm(); | 
| - | 
| -  // Fmaxnm and Fminnm have special NaN handling. | 
| -  switch (instr->Mask(FPDataProcessing2SourceMask)) { | 
| -    case FMAXNM_s: set_sreg(fd, FPMaxNM(sreg(fn), sreg(fm))); return; | 
| -    case FMAXNM_d: set_dreg(fd, FPMaxNM(dreg(fn), dreg(fm))); return; | 
| -    case FMINNM_s: set_sreg(fd, FPMinNM(sreg(fn), sreg(fm))); return; | 
| -    case FMINNM_d: set_dreg(fd, FPMinNM(dreg(fn), dreg(fm))); return; | 
| -    default: | 
| -      break;    // Fall through. | 
| -  } | 
| - | 
| -  if (FPProcessNaNs(instr)) return; | 
| - | 
| -  switch (instr->Mask(FPDataProcessing2SourceMask)) { | 
| -    case FADD_s: set_sreg(fd, FPAdd(sreg(fn), sreg(fm))); break; | 
| -    case FADD_d: set_dreg(fd, FPAdd(dreg(fn), dreg(fm))); break; | 
| -    case FSUB_s: set_sreg(fd, FPSub(sreg(fn), sreg(fm))); break; | 
| -    case FSUB_d: set_dreg(fd, FPSub(dreg(fn), dreg(fm))); break; | 
| -    case FMUL_s: set_sreg(fd, FPMul(sreg(fn), sreg(fm))); break; | 
| -    case FMUL_d: set_dreg(fd, FPMul(dreg(fn), dreg(fm))); break; | 
| -    case FDIV_s: set_sreg(fd, FPDiv(sreg(fn), sreg(fm))); break; | 
| -    case FDIV_d: set_dreg(fd, FPDiv(dreg(fn), dreg(fm))); break; | 
| -    case FMAX_s: set_sreg(fd, FPMax(sreg(fn), sreg(fm))); break; | 
| -    case FMAX_d: set_dreg(fd, FPMax(dreg(fn), dreg(fm))); break; | 
| -    case FMIN_s: set_sreg(fd, FPMin(sreg(fn), sreg(fm))); break; | 
| -    case FMIN_d: set_dreg(fd, FPMin(dreg(fn), dreg(fm))); break; | 
| -    case FMAXNM_s: | 
| -    case FMAXNM_d: | 
| -    case FMINNM_s: | 
| -    case FMINNM_d: | 
| -      // These were handled before the standard FPProcessNaNs() stage. | 
| -      UNREACHABLE(); | 
| -    default: UNIMPLEMENTED(); | 
| -  } | 
| -} | 
| - | 
| - | 
| -void Simulator::VisitFPDataProcessing3Source(Instruction* instr) { | 
| -  AssertSupportedFPCR(); | 
| +void Simulator::VisitFPDataProcessing3Source(Instruction* instr) { | 
| +  AssertSupportedFPCR(); | 
|  | 
| unsigned fd = instr->Rd(); | 
| unsigned fn = instr->Rn(); | 
| @@ -3100,10 +3043,18 @@ void Simulator::VisitFPDataProcessing3Source(Instruction* instr) { | 
|  | 
| switch (instr->Mask(FPDataProcessing3SourceMask)) { | 
| // fd = fa +/- (fn * fm) | 
| -    case FMADD_s: set_sreg(fd, FPMulAdd(sreg(fa), sreg(fn), sreg(fm))); break; | 
| -    case FMSUB_s: set_sreg(fd, FPMulAdd(sreg(fa), -sreg(fn), sreg(fm))); break; | 
| -    case FMADD_d: set_dreg(fd, FPMulAdd(dreg(fa), dreg(fn), dreg(fm))); break; | 
| -    case FMSUB_d: set_dreg(fd, FPMulAdd(dreg(fa), -dreg(fn), dreg(fm))); break; | 
| +    case FMADD_s: | 
| +      set_sreg(fd, FPMulAdd(sreg(fa), sreg(fn), sreg(fm))); | 
| +      break; | 
| +    case FMSUB_s: | 
| +      set_sreg(fd, FPMulAdd(sreg(fa), -sreg(fn), sreg(fm))); | 
| +      break; | 
| +    case FMADD_d: | 
| +      set_dreg(fd, FPMulAdd(dreg(fa), dreg(fn), dreg(fm))); | 
| +      break; | 
| +    case FMSUB_d: | 
| +      set_dreg(fd, FPMulAdd(dreg(fa), -dreg(fn), dreg(fm))); | 
| +      break; | 
| // Negated variants of the above. | 
| case FNMADD_s: | 
| set_sreg(fd, FPMulAdd(-sreg(fa), -sreg(fn), sreg(fm))); | 
| @@ -3117,232 +3068,11 @@ void Simulator::VisitFPDataProcessing3Source(Instruction* instr) { | 
| case FNMSUB_d: | 
| set_dreg(fd, FPMulAdd(-dreg(fa), dreg(fn), dreg(fm))); | 
| break; | 
| -    default: UNIMPLEMENTED(); | 
| -  } | 
| -} | 
| - | 
| - | 
| -template <typename T> | 
| -T Simulator::FPAdd(T op1, T op2) { | 
| -  // NaNs should be handled elsewhere. | 
| -  DCHECK(!std::isnan(op1) && !std::isnan(op2)); | 
| - | 
| -  if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) { | 
| -    // inf + -inf returns the default NaN. | 
| -    return FPDefaultNaN<T>(); | 
| -  } else { | 
| -    // Other cases should be handled by standard arithmetic. | 
| -    return op1 + op2; | 
| -  } | 
| -} | 
| - | 
| - | 
| -template <typename T> | 
| -T Simulator::FPDiv(T op1, T op2) { | 
| -  // NaNs should be handled elsewhere. | 
| -  DCHECK(!std::isnan(op1) && !std::isnan(op2)); | 
| - | 
| -  if ((std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) { | 
| -    // inf / inf and 0.0 / 0.0 return the default NaN. | 
| -    return FPDefaultNaN<T>(); | 
| -  } else { | 
| -    // Other cases should be handled by standard arithmetic. | 
| -    return op1 / op2; | 
| -  } | 
| -} | 
| - | 
| - | 
| -template <typename T> | 
| -T Simulator::FPMax(T a, T b) { | 
| -  // NaNs should be handled elsewhere. | 
| -  DCHECK(!std::isnan(a) && !std::isnan(b)); | 
| - | 
| -  if ((a == 0.0) && (b == 0.0) && | 
| -      (copysign(1.0, a) != copysign(1.0, b))) { | 
| -    // a and b are zero, and the sign differs: return +0.0. | 
| -    return 0.0; | 
| -  } else { | 
| -    return (a > b) ? a : b; | 
| -  } | 
| -} | 
| - | 
| - | 
| -template <typename T> | 
| -T Simulator::FPMaxNM(T a, T b) { | 
| -  if (IsQuietNaN(a) && !IsQuietNaN(b)) { | 
| -    a = kFP64NegativeInfinity; | 
| -  } else if (!IsQuietNaN(a) && IsQuietNaN(b)) { | 
| -    b = kFP64NegativeInfinity; | 
| -  } | 
| - | 
| -  T result = FPProcessNaNs(a, b); | 
| -  return std::isnan(result) ? result : FPMax(a, b); | 
| -} | 
| - | 
| -template <typename T> | 
| -T Simulator::FPMin(T a, T b) { | 
| -  // NaNs should be handled elsewhere. | 
| -  DCHECK(!std::isnan(a) && !std::isnan(b)); | 
| - | 
| -  if ((a == 0.0) && (b == 0.0) && | 
| -      (copysign(1.0, a) != copysign(1.0, b))) { | 
| -    // a and b are zero, and the sign differs: return -0.0. | 
| -    return -0.0; | 
| -  } else { | 
| -    return (a < b) ? a : b; | 
| -  } | 
| -} | 
| - | 
| - | 
| -template <typename T> | 
| -T Simulator::FPMinNM(T a, T b) { | 
| -  if (IsQuietNaN(a) && !IsQuietNaN(b)) { | 
| -    a = kFP64PositiveInfinity; | 
| -  } else if (!IsQuietNaN(a) && IsQuietNaN(b)) { | 
| -    b = kFP64PositiveInfinity; | 
| -  } | 
| - | 
| -  T result = FPProcessNaNs(a, b); | 
| -  return std::isnan(result) ? result : FPMin(a, b); | 
| -} | 
| - | 
| - | 
| -template <typename T> | 
| -T Simulator::FPMul(T op1, T op2) { | 
| -  // NaNs should be handled elsewhere. | 
| -  DCHECK(!std::isnan(op1) && !std::isnan(op2)); | 
| - | 
| -  if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) { | 
| -    // inf * 0.0 returns the default NaN. | 
| -    return FPDefaultNaN<T>(); | 
| -  } else { | 
| -    // Other cases should be handled by standard arithmetic. | 
| -    return op1 * op2; | 
| -  } | 
| -} | 
| - | 
| - | 
| -template<typename T> | 
| -T Simulator::FPMulAdd(T a, T op1, T op2) { | 
| -  T result = FPProcessNaNs3(a, op1, op2); | 
| - | 
| -  T sign_a = copysign(1.0, a); | 
| -  T sign_prod = copysign(1.0, op1) * copysign(1.0, op2); | 
| -  bool isinf_prod = std::isinf(op1) || std::isinf(op2); | 
| -  bool operation_generates_nan = | 
| -      (std::isinf(op1) && (op2 == 0.0)) ||                      // inf * 0.0 | 
| -      (std::isinf(op2) && (op1 == 0.0)) ||                      // 0.0 * inf | 
| -      (std::isinf(a) && isinf_prod && (sign_a != sign_prod));   // inf - inf | 
| - | 
| -  if (std::isnan(result)) { | 
| -    // Generated NaNs override quiet NaNs propagated from a. | 
| -    if (operation_generates_nan && IsQuietNaN(a)) { | 
| -      return FPDefaultNaN<T>(); | 
| -    } else { | 
| -      return result; | 
| -    } | 
| -  } | 
| - | 
| -  // If the operation would produce a NaN, return the default NaN. | 
| -  if (operation_generates_nan) { | 
| -    return FPDefaultNaN<T>(); | 
| -  } | 
| - | 
| -  // Work around broken fma implementations for exact zero results: The sign of | 
| -  // exact 0.0 results is positive unless both a and op1 * op2 are negative. | 
| -  if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) { | 
| -    return ((sign_a < 0) && (sign_prod < 0)) ? -0.0 : 0.0; | 
| -  } | 
| - | 
| -  result = FusedMultiplyAdd(op1, op2, a); | 
| -  DCHECK(!std::isnan(result)); | 
| - | 
| -  // Work around broken fma implementations for rounded zero results: If a is | 
| -  // 0.0, the sign of the result is the sign of op1 * op2 before rounding. | 
| -  if ((a == 0.0) && (result == 0.0)) { | 
| -    return copysign(0.0, sign_prod); | 
| -  } | 
| - | 
| -  return result; | 
| -} | 
| - | 
| - | 
| -template <typename T> | 
| -T Simulator::FPSqrt(T op) { | 
| -  if (std::isnan(op)) { | 
| -    return FPProcessNaN(op); | 
| -  } else if (op < 0.0) { | 
| -    return FPDefaultNaN<T>(); | 
| -  } else { | 
| -    lazily_initialize_fast_sqrt(isolate_); | 
| -    return fast_sqrt(op, isolate_); | 
| -  } | 
| -} | 
| - | 
| - | 
| -template <typename T> | 
| -T Simulator::FPSub(T op1, T op2) { | 
| -  // NaNs should be handled elsewhere. | 
| -  DCHECK(!std::isnan(op1) && !std::isnan(op2)); | 
| - | 
| -  if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) { | 
| -    // inf - inf returns the default NaN. | 
| -    return FPDefaultNaN<T>(); | 
| -  } else { | 
| -    // Other cases should be handled by standard arithmetic. | 
| -    return op1 - op2; | 
| -  } | 
| -} | 
| - | 
| - | 
| -template <typename T> | 
| -T Simulator::FPProcessNaN(T op) { | 
| -  DCHECK(std::isnan(op)); | 
| -  return fpcr().DN() ? FPDefaultNaN<T>() : ToQuietNaN(op); | 
| -} | 
| - | 
| - | 
| -template <typename T> | 
| -T Simulator::FPProcessNaNs(T op1, T op2) { | 
| -  if (IsSignallingNaN(op1)) { | 
| -    return FPProcessNaN(op1); | 
| -  } else if (IsSignallingNaN(op2)) { | 
| -    return FPProcessNaN(op2); | 
| -  } else if (std::isnan(op1)) { | 
| -    DCHECK(IsQuietNaN(op1)); | 
| -    return FPProcessNaN(op1); | 
| -  } else if (std::isnan(op2)) { | 
| -    DCHECK(IsQuietNaN(op2)); | 
| -    return FPProcessNaN(op2); | 
| -  } else { | 
| -    return 0.0; | 
| -  } | 
| -} | 
| - | 
| - | 
| -template <typename T> | 
| -T Simulator::FPProcessNaNs3(T op1, T op2, T op3) { | 
| -  if (IsSignallingNaN(op1)) { | 
| -    return FPProcessNaN(op1); | 
| -  } else if (IsSignallingNaN(op2)) { | 
| -    return FPProcessNaN(op2); | 
| -  } else if (IsSignallingNaN(op3)) { | 
| -    return FPProcessNaN(op3); | 
| -  } else if (std::isnan(op1)) { | 
| -    DCHECK(IsQuietNaN(op1)); | 
| -    return FPProcessNaN(op1); | 
| -  } else if (std::isnan(op2)) { | 
| -    DCHECK(IsQuietNaN(op2)); | 
| -    return FPProcessNaN(op2); | 
| -  } else if (std::isnan(op3)) { | 
| -    DCHECK(IsQuietNaN(op3)); | 
| -    return FPProcessNaN(op3); | 
| -  } else { | 
| -    return 0.0; | 
| +    default: | 
| +      UNIMPLEMENTED(); | 
| } | 
| } | 
|  | 
| - | 
| bool Simulator::FPProcessNaNs(Instruction* instr) { | 
| unsigned fd = instr->Rd(); | 
| unsigned fn = instr->Rn(); | 
| @@ -3452,31 +3182,24 @@ bool Simulator::PrintValue(const char* desc) { | 
| } | 
|  | 
| int i = CodeFromName(desc); | 
| -  STATIC_ASSERT(kNumberOfRegisters == kNumberOfFPRegisters); | 
| -  if (i < 0 || static_cast<unsigned>(i) >= kNumberOfFPRegisters) return false; | 
| +  static_assert(kNumberOfRegisters == kNumberOfVRegisters, | 
| +                "Must be same number of Registers as VRegisters."); | 
| +  if (i < 0 || static_cast<unsigned>(i) >= kNumberOfVRegisters) return false; | 
|  | 
| if (desc[0] == 'v') { | 
| PrintF(stream_, "%s %s:%s 0x%016" PRIx64 "%s (%s%s:%s %g%s %s:%s %g%s)\n", | 
| -        clr_fpreg_name, VRegNameForCode(i), | 
| -        clr_fpreg_value, double_to_rawbits(dreg(i)), | 
| -        clr_normal, | 
| -        clr_fpreg_name, DRegNameForCode(i), | 
| -        clr_fpreg_value, dreg(i), | 
| -        clr_fpreg_name, SRegNameForCode(i), | 
| -        clr_fpreg_value, sreg(i), | 
| -        clr_normal); | 
| +           clr_vreg_name, VRegNameForCode(i), clr_vreg_value, | 
| +           bit_cast<uint64_t>(dreg(i)), clr_normal, clr_vreg_name, | 
| +           DRegNameForCode(i), clr_vreg_value, dreg(i), clr_vreg_name, | 
| +           SRegNameForCode(i), clr_vreg_value, sreg(i), clr_normal); | 
| return true; | 
| } else if (desc[0] == 'd') { | 
| -    PrintF(stream_, "%s %s:%s %g%s\n", | 
| -        clr_fpreg_name, DRegNameForCode(i), | 
| -        clr_fpreg_value, dreg(i), | 
| -        clr_normal); | 
| +    PrintF(stream_, "%s %s:%s %g%s\n", clr_vreg_name, DRegNameForCode(i), | 
| +           clr_vreg_value, dreg(i), clr_normal); | 
| return true; | 
| } else if (desc[0] == 's') { | 
| -    PrintF(stream_, "%s %s:%s %g%s\n", | 
| -        clr_fpreg_name, SRegNameForCode(i), | 
| -        clr_fpreg_value, sreg(i), | 
| -        clr_normal); | 
| +    PrintF(stream_, "%s %s:%s %g%s\n", clr_vreg_name, SRegNameForCode(i), | 
| +           clr_vreg_value, sreg(i), clr_normal); | 
| return true; | 
| } else if (desc[0] == 'w') { | 
| PrintF(stream_, "%s %s:%s 0x%08" PRIx32 "%s\n", | 
| @@ -3602,7 +3325,7 @@ void Simulator::Debug() { | 
| if (argc == 2) { | 
| if (strcmp(arg1, "all") == 0) { | 
| PrintRegisters(); | 
| -            PrintFPRegisters(); | 
| +            PrintVRegisters(); | 
| } else { | 
| if (!PrintValue(arg1)) { | 
| PrintF("%s unrecognized\n", arg1); | 
| @@ -3828,7 +3551,9 @@ void Simulator::VisitException(Instruction* instr) { | 
| set_log_parameters(log_parameters() | parameters); | 
| if (parameters & LOG_SYS_REGS) { PrintSystemRegisters(); } | 
| if (parameters & LOG_REGS) { PrintRegisters(); } | 
| -            if (parameters & LOG_FP_REGS) { PrintFPRegisters(); } | 
| +            if (parameters & LOG_VREGS) { | 
| +              PrintVRegisters(); | 
| +            } | 
| break; | 
| case TRACE_DISABLE: | 
| set_log_parameters(log_parameters() & ~parameters); | 
| @@ -3844,7 +3569,7 @@ void Simulator::VisitException(Instruction* instr) { | 
| // Print the requested information. | 
| if (parameters & LOG_SYS_REGS) PrintSystemRegisters(); | 
| if (parameters & LOG_REGS) PrintRegisters(); | 
| -            if (parameters & LOG_FP_REGS) PrintFPRegisters(); | 
| +            if (parameters & LOG_VREGS) PrintVRegisters(); | 
| } | 
|  | 
| // The stop parameters are inlined in the code. Skip them: | 
| @@ -3881,6 +3606,2104 @@ void Simulator::VisitException(Instruction* instr) { | 
| } | 
| } | 
|  | 
| +void Simulator::VisitNEON2RegMisc(Instruction* instr) { | 
| +  NEONFormatDecoder nfd(instr); | 
| +  VectorFormat vf = nfd.GetVectorFormat(); | 
| + | 
| +  // Format mapping for "long pair" instructions, [su]addlp, [su]adalp. | 
| +  static const NEONFormatMap map_lp = { | 
| +      {23, 22, 30}, {NF_4H, NF_8H, NF_2S, NF_4S, NF_1D, NF_2D}}; | 
| +  VectorFormat vf_lp = nfd.GetVectorFormat(&map_lp); | 
| + | 
| +  static const NEONFormatMap map_fcvtl = {{22}, {NF_4S, NF_2D}}; | 
| +  VectorFormat vf_fcvtl = nfd.GetVectorFormat(&map_fcvtl); | 
| + | 
| +  static const NEONFormatMap map_fcvtn = {{22, 30}, | 
| +                                          {NF_4H, NF_8H, NF_2S, NF_4S}}; | 
| +  VectorFormat vf_fcvtn = nfd.GetVectorFormat(&map_fcvtn); | 
| + | 
| +  SimVRegister& rd = vreg(instr->Rd()); | 
| +  SimVRegister& rn = vreg(instr->Rn()); | 
| + | 
| +  if (instr->Mask(NEON2RegMiscOpcode) <= NEON_NEG_opcode) { | 
| +    // These instructions all use a two bit size field, except NOT and RBIT, | 
| +    // which use the field to encode the operation. | 
| +    switch (instr->Mask(NEON2RegMiscMask)) { | 
| +      case NEON_REV64: | 
| +        rev64(vf, rd, rn); | 
| +        break; | 
| +      case NEON_REV32: | 
| +        rev32(vf, rd, rn); | 
| +        break; | 
| +      case NEON_REV16: | 
| +        rev16(vf, rd, rn); | 
| +        break; | 
| +      case NEON_SUQADD: | 
| +        suqadd(vf, rd, rn); | 
| +        break; | 
| +      case NEON_USQADD: | 
| +        usqadd(vf, rd, rn); | 
| +        break; | 
| +      case NEON_CLS: | 
| +        cls(vf, rd, rn); | 
| +        break; | 
| +      case NEON_CLZ: | 
| +        clz(vf, rd, rn); | 
| +        break; | 
| +      case NEON_CNT: | 
| +        cnt(vf, rd, rn); | 
| +        break; | 
| +      case NEON_SQABS: | 
| +        abs(vf, rd, rn).SignedSaturate(vf); | 
| +        break; | 
| +      case NEON_SQNEG: | 
| +        neg(vf, rd, rn).SignedSaturate(vf); | 
| +        break; | 
| +      case NEON_CMGT_zero: | 
| +        cmp(vf, rd, rn, 0, gt); | 
| +        break; | 
| +      case NEON_CMGE_zero: | 
| +        cmp(vf, rd, rn, 0, ge); | 
| +        break; | 
| +      case NEON_CMEQ_zero: | 
| +        cmp(vf, rd, rn, 0, eq); | 
| +        break; | 
| +      case NEON_CMLE_zero: | 
| +        cmp(vf, rd, rn, 0, le); | 
| +        break; | 
| +      case NEON_CMLT_zero: | 
| +        cmp(vf, rd, rn, 0, lt); | 
| +        break; | 
| +      case NEON_ABS: | 
| +        abs(vf, rd, rn); | 
| +        break; | 
| +      case NEON_NEG: | 
| +        neg(vf, rd, rn); | 
| +        break; | 
| +      case NEON_SADDLP: | 
| +        saddlp(vf_lp, rd, rn); | 
| +        break; | 
| +      case NEON_UADDLP: | 
| +        uaddlp(vf_lp, rd, rn); | 
| +        break; | 
| +      case NEON_SADALP: | 
| +        sadalp(vf_lp, rd, rn); | 
| +        break; | 
| +      case NEON_UADALP: | 
| +        uadalp(vf_lp, rd, rn); | 
| +        break; | 
| +      case NEON_RBIT_NOT: | 
| +        vf = nfd.GetVectorFormat(nfd.LogicalFormatMap()); | 
| +        switch (instr->FPType()) { | 
| +          case 0: | 
| +            not_(vf, rd, rn); | 
| +            break; | 
| +          case 1: | 
| +            rbit(vf, rd, rn); | 
| +            break; | 
| +          default: | 
| +            UNIMPLEMENTED(); | 
| +        } | 
| +        break; | 
| +    } | 
| +  } else { | 
| +    VectorFormat fpf = nfd.GetVectorFormat(nfd.FPFormatMap()); | 
| +    FPRounding fpcr_rounding = static_cast<FPRounding>(fpcr().RMode()); | 
| +    bool inexact_exception = false; | 
| + | 
| +    // These instructions all use a one bit size field, except XTN, SQXTUN, | 
| +    // SHLL, SQXTN and UQXTN, which use a two bit size field. | 
| +    switch (instr->Mask(NEON2RegMiscFPMask)) { | 
| +      case NEON_FABS: | 
| +        fabs_(fpf, rd, rn); | 
| +        return; | 
| +      case NEON_FNEG: | 
| +        fneg(fpf, rd, rn); | 
| +        return; | 
| +      case NEON_FSQRT: | 
| +        fsqrt(fpf, rd, rn); | 
| +        return; | 
| +      case NEON_FCVTL: | 
| +        if (instr->Mask(NEON_Q)) { | 
| +          fcvtl2(vf_fcvtl, rd, rn); | 
| +        } else { | 
| +          fcvtl(vf_fcvtl, rd, rn); | 
| +        } | 
| +        return; | 
| +      case NEON_FCVTN: | 
| +        if (instr->Mask(NEON_Q)) { | 
| +          fcvtn2(vf_fcvtn, rd, rn); | 
| +        } else { | 
| +          fcvtn(vf_fcvtn, rd, rn); | 
| +        } | 
| +        return; | 
| +      case NEON_FCVTXN: | 
| +        if (instr->Mask(NEON_Q)) { | 
| +          fcvtxn2(vf_fcvtn, rd, rn); | 
| +        } else { | 
| +          fcvtxn(vf_fcvtn, rd, rn); | 
| +        } | 
| +        return; | 
| + | 
| +      // The following instructions break from the switch statement, rather | 
| +      // than return. | 
| +      case NEON_FRINTI: | 
| +        break;  // Use FPCR rounding mode. | 
| +      case NEON_FRINTX: | 
| +        inexact_exception = true; | 
| +        break; | 
| +      case NEON_FRINTA: | 
| +        fpcr_rounding = FPTieAway; | 
| +        break; | 
| +      case NEON_FRINTM: | 
| +        fpcr_rounding = FPNegativeInfinity; | 
| +        break; | 
| +      case NEON_FRINTN: | 
| +        fpcr_rounding = FPTieEven; | 
| +        break; | 
| +      case NEON_FRINTP: | 
| +        fpcr_rounding = FPPositiveInfinity; | 
| +        break; | 
| +      case NEON_FRINTZ: | 
| +        fpcr_rounding = FPZero; | 
| +        break; | 
| + | 
| +      // The remaining cases return to the caller. | 
| +      case NEON_FCVTNS: | 
| +        fcvts(fpf, rd, rn, FPTieEven); | 
| +        return; | 
| +      case NEON_FCVTNU: | 
| +        fcvtu(fpf, rd, rn, FPTieEven); | 
| +        return; | 
| +      case NEON_FCVTPS: | 
| +        fcvts(fpf, rd, rn, FPPositiveInfinity); | 
| +        return; | 
| +      case NEON_FCVTPU: | 
| +        fcvtu(fpf, rd, rn, FPPositiveInfinity); | 
| +        return; | 
| +      case NEON_FCVTMS: | 
| +        fcvts(fpf, rd, rn, FPNegativeInfinity); | 
| +        return; | 
| +      case NEON_FCVTMU: | 
| +        fcvtu(fpf, rd, rn, FPNegativeInfinity); | 
| +        return; | 
| +      case NEON_FCVTZS: | 
| +        fcvts(fpf, rd, rn, FPZero); | 
| +        return; | 
| +      case NEON_FCVTZU: | 
| +        fcvtu(fpf, rd, rn, FPZero); | 
| +        return; | 
| +      case NEON_FCVTAS: | 
| +        fcvts(fpf, rd, rn, FPTieAway); | 
| +        return; | 
| +      case NEON_FCVTAU: | 
| +        fcvtu(fpf, rd, rn, FPTieAway); | 
| +        return; | 
| +      case NEON_SCVTF: | 
| +        scvtf(fpf, rd, rn, 0, fpcr_rounding); | 
| +        return; | 
| +      case NEON_UCVTF: | 
| +        ucvtf(fpf, rd, rn, 0, fpcr_rounding); | 
| +        return; | 
| +      case NEON_URSQRTE: | 
| +        ursqrte(fpf, rd, rn); | 
| +        return; | 
| +      case NEON_URECPE: | 
| +        urecpe(fpf, rd, rn); | 
| +        return; | 
| +      case NEON_FRSQRTE: | 
| +        frsqrte(fpf, rd, rn); | 
| +        return; | 
| +      case NEON_FRECPE: | 
| +        frecpe(fpf, rd, rn, fpcr_rounding); | 
| +        return; | 
| +      case NEON_FCMGT_zero: | 
| +        fcmp_zero(fpf, rd, rn, gt); | 
| +        return; | 
| +      case NEON_FCMGE_zero: | 
| +        fcmp_zero(fpf, rd, rn, ge); | 
| +        return; | 
| +      case NEON_FCMEQ_zero: | 
| +        fcmp_zero(fpf, rd, rn, eq); | 
| +        return; | 
| +      case NEON_FCMLE_zero: | 
| +        fcmp_zero(fpf, rd, rn, le); | 
| +        return; | 
| +      case NEON_FCMLT_zero: | 
| +        fcmp_zero(fpf, rd, rn, lt); | 
| +        return; | 
| +      default: | 
| +        if ((NEON_XTN_opcode <= instr->Mask(NEON2RegMiscOpcode)) && | 
| +            (instr->Mask(NEON2RegMiscOpcode) <= NEON_UQXTN_opcode)) { | 
| +          switch (instr->Mask(NEON2RegMiscMask)) { | 
| +            case NEON_XTN: | 
| +              xtn(vf, rd, rn); | 
| +              return; | 
| +            case NEON_SQXTN: | 
| +              sqxtn(vf, rd, rn); | 
| +              return; | 
| +            case NEON_UQXTN: | 
| +              uqxtn(vf, rd, rn); | 
| +              return; | 
| +            case NEON_SQXTUN: | 
| +              sqxtun(vf, rd, rn); | 
| +              return; | 
| +            case NEON_SHLL: | 
| +              vf = nfd.GetVectorFormat(nfd.LongIntegerFormatMap()); | 
| +              if (instr->Mask(NEON_Q)) { | 
| +                shll2(vf, rd, rn); | 
| +              } else { | 
| +                shll(vf, rd, rn); | 
| +              } | 
| +              return; | 
| +            default: | 
| +              UNIMPLEMENTED(); | 
| +          } | 
| +        } else { | 
| +          UNIMPLEMENTED(); | 
| +        } | 
| +    } | 
| + | 
| +    // Only FRINT* instructions fall through the switch above. | 
| +    frint(fpf, rd, rn, fpcr_rounding, inexact_exception); | 
| +  } | 
| +} | 
| + | 
| +void Simulator::VisitNEON3Same(Instruction* instr) { | 
| +  NEONFormatDecoder nfd(instr); | 
| +  SimVRegister& rd = vreg(instr->Rd()); | 
| +  SimVRegister& rn = vreg(instr->Rn()); | 
| +  SimVRegister& rm = vreg(instr->Rm()); | 
| + | 
| +  if (instr->Mask(NEON3SameLogicalFMask) == NEON3SameLogicalFixed) { | 
| +    VectorFormat vf = nfd.GetVectorFormat(nfd.LogicalFormatMap()); | 
| +    switch (instr->Mask(NEON3SameLogicalMask)) { | 
| +      case NEON_AND: | 
| +        and_(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_ORR: | 
| +        orr(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_ORN: | 
| +        orn(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_EOR: | 
| +        eor(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_BIC: | 
| +        bic(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_BIF: | 
| +        bif(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_BIT: | 
| +        bit(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_BSL: | 
| +        bsl(vf, rd, rn, rm); | 
| +        break; | 
| +      default: | 
| +        UNIMPLEMENTED(); | 
| +    } | 
| +  } else if (instr->Mask(NEON3SameFPFMask) == NEON3SameFPFixed) { | 
| +    VectorFormat vf = nfd.GetVectorFormat(nfd.FPFormatMap()); | 
| +    switch (instr->Mask(NEON3SameFPMask)) { | 
| +      case NEON_FADD: | 
| +        fadd(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_FSUB: | 
| +        fsub(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_FMUL: | 
| +        fmul(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_FDIV: | 
| +        fdiv(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_FMAX: | 
| +        fmax(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_FMIN: | 
| +        fmin(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_FMAXNM: | 
| +        fmaxnm(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_FMINNM: | 
| +        fminnm(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_FMLA: | 
| +        fmla(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_FMLS: | 
| +        fmls(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_FMULX: | 
| +        fmulx(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_FACGE: | 
| +        fabscmp(vf, rd, rn, rm, ge); | 
| +        break; | 
| +      case NEON_FACGT: | 
| +        fabscmp(vf, rd, rn, rm, gt); | 
| +        break; | 
| +      case NEON_FCMEQ: | 
| +        fcmp(vf, rd, rn, rm, eq); | 
| +        break; | 
| +      case NEON_FCMGE: | 
| +        fcmp(vf, rd, rn, rm, ge); | 
| +        break; | 
| +      case NEON_FCMGT: | 
| +        fcmp(vf, rd, rn, rm, gt); | 
| +        break; | 
| +      case NEON_FRECPS: | 
| +        frecps(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_FRSQRTS: | 
| +        frsqrts(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_FABD: | 
| +        fabd(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_FADDP: | 
| +        faddp(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_FMAXP: | 
| +        fmaxp(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_FMAXNMP: | 
| +        fmaxnmp(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_FMINP: | 
| +        fminp(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_FMINNMP: | 
| +        fminnmp(vf, rd, rn, rm); | 
| +        break; | 
| +      default: | 
| +        UNIMPLEMENTED(); | 
| +    } | 
| +  } else { | 
| +    VectorFormat vf = nfd.GetVectorFormat(); | 
| +    switch (instr->Mask(NEON3SameMask)) { | 
| +      case NEON_ADD: | 
| +        add(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_ADDP: | 
| +        addp(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_CMEQ: | 
| +        cmp(vf, rd, rn, rm, eq); | 
| +        break; | 
| +      case NEON_CMGE: | 
| +        cmp(vf, rd, rn, rm, ge); | 
| +        break; | 
| +      case NEON_CMGT: | 
| +        cmp(vf, rd, rn, rm, gt); | 
| +        break; | 
| +      case NEON_CMHI: | 
| +        cmp(vf, rd, rn, rm, hi); | 
| +        break; | 
| +      case NEON_CMHS: | 
| +        cmp(vf, rd, rn, rm, hs); | 
| +        break; | 
| +      case NEON_CMTST: | 
| +        cmptst(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_MLS: | 
| +        mls(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_MLA: | 
| +        mla(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_MUL: | 
| +        mul(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_PMUL: | 
| +        pmul(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_SMAX: | 
| +        smax(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_SMAXP: | 
| +        smaxp(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_SMIN: | 
| +        smin(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_SMINP: | 
| +        sminp(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_SUB: | 
| +        sub(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_UMAX: | 
| +        umax(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_UMAXP: | 
| +        umaxp(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_UMIN: | 
| +        umin(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_UMINP: | 
| +        uminp(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_SSHL: | 
| +        sshl(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_USHL: | 
| +        ushl(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_SABD: | 
| +        AbsDiff(vf, rd, rn, rm, true); | 
| +        break; | 
| +      case NEON_UABD: | 
| +        AbsDiff(vf, rd, rn, rm, false); | 
| +        break; | 
| +      case NEON_SABA: | 
| +        saba(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_UABA: | 
| +        uaba(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_UQADD: | 
| +        add(vf, rd, rn, rm).UnsignedSaturate(vf); | 
| +        break; | 
| +      case NEON_SQADD: | 
| +        add(vf, rd, rn, rm).SignedSaturate(vf); | 
| +        break; | 
| +      case NEON_UQSUB: | 
| +        sub(vf, rd, rn, rm).UnsignedSaturate(vf); | 
| +        break; | 
| +      case NEON_SQSUB: | 
| +        sub(vf, rd, rn, rm).SignedSaturate(vf); | 
| +        break; | 
| +      case NEON_SQDMULH: | 
| +        sqdmulh(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_SQRDMULH: | 
| +        sqrdmulh(vf, rd, rn, rm); | 
| +        break; | 
| +      case NEON_UQSHL: | 
| +        ushl(vf, rd, rn, rm).UnsignedSaturate(vf); | 
| +        break; | 
| +      case NEON_SQSHL: | 
| +        sshl(vf, rd, rn, rm).SignedSaturate(vf); | 
| +        break; | 
| +      case NEON_URSHL: | 
| +        ushl(vf, rd, rn, rm).Round(vf); | 
| +        break; | 
| +      case NEON_SRSHL: | 
| +        sshl(vf, rd, rn, rm).Round(vf); | 
| +        break; | 
| +      case NEON_UQRSHL: | 
| +        ushl(vf, rd, rn, rm).Round(vf).UnsignedSaturate(vf); | 
| +        break; | 
| +      case NEON_SQRSHL: | 
| +        sshl(vf, rd, rn, rm).Round(vf).SignedSaturate(vf); | 
| +        break; | 
| +      case NEON_UHADD: | 
| +        add(vf, rd, rn, rm).Uhalve(vf); | 
| +        break; | 
| +      case NEON_URHADD: | 
| +        add(vf, rd, rn, rm).Uhalve(vf).Round(vf); | 
| +        break; | 
| +      case NEON_SHADD: | 
| +        add(vf, rd, rn, rm).Halve(vf); | 
| +        break; | 
| +      case NEON_SRHADD: | 
| +        add(vf, rd, rn, rm).Halve(vf).Round(vf); | 
| +        break; | 
| +      case NEON_UHSUB: | 
| +        sub(vf, rd, rn, rm).Uhalve(vf); | 
| +        break; | 
| +      case NEON_SHSUB: | 
| +        sub(vf, rd, rn, rm).Halve(vf); | 
| +        break; | 
| +      default: | 
| +        UNIMPLEMENTED(); | 
| +    } | 
| +  } | 
| +} | 
| + | 
| +// Simulate the NEON "three registers, different types" instruction group: | 
| +// long (e.g. SMULL), wide (e.g. SADDW) and narrowing (e.g. ADDHN) forms, | 
| +// where source and destination lane sizes differ. | 
| +void Simulator::VisitNEON3Different(Instruction* instr) { | 
| +  NEONFormatDecoder nfd(instr); | 
| +  // vf is the narrower format; vf_l is the long (doubled lane size) format. | 
| +  VectorFormat vf = nfd.GetVectorFormat(); | 
| +  VectorFormat vf_l = nfd.GetVectorFormat(nfd.LongIntegerFormatMap()); | 
| + | 
| +  SimVRegister& rd = vreg(instr->Rd()); | 
| +  SimVRegister& rn = vreg(instr->Rn()); | 
| +  SimVRegister& rm = vreg(instr->Rm()); | 
| + | 
| +  switch (instr->Mask(NEON3DifferentMask)) { | 
| +    case NEON_PMULL: | 
| +      pmull(vf_l, rd, rn, rm); | 
| +      break; | 
| +    case NEON_PMULL2: | 
| +      pmull2(vf_l, rd, rn, rm); | 
| +      break; | 
| +    case NEON_UADDL: | 
| +      uaddl(vf_l, rd, rn, rm); | 
| +      break; | 
| +    case NEON_UADDL2: | 
| +      uaddl2(vf_l, rd, rn, rm); | 
| +      break; | 
| +    case NEON_SADDL: | 
| +      saddl(vf_l, rd, rn, rm); | 
| +      break; | 
| +    case NEON_SADDL2: | 
| +      saddl2(vf_l, rd, rn, rm); | 
| +      break; | 
| +    case NEON_USUBL: | 
| +      usubl(vf_l, rd, rn, rm); | 
| +      break; | 
| +    case NEON_USUBL2: | 
| +      usubl2(vf_l, rd, rn, rm); | 
| +      break; | 
| +    case NEON_SSUBL: | 
| +      ssubl(vf_l, rd, rn, rm); | 
| +      break; | 
| +    case NEON_SSUBL2: | 
| +      ssubl2(vf_l, rd, rn, rm); | 
| +      break; | 
| +    case NEON_SABAL: | 
| +      sabal(vf_l, rd, rn, rm); | 
| +      break; | 
| +    case NEON_SABAL2: | 
| +      sabal2(vf_l, rd, rn, rm); | 
| +      break; | 
| +    case NEON_UABAL: | 
| +      uabal(vf_l, rd, rn, rm); | 
| +      break; | 
| +    case NEON_UABAL2: | 
| +      uabal2(vf_l, rd, rn, rm); | 
| +      break; | 
| +    case NEON_SABDL: | 
| +      sabdl(vf_l, rd, rn, rm); | 
| +      break; | 
| +    case NEON_SABDL2: | 
| +      sabdl2(vf_l, rd, rn, rm); | 
| +      break; | 
| +    case NEON_UABDL: | 
| +      uabdl(vf_l, rd, rn, rm); | 
| +      break; | 
| +    case NEON_UABDL2: | 
| +      uabdl2(vf_l, rd, rn, rm); | 
| +      break; | 
| +    case NEON_SMLAL: | 
| +      smlal(vf_l, rd, rn, rm); | 
| +      break; | 
| +    case NEON_SMLAL2: | 
| +      smlal2(vf_l, rd, rn, rm); | 
| +      break; | 
| +    case NEON_UMLAL: | 
| +      umlal(vf_l, rd, rn, rm); | 
| +      break; | 
| +    case NEON_UMLAL2: | 
| +      umlal2(vf_l, rd, rn, rm); | 
| +      break; | 
| +    case NEON_SMLSL: | 
| +      smlsl(vf_l, rd, rn, rm); | 
| +      break; | 
| +    case NEON_SMLSL2: | 
| +      smlsl2(vf_l, rd, rn, rm); | 
| +      break; | 
| +    case NEON_UMLSL: | 
| +      umlsl(vf_l, rd, rn, rm); | 
| +      break; | 
| +    case NEON_UMLSL2: | 
| +      umlsl2(vf_l, rd, rn, rm); | 
| +      break; | 
| +    case NEON_SMULL: | 
| +      smull(vf_l, rd, rn, rm); | 
| +      break; | 
| +    case NEON_SMULL2: | 
| +      smull2(vf_l, rd, rn, rm); | 
| +      break; | 
| +    case NEON_UMULL: | 
| +      umull(vf_l, rd, rn, rm); | 
| +      break; | 
| +    case NEON_UMULL2: | 
| +      umull2(vf_l, rd, rn, rm); | 
| +      break; | 
| +    case NEON_SQDMLAL: | 
| +      sqdmlal(vf_l, rd, rn, rm); | 
| +      break; | 
| +    case NEON_SQDMLAL2: | 
| +      sqdmlal2(vf_l, rd, rn, rm); | 
| +      break; | 
| +    case NEON_SQDMLSL: | 
| +      sqdmlsl(vf_l, rd, rn, rm); | 
| +      break; | 
| +    case NEON_SQDMLSL2: | 
| +      sqdmlsl2(vf_l, rd, rn, rm); | 
| +      break; | 
| +    case NEON_SQDMULL: | 
| +      sqdmull(vf_l, rd, rn, rm); | 
| +      break; | 
| +    case NEON_SQDMULL2: | 
| +      sqdmull2(vf_l, rd, rn, rm); | 
| +      break; | 
| +    case NEON_UADDW: | 
| +      uaddw(vf_l, rd, rn, rm); | 
| +      break; | 
| +    case NEON_UADDW2: | 
| +      uaddw2(vf_l, rd, rn, rm); | 
| +      break; | 
| +    case NEON_SADDW: | 
| +      saddw(vf_l, rd, rn, rm); | 
| +      break; | 
| +    case NEON_SADDW2: | 
| +      saddw2(vf_l, rd, rn, rm); | 
| +      break; | 
| +    case NEON_USUBW: | 
| +      usubw(vf_l, rd, rn, rm); | 
| +      break; | 
| +    case NEON_USUBW2: | 
| +      usubw2(vf_l, rd, rn, rm); | 
| +      break; | 
| +    case NEON_SSUBW: | 
| +      ssubw(vf_l, rd, rn, rm); | 
| +      break; | 
| +    case NEON_SSUBW2: | 
| +      ssubw2(vf_l, rd, rn, rm); | 
| +      break; | 
| +    // The narrowing "high half" forms take the narrow destination format vf. | 
| +    case NEON_ADDHN: | 
| +      addhn(vf, rd, rn, rm); | 
| +      break; | 
| +    case NEON_ADDHN2: | 
| +      addhn2(vf, rd, rn, rm); | 
| +      break; | 
| +    case NEON_RADDHN: | 
| +      raddhn(vf, rd, rn, rm); | 
| +      break; | 
| +    case NEON_RADDHN2: | 
| +      raddhn2(vf, rd, rn, rm); | 
| +      break; | 
| +    case NEON_SUBHN: | 
| +      subhn(vf, rd, rn, rm); | 
| +      break; | 
| +    case NEON_SUBHN2: | 
| +      subhn2(vf, rd, rn, rm); | 
| +      break; | 
| +    case NEON_RSUBHN: | 
| +      rsubhn(vf, rd, rn, rm); | 
| +      break; | 
| +    case NEON_RSUBHN2: | 
| +      rsubhn2(vf, rd, rn, rm); | 
| +      break; | 
| +    default: | 
| +      UNIMPLEMENTED(); | 
| +  } | 
| +} | 
| + | 
| +// Simulate NEON across-lanes (horizontal) reductions such as ADDV, SMAXV | 
| +// and FMINNMV, which fold all lanes of rn into a single result in rd. | 
| +void Simulator::VisitNEONAcrossLanes(Instruction* instr) { | 
| +  NEONFormatDecoder nfd(instr); | 
| + | 
| +  SimVRegister& rd = vreg(instr->Rd()); | 
| +  SimVRegister& rn = vreg(instr->Rn()); | 
| + | 
| +  // The input operand's VectorFormat is passed for these instructions. | 
| +  if (instr->Mask(NEONAcrossLanesFPFMask) == NEONAcrossLanesFPFixed) { | 
| +    VectorFormat vf = nfd.GetVectorFormat(nfd.FPFormatMap()); | 
| + | 
| +    switch (instr->Mask(NEONAcrossLanesFPMask)) { | 
| +      case NEON_FMAXV: | 
| +        fmaxv(vf, rd, rn); | 
| +        break; | 
| +      case NEON_FMINV: | 
| +        fminv(vf, rd, rn); | 
| +        break; | 
| +      case NEON_FMAXNMV: | 
| +        fmaxnmv(vf, rd, rn); | 
| +        break; | 
| +      case NEON_FMINNMV: | 
| +        fminnmv(vf, rd, rn); | 
| +        break; | 
| +      default: | 
| +        UNIMPLEMENTED(); | 
| +    } | 
| +  } else { | 
| +    VectorFormat vf = nfd.GetVectorFormat(); | 
| + | 
| +    switch (instr->Mask(NEONAcrossLanesMask)) { | 
| +      case NEON_ADDV: | 
| +        addv(vf, rd, rn); | 
| +        break; | 
| +      case NEON_SMAXV: | 
| +        smaxv(vf, rd, rn); | 
| +        break; | 
| +      case NEON_SMINV: | 
| +        sminv(vf, rd, rn); | 
| +        break; | 
| +      case NEON_UMAXV: | 
| +        umaxv(vf, rd, rn); | 
| +        break; | 
| +      case NEON_UMINV: | 
| +        uminv(vf, rd, rn); | 
| +        break; | 
| +      case NEON_SADDLV: | 
| +        saddlv(vf, rd, rn); | 
| +        break; | 
| +      case NEON_UADDLV: | 
| +        uaddlv(vf, rd, rn); | 
| +        break; | 
| +      default: | 
| +        UNIMPLEMENTED(); | 
| +    } | 
| +  } | 
| +} | 
| + | 
| +// Simulate NEON by-element instructions (e.g. MUL, MLA, FMLA), where one | 
| +// operand is a single element of Rm selected by an immediate index. The | 
| +// selected logic routine is dispatched through the Op member pointer. | 
| +void Simulator::VisitNEONByIndexedElement(Instruction* instr) { | 
| +  NEONFormatDecoder nfd(instr); | 
| +  VectorFormat vf_r = nfd.GetVectorFormat(); | 
| +  VectorFormat vf = nfd.GetVectorFormat(nfd.LongIntegerFormatMap()); | 
| + | 
| +  SimVRegister& rd = vreg(instr->Rd()); | 
| +  SimVRegister& rn = vreg(instr->Rn()); | 
| + | 
| +  ByElementOp Op = NULL; | 
| + | 
| +  // The element index is assembled from the H and L bits (and M, below). | 
| +  int rm_reg = instr->Rm(); | 
| +  int index = (instr->NEONH() << 1) | instr->NEONL(); | 
| +  if (instr->NEONSize() == 1) { | 
| +    // For 16-bit lanes (size == 1) only V0-V15 can be addressed, freeing | 
| +    // the M bit to extend the element index. | 
| +    rm_reg &= 0xf; | 
| +    index = (index << 1) | instr->NEONM(); | 
| +  } | 
| + | 
| +  switch (instr->Mask(NEONByIndexedElementMask)) { | 
| +    case NEON_MUL_byelement: | 
| +      Op = &Simulator::mul; | 
| +      vf = vf_r; | 
| +      break; | 
| +    case NEON_MLA_byelement: | 
| +      Op = &Simulator::mla; | 
| +      vf = vf_r; | 
| +      break; | 
| +    case NEON_MLS_byelement: | 
| +      Op = &Simulator::mls; | 
| +      vf = vf_r; | 
| +      break; | 
| +    case NEON_SQDMULH_byelement: | 
| +      Op = &Simulator::sqdmulh; | 
| +      vf = vf_r; | 
| +      break; | 
| +    case NEON_SQRDMULH_byelement: | 
| +      Op = &Simulator::sqrdmulh; | 
| +      vf = vf_r; | 
| +      break; | 
| +    case NEON_SMULL_byelement: | 
| +      if (instr->Mask(NEON_Q)) { | 
| +        Op = &Simulator::smull2; | 
| +      } else { | 
| +        Op = &Simulator::smull; | 
| +      } | 
| +      break; | 
| +    case NEON_UMULL_byelement: | 
| +      if (instr->Mask(NEON_Q)) { | 
| +        Op = &Simulator::umull2; | 
| +      } else { | 
| +        Op = &Simulator::umull; | 
| +      } | 
| +      break; | 
| +    case NEON_SMLAL_byelement: | 
| +      if (instr->Mask(NEON_Q)) { | 
| +        Op = &Simulator::smlal2; | 
| +      } else { | 
| +        Op = &Simulator::smlal; | 
| +      } | 
| +      break; | 
| +    case NEON_UMLAL_byelement: | 
| +      if (instr->Mask(NEON_Q)) { | 
| +        Op = &Simulator::umlal2; | 
| +      } else { | 
| +        Op = &Simulator::umlal; | 
| +      } | 
| +      break; | 
| +    case NEON_SMLSL_byelement: | 
| +      if (instr->Mask(NEON_Q)) { | 
| +        Op = &Simulator::smlsl2; | 
| +      } else { | 
| +        Op = &Simulator::smlsl; | 
| +      } | 
| +      break; | 
| +    case NEON_UMLSL_byelement: | 
| +      if (instr->Mask(NEON_Q)) { | 
| +        Op = &Simulator::umlsl2; | 
| +      } else { | 
| +        Op = &Simulator::umlsl; | 
| +      } | 
| +      break; | 
| +    case NEON_SQDMULL_byelement: | 
| +      if (instr->Mask(NEON_Q)) { | 
| +        Op = &Simulator::sqdmull2; | 
| +      } else { | 
| +        Op = &Simulator::sqdmull; | 
| +      } | 
| +      break; | 
| +    case NEON_SQDMLAL_byelement: | 
| +      if (instr->Mask(NEON_Q)) { | 
| +        Op = &Simulator::sqdmlal2; | 
| +      } else { | 
| +        Op = &Simulator::sqdmlal; | 
| +      } | 
| +      break; | 
| +    case NEON_SQDMLSL_byelement: | 
| +      if (instr->Mask(NEON_Q)) { | 
| +        Op = &Simulator::sqdmlsl2; | 
| +      } else { | 
| +        Op = &Simulator::sqdmlsl; | 
| +      } | 
| +      break; | 
| +    default: | 
| +      // Not an integer by-element form: re-decode the index and format for | 
| +      // the floating point by-element instructions. | 
| +      index = instr->NEONH(); | 
| +      if ((instr->FPType() & 1) == 0) { | 
| +        index = (index << 1) | instr->NEONL(); | 
| +      } | 
| + | 
| +      vf = nfd.GetVectorFormat(nfd.FPFormatMap()); | 
| + | 
| +      switch (instr->Mask(NEONByIndexedElementFPMask)) { | 
| +        case NEON_FMUL_byelement: | 
| +          Op = &Simulator::fmul; | 
| +          break; | 
| +        case NEON_FMLA_byelement: | 
| +          Op = &Simulator::fmla; | 
| +          break; | 
| +        case NEON_FMLS_byelement: | 
| +          Op = &Simulator::fmls; | 
| +          break; | 
| +        case NEON_FMULX_byelement: | 
| +          Op = &Simulator::fmulx; | 
| +          break; | 
| +        default: | 
| +          UNIMPLEMENTED(); | 
| +      } | 
| +  } | 
| + | 
| +  (this->*Op)(vf, rd, rn, vreg(rm_reg), index); | 
| +} | 
| + | 
| +// Simulate NEON copy instructions: INS (element and general), UMOV/SMOV | 
| +// moves to general registers, and DUP (element and general). | 
| +void Simulator::VisitNEONCopy(Instruction* instr) { | 
| +  NEONFormatDecoder nfd(instr, NEONFormatDecoder::TriangularFormatMap()); | 
| +  VectorFormat vf = nfd.GetVectorFormat(); | 
| + | 
| +  SimVRegister& rd = vreg(instr->Rd()); | 
| +  SimVRegister& rn = vreg(instr->Rn()); | 
| +  // The lowest set bit of imm5 encodes the lane size; the bits above it | 
| +  // encode the destination lane index. | 
| +  int imm5 = instr->ImmNEON5(); | 
| +  int lsb = LowestSetBitPosition(imm5); | 
| +  int reg_index = imm5 >> lsb; | 
| + | 
| +  if (instr->Mask(NEONCopyInsElementMask) == NEON_INS_ELEMENT) { | 
| +    // For INS (element), imm4 encodes the source lane index. | 
| +    int imm4 = instr->ImmNEON4(); | 
| +    DCHECK_GE(lsb, 1); | 
| +    int rn_index = imm4 >> (lsb - 1); | 
| +    ins_element(vf, rd, reg_index, rn, rn_index); | 
| +  } else if (instr->Mask(NEONCopyInsGeneralMask) == NEON_INS_GENERAL) { | 
| +    ins_immediate(vf, rd, reg_index, xreg(instr->Rn())); | 
| +  } else if (instr->Mask(NEONCopyUmovMask) == NEON_UMOV) { | 
| +    uint64_t value = LogicVRegister(rn).Uint(vf, reg_index); | 
| +    value &= MaxUintFromFormat(vf); | 
| +    set_xreg(instr->Rd(), value); | 
| +  } else if (instr->Mask(NEONCopyUmovMask) == NEON_SMOV) { | 
| +    // SMOV sign-extends the lane; the Q bit selects a W or X destination. | 
| +    int64_t value = LogicVRegister(rn).Int(vf, reg_index); | 
| +    if (instr->NEONQ()) { | 
| +      set_xreg(instr->Rd(), value); | 
| +    } else { | 
| +      DCHECK(is_int32(value)); | 
| +      set_wreg(instr->Rd(), static_cast<int32_t>(value)); | 
| +    } | 
| +  } else if (instr->Mask(NEONCopyDupElementMask) == NEON_DUP_ELEMENT) { | 
| +    dup_element(vf, rd, rn, reg_index); | 
| +  } else if (instr->Mask(NEONCopyDupGeneralMask) == NEON_DUP_GENERAL) { | 
| +    dup_immediate(vf, rd, xreg(instr->Rn())); | 
| +  } else { | 
| +    UNIMPLEMENTED(); | 
| +  } | 
| +} | 
| + | 
| +// Simulate NEON EXT: extract a vector from a concatenated pair of source | 
| +// vectors, starting at the immediate byte index. | 
| +void Simulator::VisitNEONExtract(Instruction* instr) { | 
| +  NEONFormatDecoder nfd(instr, NEONFormatDecoder::LogicalFormatMap()); | 
| +  VectorFormat vf = nfd.GetVectorFormat(); | 
| +  SimVRegister& rd = vreg(instr->Rd()); | 
| +  SimVRegister& rn = vreg(instr->Rn()); | 
| +  SimVRegister& rm = vreg(instr->Rm()); | 
| +  if (instr->Mask(NEONExtractMask) == NEON_EXT) { | 
| +    int index = instr->ImmNEONExt(); | 
| +    ext(vf, rd, rn, rm, index); | 
| +  } else { | 
| +    UNIMPLEMENTED(); | 
| +  } | 
| +} | 
| + | 
| +// Simulate NEON load/store of multiple structures (LD1-LD4 / ST1-ST4), | 
| +// shared between the Offset and PostIndex visitors via addr_mode. | 
| +void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr, | 
| +                                               AddrMode addr_mode) { | 
| +  NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap()); | 
| +  VectorFormat vf = nfd.GetVectorFormat(); | 
| + | 
| +  uint64_t addr_base = xreg(instr->Rn(), Reg31IsStackPointer); | 
| +  int reg_size = RegisterSizeInBytesFromFormat(vf); | 
| + | 
| +  // Up to four consecutive registers take part, wrapping at V31. | 
| +  int reg[4]; | 
| +  uint64_t addr[4]; | 
| +  for (int i = 0; i < 4; i++) { | 
| +    reg[i] = (instr->Rt() + i) % kNumberOfVRegisters; | 
| +    addr[i] = addr_base + (i * reg_size); | 
| +  } | 
| +  // count is the number of registers transferred; log_read selects whether | 
| +  // the trace below reports a load or a store. | 
| +  int count = 1; | 
| +  bool log_read = true; | 
| + | 
| +  // Bit 23 determines whether this is an offset or post-index addressing mode. | 
| +  // In offset mode, bits 20 to 16 should be zero; these bits encode the | 
| +  // register of immediate in post-index mode. | 
| +  if ((instr->Bit(23) == 0) && (instr->Bits(20, 16) != 0)) { | 
| +    UNREACHABLE(); | 
| +  } | 
| + | 
| +  // We use the PostIndex mask here, as it works in this case for both Offset | 
| +  // and PostIndex addressing. | 
| +  switch (instr->Mask(NEONLoadStoreMultiStructPostIndexMask)) { | 
| +    case NEON_LD1_4v: | 
| +    case NEON_LD1_4v_post: | 
| +      ld1(vf, vreg(reg[3]), addr[3]); | 
| +      count++;  // Fall through. | 
| +    case NEON_LD1_3v: | 
| +    case NEON_LD1_3v_post: | 
| +      ld1(vf, vreg(reg[2]), addr[2]); | 
| +      count++;  // Fall through. | 
| +    case NEON_LD1_2v: | 
| +    case NEON_LD1_2v_post: | 
| +      ld1(vf, vreg(reg[1]), addr[1]); | 
| +      count++;  // Fall through. | 
| +    case NEON_LD1_1v: | 
| +    case NEON_LD1_1v_post: | 
| +      ld1(vf, vreg(reg[0]), addr[0]); | 
| +      break; | 
| +    case NEON_ST1_4v: | 
| +    case NEON_ST1_4v_post: | 
| +      st1(vf, vreg(reg[3]), addr[3]); | 
| +      count++;  // Fall through. | 
| +    case NEON_ST1_3v: | 
| +    case NEON_ST1_3v_post: | 
| +      st1(vf, vreg(reg[2]), addr[2]); | 
| +      count++;  // Fall through. | 
| +    case NEON_ST1_2v: | 
| +    case NEON_ST1_2v_post: | 
| +      st1(vf, vreg(reg[1]), addr[1]); | 
| +      count++;  // Fall through. | 
| +    case NEON_ST1_1v: | 
| +    case NEON_ST1_1v_post: | 
| +      st1(vf, vreg(reg[0]), addr[0]); | 
| +      log_read = false; | 
| +      break; | 
| +    case NEON_LD2_post: | 
| +    case NEON_LD2: | 
| +      ld2(vf, vreg(reg[0]), vreg(reg[1]), addr[0]); | 
| +      count = 2; | 
| +      break; | 
| +    case NEON_ST2: | 
| +    case NEON_ST2_post: | 
| +      st2(vf, vreg(reg[0]), vreg(reg[1]), addr[0]); | 
| +      count = 2; | 
| +      log_read = false; | 
| +      break; | 
| +    case NEON_LD3_post: | 
| +    case NEON_LD3: | 
| +      ld3(vf, vreg(reg[0]), vreg(reg[1]), vreg(reg[2]), addr[0]); | 
| +      count = 3; | 
| +      break; | 
| +    case NEON_ST3: | 
| +    case NEON_ST3_post: | 
| +      st3(vf, vreg(reg[0]), vreg(reg[1]), vreg(reg[2]), addr[0]); | 
| +      count = 3; | 
| +      log_read = false; | 
| +      break; | 
| +    case NEON_LD4_post: | 
| +    case NEON_LD4: | 
| +      ld4(vf, vreg(reg[0]), vreg(reg[1]), vreg(reg[2]), vreg(reg[3]), addr[0]); | 
| +      count = 4; | 
| +      break; | 
| +    case NEON_ST4: | 
| +    case NEON_ST4_post: | 
| +      st4(vf, vreg(reg[0]), vreg(reg[1]), vreg(reg[2]), vreg(reg[3]), addr[0]); | 
| +      count = 4; | 
| +      log_read = false; | 
| +      break; | 
| +    default: | 
| +      UNIMPLEMENTED(); | 
| +  } | 
| + | 
| +  // Explicitly log the register update whilst we have type information. | 
| +  for (int i = 0; i < count; i++) { | 
| +    // For de-interleaving loads, only print the base address. | 
| +    int lane_size = LaneSizeInBytesFromFormat(vf); | 
| +    PrintRegisterFormat format = GetPrintRegisterFormatTryFP( | 
| +        GetPrintRegisterFormatForSize(reg_size, lane_size)); | 
| +    if (log_read) { | 
| +      LogVRead(addr_base, reg[i], format); | 
| +    } else { | 
| +      LogVWrite(addr_base, reg[i], format); | 
| +    } | 
| +  } | 
| + | 
| +  if (addr_mode == PostIndex) { | 
| +    int rm = instr->Rm(); | 
| +    // The immediate post index addressing mode is indicated by rm = 31. | 
| +    // The immediate is implied by the number of vector registers used. | 
| +    addr_base += | 
| +        (rm == 31) ? RegisterSizeInBytesFromFormat(vf) * count : xreg(rm); | 
| +    set_xreg(instr->Rn(), addr_base); | 
| +  } else { | 
| +    DCHECK_EQ(addr_mode, Offset); | 
| +  } | 
| +} | 
| + | 
| +// Offset-addressed NEON multi-structure load/store. | 
| +void Simulator::VisitNEONLoadStoreMultiStruct(Instruction* instr) { | 
| +  NEONLoadStoreMultiStructHelper(instr, Offset); | 
| +} | 
| + | 
| +// Post-index-addressed NEON multi-structure load/store. | 
| +void Simulator::VisitNEONLoadStoreMultiStructPostIndex(Instruction* instr) { | 
| +  NEONLoadStoreMultiStructHelper(instr, PostIndex); | 
| +} | 
| + | 
| +// Simulate NEON load/store of a single structure (one lane, or the | 
| +// replicating LDnR forms), shared between Offset and PostIndex visitors. | 
| +void Simulator::NEONLoadStoreSingleStructHelper(const Instruction* instr, | 
| +                                                AddrMode addr_mode) { | 
| +  uint64_t addr = xreg(instr->Rn(), Reg31IsStackPointer); | 
| +  int rt = instr->Rt(); | 
| + | 
| +  // Bit 23 determines whether this is an offset or post-index addressing mode. | 
| +  // In offset mode, bits 20 to 16 should be zero; these bits encode the | 
| +  // register of immediate in post-index mode. | 
| +  DCHECK_IMPLIES(instr->Bit(23) == 0, instr->Bits(20, 16) == 0); | 
| + | 
| +  bool do_load = false; | 
| + | 
| +  NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap()); | 
| +  VectorFormat vf_t = nfd.GetVectorFormat(); | 
| + | 
| +  // The first switch derives the lane format (and performs the LDnR | 
| +  // replicating loads directly); the second performs the per-lane accesses. | 
| +  VectorFormat vf = kFormat16B; | 
| +  // We use the PostIndex mask here, as it works in this case for both Offset | 
| +  // and PostIndex addressing. | 
| +  switch (instr->Mask(NEONLoadStoreSingleStructPostIndexMask)) { | 
| +    case NEON_LD1_b: | 
| +    case NEON_LD1_b_post: | 
| +    case NEON_LD2_b: | 
| +    case NEON_LD2_b_post: | 
| +    case NEON_LD3_b: | 
| +    case NEON_LD3_b_post: | 
| +    case NEON_LD4_b: | 
| +    case NEON_LD4_b_post: | 
| +      do_load = true;  // Fall through. | 
| +    case NEON_ST1_b: | 
| +    case NEON_ST1_b_post: | 
| +    case NEON_ST2_b: | 
| +    case NEON_ST2_b_post: | 
| +    case NEON_ST3_b: | 
| +    case NEON_ST3_b_post: | 
| +    case NEON_ST4_b: | 
| +    case NEON_ST4_b_post: | 
| +      break; | 
| + | 
| +    case NEON_LD1_h: | 
| +    case NEON_LD1_h_post: | 
| +    case NEON_LD2_h: | 
| +    case NEON_LD2_h_post: | 
| +    case NEON_LD3_h: | 
| +    case NEON_LD3_h_post: | 
| +    case NEON_LD4_h: | 
| +    case NEON_LD4_h_post: | 
| +      do_load = true;  // Fall through. | 
| +    case NEON_ST1_h: | 
| +    case NEON_ST1_h_post: | 
| +    case NEON_ST2_h: | 
| +    case NEON_ST2_h_post: | 
| +    case NEON_ST3_h: | 
| +    case NEON_ST3_h_post: | 
| +    case NEON_ST4_h: | 
| +    case NEON_ST4_h_post: | 
| +      vf = kFormat8H; | 
| +      break; | 
| + | 
| +    case NEON_LD1_s: | 
| +    case NEON_LD1_s_post: | 
| +    case NEON_LD2_s: | 
| +    case NEON_LD2_s_post: | 
| +    case NEON_LD3_s: | 
| +    case NEON_LD3_s_post: | 
| +    case NEON_LD4_s: | 
| +    case NEON_LD4_s_post: | 
| +      do_load = true;  // Fall through. | 
| +    case NEON_ST1_s: | 
| +    case NEON_ST1_s_post: | 
| +    case NEON_ST2_s: | 
| +    case NEON_ST2_s_post: | 
| +    case NEON_ST3_s: | 
| +    case NEON_ST3_s_post: | 
| +    case NEON_ST4_s: | 
| +    case NEON_ST4_s_post: { | 
| +      static_assert((NEON_LD1_s | (1 << NEONLSSize_offset)) == NEON_LD1_d, | 
| +                    "LSB of size distinguishes S and D registers."); | 
| +      static_assert( | 
| +          (NEON_LD1_s_post | (1 << NEONLSSize_offset)) == NEON_LD1_d_post, | 
| +          "LSB of size distinguishes S and D registers."); | 
| +      static_assert((NEON_ST1_s | (1 << NEONLSSize_offset)) == NEON_ST1_d, | 
| +                    "LSB of size distinguishes S and D registers."); | 
| +      static_assert( | 
| +          (NEON_ST1_s_post | (1 << NEONLSSize_offset)) == NEON_ST1_d_post, | 
| +          "LSB of size distinguishes S and D registers."); | 
| +      vf = ((instr->NEONLSSize() & 1) == 0) ? kFormat4S : kFormat2D; | 
| +      break; | 
| +    } | 
| + | 
| +    case NEON_LD1R: | 
| +    case NEON_LD1R_post: { | 
| +      vf = vf_t; | 
| +      ld1r(vf, vreg(rt), addr); | 
| +      do_load = true; | 
| +      break; | 
| +    } | 
| + | 
| +    case NEON_LD2R: | 
| +    case NEON_LD2R_post: { | 
| +      vf = vf_t; | 
| +      int rt2 = (rt + 1) % kNumberOfVRegisters; | 
| +      ld2r(vf, vreg(rt), vreg(rt2), addr); | 
| +      do_load = true; | 
| +      break; | 
| +    } | 
| + | 
| +    case NEON_LD3R: | 
| +    case NEON_LD3R_post: { | 
| +      vf = vf_t; | 
| +      int rt2 = (rt + 1) % kNumberOfVRegisters; | 
| +      int rt3 = (rt2 + 1) % kNumberOfVRegisters; | 
| +      ld3r(vf, vreg(rt), vreg(rt2), vreg(rt3), addr); | 
| +      do_load = true; | 
| +      break; | 
| +    } | 
| + | 
| +    case NEON_LD4R: | 
| +    case NEON_LD4R_post: { | 
| +      vf = vf_t; | 
| +      int rt2 = (rt + 1) % kNumberOfVRegisters; | 
| +      int rt3 = (rt2 + 1) % kNumberOfVRegisters; | 
| +      int rt4 = (rt3 + 1) % kNumberOfVRegisters; | 
| +      ld4r(vf, vreg(rt), vreg(rt2), vreg(rt3), vreg(rt4), addr); | 
| +      do_load = true; | 
| +      break; | 
| +    } | 
| +    default: | 
| +      UNIMPLEMENTED(); | 
| +  } | 
| + | 
| +  PrintRegisterFormat print_format = | 
| +      GetPrintRegisterFormatTryFP(GetPrintRegisterFormat(vf)); | 
| +  // Make sure that the print_format only includes a single lane. | 
| +  print_format = | 
| +      static_cast<PrintRegisterFormat>(print_format & ~kPrintRegAsVectorMask); | 
| + | 
| +  int esize = LaneSizeInBytesFromFormat(vf); | 
| +  int index_shift = LaneSizeInBytesLog2FromFormat(vf); | 
| +  int lane = instr->NEONLSIndex(index_shift); | 
| +  // scale counts the registers in the structure; it scales the implied | 
| +  // post-index immediate (scale * lane_size) below. | 
| +  int scale = 0; | 
| +  int rt2 = (rt + 1) % kNumberOfVRegisters; | 
| +  int rt3 = (rt2 + 1) % kNumberOfVRegisters; | 
| +  int rt4 = (rt3 + 1) % kNumberOfVRegisters; | 
| +  switch (instr->Mask(NEONLoadStoreSingleLenMask)) { | 
| +    case NEONLoadStoreSingle1: | 
| +      scale = 1; | 
| +      if (do_load) { | 
| +        ld1(vf, vreg(rt), lane, addr); | 
| +        LogVRead(addr, rt, print_format, lane); | 
| +      } else { | 
| +        st1(vf, vreg(rt), lane, addr); | 
| +        LogVWrite(addr, rt, print_format, lane); | 
| +      } | 
| +      break; | 
| +    case NEONLoadStoreSingle2: | 
| +      scale = 2; | 
| +      if (do_load) { | 
| +        ld2(vf, vreg(rt), vreg(rt2), lane, addr); | 
| +        LogVRead(addr, rt, print_format, lane); | 
| +        LogVRead(addr + esize, rt2, print_format, lane); | 
| +      } else { | 
| +        st2(vf, vreg(rt), vreg(rt2), lane, addr); | 
| +        LogVWrite(addr, rt, print_format, lane); | 
| +        LogVWrite(addr + esize, rt2, print_format, lane); | 
| +      } | 
| +      break; | 
| +    case NEONLoadStoreSingle3: | 
| +      scale = 3; | 
| +      if (do_load) { | 
| +        ld3(vf, vreg(rt), vreg(rt2), vreg(rt3), lane, addr); | 
| +        LogVRead(addr, rt, print_format, lane); | 
| +        LogVRead(addr + esize, rt2, print_format, lane); | 
| +        LogVRead(addr + (2 * esize), rt3, print_format, lane); | 
| +      } else { | 
| +        st3(vf, vreg(rt), vreg(rt2), vreg(rt3), lane, addr); | 
| +        LogVWrite(addr, rt, print_format, lane); | 
| +        LogVWrite(addr + esize, rt2, print_format, lane); | 
| +        LogVWrite(addr + (2 * esize), rt3, print_format, lane); | 
| +      } | 
| +      break; | 
| +    case NEONLoadStoreSingle4: | 
| +      scale = 4; | 
| +      if (do_load) { | 
| +        ld4(vf, vreg(rt), vreg(rt2), vreg(rt3), vreg(rt4), lane, addr); | 
| +        LogVRead(addr, rt, print_format, lane); | 
| +        LogVRead(addr + esize, rt2, print_format, lane); | 
| +        LogVRead(addr + (2 * esize), rt3, print_format, lane); | 
| +        LogVRead(addr + (3 * esize), rt4, print_format, lane); | 
| +      } else { | 
| +        st4(vf, vreg(rt), vreg(rt2), vreg(rt3), vreg(rt4), lane, addr); | 
| +        LogVWrite(addr, rt, print_format, lane); | 
| +        LogVWrite(addr + esize, rt2, print_format, lane); | 
| +        LogVWrite(addr + (2 * esize), rt3, print_format, lane); | 
| +        LogVWrite(addr + (3 * esize), rt4, print_format, lane); | 
| +      } | 
| +      break; | 
| +    default: | 
| +      UNIMPLEMENTED(); | 
| +  } | 
| + | 
| +  if (addr_mode == PostIndex) { | 
| +    // As above, rm == 31 selects the implied immediate post-index form. | 
| +    int rm = instr->Rm(); | 
| +    int lane_size = LaneSizeInBytesFromFormat(vf); | 
| +    set_xreg(instr->Rn(), addr + ((rm == 31) ? (scale * lane_size) : xreg(rm))); | 
| +  } | 
| +} | 
| + | 
| +// Offset-addressed NEON single-structure load/store. | 
| +void Simulator::VisitNEONLoadStoreSingleStruct(Instruction* instr) { | 
| +  NEONLoadStoreSingleStructHelper(instr, Offset); | 
| +} | 
| + | 
| +// Post-index-addressed NEON single-structure load/store. | 
| +void Simulator::VisitNEONLoadStoreSingleStructPostIndex(Instruction* instr) { | 
| +  NEONLoadStoreSingleStructHelper(instr, PostIndex); | 
| +} | 
| + | 
| +// Simulate NEON modified-immediate instructions (MOVI, MVNI, ORR, BIC and | 
| +// the FMOV-immediate forms). The cmode/op fields select the vector format, | 
| +// the expanded immediate, and the operation, following the architectural | 
| +// AdvSIMD modified-immediate encoding table. | 
| +void Simulator::VisitNEONModifiedImmediate(Instruction* instr) { | 
| +  SimVRegister& rd = vreg(instr->Rd()); | 
| +  int cmode = instr->NEONCmode(); | 
| +  int cmode_3_1 = (cmode >> 1) & 7; | 
| +  int cmode_3 = (cmode >> 3) & 1; | 
| +  int cmode_2 = (cmode >> 2) & 1; | 
| +  int cmode_1 = (cmode >> 1) & 1; | 
| +  int cmode_0 = cmode & 1; | 
| +  int q = instr->NEONQ(); | 
| +  int op_bit = instr->NEONModImmOp(); | 
| +  uint64_t imm8 = instr->ImmNEONabcdefgh(); | 
| + | 
| +  // Find the format and immediate value | 
| +  uint64_t imm = 0; | 
| +  VectorFormat vform = kFormatUndefined; | 
| +  switch (cmode_3_1) { | 
| +    case 0x0: | 
| +    case 0x1: | 
| +    case 0x2: | 
| +    case 0x3: | 
| +      // 32-bit lanes, imm8 shifted left by 0, 8, 16 or 24 bits. | 
| +      vform = (q == 1) ? kFormat4S : kFormat2S; | 
| +      imm = imm8 << (8 * cmode_3_1); | 
| +      break; | 
| +    case 0x4: | 
| +    case 0x5: | 
| +      // 16-bit lanes, imm8 shifted left by 0 or 8 bits. | 
| +      vform = (q == 1) ? kFormat8H : kFormat4H; | 
| +      imm = imm8 << (8 * cmode_1); | 
| +      break; | 
| +    case 0x6: | 
| +      // 32-bit lanes, shifted-ones ("MSL") form. | 
| +      vform = (q == 1) ? kFormat4S : kFormat2S; | 
| +      if (cmode_0 == 0) { | 
| +        imm = imm8 << 8 | 0x000000ff; | 
| +      } else { | 
| +        imm = imm8 << 16 | 0x0000ffff; | 
| +      } | 
| +      break; | 
| +    case 0x7: | 
| +      if (cmode_0 == 0 && op_bit == 0) { | 
| +        vform = q ? kFormat16B : kFormat8B; | 
| +        imm = imm8; | 
| +      } else if (cmode_0 == 0 && op_bit == 1) { | 
| +        // 64-bit form: each bit of imm8 expands to a whole byte of 1s. | 
| +        vform = q ? kFormat2D : kFormat1D; | 
| +        imm = 0; | 
| +        for (int i = 0; i < 8; ++i) { | 
| +          if (imm8 & (1 << i)) { | 
| +            imm |= (UINT64_C(0xff) << (8 * i)); | 
| +          } | 
| +        } | 
| +      } else {  // cmode_0 == 1, cmode == 0xf. | 
| +        // FMOV (vector, immediate) forms. | 
| +        if (op_bit == 0) { | 
| +          vform = q ? kFormat4S : kFormat2S; | 
| +          imm = bit_cast<uint32_t>(instr->ImmNEONFP32()); | 
| +        } else if (q == 1) { | 
| +          vform = kFormat2D; | 
| +          imm = bit_cast<uint64_t>(instr->ImmNEONFP64()); | 
| +        } else { | 
| +          DCHECK((q == 0) && (op_bit == 1) && (cmode == 0xf)); | 
| +          VisitUnallocated(instr); | 
| +        } | 
| +      } | 
| +      break; | 
| +    default: | 
| +      UNREACHABLE(); | 
| +      break; | 
| +  } | 
| + | 
| +  // Find the operation. | 
| +  NEONModifiedImmediateOp op; | 
| +  if (cmode_3 == 0) { | 
| +    if (cmode_0 == 0) { | 
| +      op = op_bit ? NEONModifiedImmediate_MVNI : NEONModifiedImmediate_MOVI; | 
| +    } else {  // cmode<0> == '1' | 
| +      op = op_bit ? NEONModifiedImmediate_BIC : NEONModifiedImmediate_ORR; | 
| +    } | 
| +  } else {  // cmode<3> == '1' | 
| +    if (cmode_2 == 0) { | 
| +      if (cmode_0 == 0) { | 
| +        op = op_bit ? NEONModifiedImmediate_MVNI : NEONModifiedImmediate_MOVI; | 
| +      } else {  // cmode<0> == '1' | 
| +        op = op_bit ? NEONModifiedImmediate_BIC : NEONModifiedImmediate_ORR; | 
| +      } | 
| +    } else {  // cmode<2> == '1' | 
| +      if (cmode_1 == 0) { | 
| +        op = op_bit ? NEONModifiedImmediate_MVNI : NEONModifiedImmediate_MOVI; | 
| +      } else {  // cmode<1> == '1' | 
| +        // Both leaves are MOVI; kept separate to mirror the encoding table. | 
| +        if (cmode_0 == 0) { | 
| +          op = NEONModifiedImmediate_MOVI; | 
| +        } else {  // cmode<0> == '1' | 
| +          op = NEONModifiedImmediate_MOVI; | 
| +        } | 
| +      } | 
| +    } | 
| +  } | 
| + | 
| +  // Call the logic function. | 
| +  switch (op) { | 
| +    case NEONModifiedImmediate_ORR: | 
| +      orr(vform, rd, rd, imm); | 
| +      break; | 
| +    case NEONModifiedImmediate_BIC: | 
| +      bic(vform, rd, rd, imm); | 
| +      break; | 
| +    case NEONModifiedImmediate_MOVI: | 
| +      movi(vform, rd, imm); | 
| +      break; | 
| +    case NEONModifiedImmediate_MVNI: | 
| +      mvni(vform, rd, imm); | 
| +      break; | 
| +    default: | 
| +      VisitUnimplemented(instr); | 
| +  } | 
| +} | 
| + | 
// Simulate the NEON scalar two-register miscellaneous group: integer
// compare-against-zero, ABS/NEG (plus saturating variants), saturating
// accumulate, narrowing saturating extracts, and the scalar FP conversions,
// comparisons and reciprocal estimates.
void Simulator::VisitNEONScalar2RegMisc(Instruction* instr) {
  NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap());
  VectorFormat vf = nfd.GetVectorFormat();

  SimVRegister& rd = vreg(instr->Rd());
  SimVRegister& rn = vreg(instr->Rn());

  // Opcodes up to and including NEG use the integer (two-bit size) format.
  if (instr->Mask(NEON2RegMiscOpcode) <= NEON_NEG_scalar_opcode) {
    // These instructions all use a two bit size field, except NOT and RBIT,
    // which use the field to encode the operation.
    switch (instr->Mask(NEONScalar2RegMiscMask)) {
      case NEON_CMEQ_zero_scalar:
        cmp(vf, rd, rn, 0, eq);
        break;
      case NEON_CMGE_zero_scalar:
        cmp(vf, rd, rn, 0, ge);
        break;
      case NEON_CMGT_zero_scalar:
        cmp(vf, rd, rn, 0, gt);
        break;
      case NEON_CMLT_zero_scalar:
        cmp(vf, rd, rn, 0, lt);
        break;
      case NEON_CMLE_zero_scalar:
        cmp(vf, rd, rn, 0, le);
        break;
      case NEON_ABS_scalar:
        abs(vf, rd, rn);
        break;
      case NEON_SQABS_scalar:
        // Saturate the result of the plain operation to get the SQ variant.
        abs(vf, rd, rn).SignedSaturate(vf);
        break;
      case NEON_NEG_scalar:
        neg(vf, rd, rn);
        break;
      case NEON_SQNEG_scalar:
        neg(vf, rd, rn).SignedSaturate(vf);
        break;
      case NEON_SUQADD_scalar:
        suqadd(vf, rd, rn);
        break;
      case NEON_USQADD_scalar:
        usqadd(vf, rd, rn);
        break;
      default:
        UNIMPLEMENTED();
        break;
    }
  } else {
    // FP subgroup: use the FP scalar format and the FPCR rounding mode.
    VectorFormat fpf = nfd.GetVectorFormat(nfd.FPScalarFormatMap());
    FPRounding fpcr_rounding = static_cast<FPRounding>(fpcr().RMode());

    // These instructions all use a one bit size field, except SQXTUN, SQXTN
    // and UQXTN, which use a two bit size field.
    switch (instr->Mask(NEONScalar2RegMiscFPMask)) {
      case NEON_FRECPE_scalar:
        frecpe(fpf, rd, rn, fpcr_rounding);
        break;
      case NEON_FRECPX_scalar:
        frecpx(fpf, rd, rn);
        break;
      case NEON_FRSQRTE_scalar:
        frsqrte(fpf, rd, rn);
        break;
      case NEON_FCMGT_zero_scalar:
        fcmp_zero(fpf, rd, rn, gt);
        break;
      case NEON_FCMGE_zero_scalar:
        fcmp_zero(fpf, rd, rn, ge);
        break;
      case NEON_FCMEQ_zero_scalar:
        fcmp_zero(fpf, rd, rn, eq);
        break;
      case NEON_FCMLE_zero_scalar:
        fcmp_zero(fpf, rd, rn, le);
        break;
      case NEON_FCMLT_zero_scalar:
        fcmp_zero(fpf, rd, rn, lt);
        break;
      case NEON_SCVTF_scalar:
        scvtf(fpf, rd, rn, 0, fpcr_rounding);
        break;
      case NEON_UCVTF_scalar:
        ucvtf(fpf, rd, rn, 0, fpcr_rounding);
        break;
      // FP-to-integer conversions: the suffix selects the rounding mode
      // (N = to nearest/ties-even, P = +inf, M = -inf, Z = zero, A = away).
      case NEON_FCVTNS_scalar:
        fcvts(fpf, rd, rn, FPTieEven);
        break;
      case NEON_FCVTNU_scalar:
        fcvtu(fpf, rd, rn, FPTieEven);
        break;
      case NEON_FCVTPS_scalar:
        fcvts(fpf, rd, rn, FPPositiveInfinity);
        break;
      case NEON_FCVTPU_scalar:
        fcvtu(fpf, rd, rn, FPPositiveInfinity);
        break;
      case NEON_FCVTMS_scalar:
        fcvts(fpf, rd, rn, FPNegativeInfinity);
        break;
      case NEON_FCVTMU_scalar:
        fcvtu(fpf, rd, rn, FPNegativeInfinity);
        break;
      case NEON_FCVTZS_scalar:
        fcvts(fpf, rd, rn, FPZero);
        break;
      case NEON_FCVTZU_scalar:
        fcvtu(fpf, rd, rn, FPZero);
        break;
      case NEON_FCVTAS_scalar:
        fcvts(fpf, rd, rn, FPTieAway);
        break;
      case NEON_FCVTAU_scalar:
        fcvtu(fpf, rd, rn, FPTieAway);
        break;
      case NEON_FCVTXN_scalar:
        // Unlike all of the other FP instructions above, fcvtxn encodes dest
        // size S as size<0>=1. There's only one case, so we ignore the form.
        DCHECK_EQ(instr->Bit(22), 1);
        fcvtxn(kFormatS, rd, rn);
        break;
      default:
        // Narrowing saturating extracts use the integer two-bit size field,
        // so decode them against the integer mask instead.
        switch (instr->Mask(NEONScalar2RegMiscMask)) {
          case NEON_SQXTN_scalar:
            sqxtn(vf, rd, rn);
            break;
          case NEON_UQXTN_scalar:
            uqxtn(vf, rd, rn);
            break;
          case NEON_SQXTUN_scalar:
            sqxtun(vf, rd, rn);
            break;
          default:
            UNIMPLEMENTED();
        }
    }
  }
}
| + | 
| +void Simulator::VisitNEONScalar3Diff(Instruction* instr) { | 
| +  NEONFormatDecoder nfd(instr, NEONFormatDecoder::LongScalarFormatMap()); | 
| +  VectorFormat vf = nfd.GetVectorFormat(); | 
| + | 
| +  SimVRegister& rd = vreg(instr->Rd()); | 
| +  SimVRegister& rn = vreg(instr->Rn()); | 
| +  SimVRegister& rm = vreg(instr->Rm()); | 
| +  switch (instr->Mask(NEONScalar3DiffMask)) { | 
| +    case NEON_SQDMLAL_scalar: | 
| +      sqdmlal(vf, rd, rn, rm); | 
| +      break; | 
| +    case NEON_SQDMLSL_scalar: | 
| +      sqdmlsl(vf, rd, rn, rm); | 
| +      break; | 
| +    case NEON_SQDMULL_scalar: | 
| +      sqdmull(vf, rd, rn, rm); | 
| +      break; | 
| +    default: | 
| +      UNIMPLEMENTED(); | 
| +  } | 
| +} | 
| + | 
// Simulate the NEON scalar "3 same" group: two source scalars and one
// destination of the same element size. Covers integer arithmetic, compares
// and shifts (with saturating/rounding variants built by chaining the plain
// op with SignedSaturate/UnsignedSaturate/Round), and an FP subgroup.
void Simulator::VisitNEONScalar3Same(Instruction* instr) {
  NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap());
  VectorFormat vf = nfd.GetVectorFormat();

  SimVRegister& rd = vreg(instr->Rd());
  SimVRegister& rn = vreg(instr->Rn());
  SimVRegister& rm = vreg(instr->Rm());

  if (instr->Mask(NEONScalar3SameFPFMask) == NEONScalar3SameFPFixed) {
    // FP subgroup: switch to the FP scalar format (one-bit size field).
    vf = nfd.GetVectorFormat(nfd.FPScalarFormatMap());
    switch (instr->Mask(NEONScalar3SameFPMask)) {
      case NEON_FMULX_scalar:
        fmulx(vf, rd, rn, rm);
        break;
      case NEON_FACGE_scalar:
        // FAC* compare the absolute values of the operands.
        fabscmp(vf, rd, rn, rm, ge);
        break;
      case NEON_FACGT_scalar:
        fabscmp(vf, rd, rn, rm, gt);
        break;
      case NEON_FCMEQ_scalar:
        fcmp(vf, rd, rn, rm, eq);
        break;
      case NEON_FCMGE_scalar:
        fcmp(vf, rd, rn, rm, ge);
        break;
      case NEON_FCMGT_scalar:
        fcmp(vf, rd, rn, rm, gt);
        break;
      case NEON_FRECPS_scalar:
        frecps(vf, rd, rn, rm);
        break;
      case NEON_FRSQRTS_scalar:
        frsqrts(vf, rd, rn, rm);
        break;
      case NEON_FABD_scalar:
        fabd(vf, rd, rn, rm);
        break;
      default:
        UNIMPLEMENTED();
    }
  } else {
    switch (instr->Mask(NEONScalar3SameMask)) {
      case NEON_ADD_scalar:
        add(vf, rd, rn, rm);
        break;
      case NEON_SUB_scalar:
        sub(vf, rd, rn, rm);
        break;
      case NEON_CMEQ_scalar:
        cmp(vf, rd, rn, rm, eq);
        break;
      case NEON_CMGE_scalar:
        cmp(vf, rd, rn, rm, ge);
        break;
      case NEON_CMGT_scalar:
        cmp(vf, rd, rn, rm, gt);
        break;
      case NEON_CMHI_scalar:
        // CMHI/CMHS are the unsigned compares.
        cmp(vf, rd, rn, rm, hi);
        break;
      case NEON_CMHS_scalar:
        cmp(vf, rd, rn, rm, hs);
        break;
      case NEON_CMTST_scalar:
        cmptst(vf, rd, rn, rm);
        break;
      case NEON_USHL_scalar:
        ushl(vf, rd, rn, rm);
        break;
      case NEON_SSHL_scalar:
        sshl(vf, rd, rn, rm);
        break;
      case NEON_SQDMULH_scalar:
        sqdmulh(vf, rd, rn, rm);
        break;
      case NEON_SQRDMULH_scalar:
        sqrdmulh(vf, rd, rn, rm);
        break;
      // Saturating / rounding variants: perform the plain operation, then
      // post-process the intermediate result.
      case NEON_UQADD_scalar:
        add(vf, rd, rn, rm).UnsignedSaturate(vf);
        break;
      case NEON_SQADD_scalar:
        add(vf, rd, rn, rm).SignedSaturate(vf);
        break;
      case NEON_UQSUB_scalar:
        sub(vf, rd, rn, rm).UnsignedSaturate(vf);
        break;
      case NEON_SQSUB_scalar:
        sub(vf, rd, rn, rm).SignedSaturate(vf);
        break;
      case NEON_UQSHL_scalar:
        ushl(vf, rd, rn, rm).UnsignedSaturate(vf);
        break;
      case NEON_SQSHL_scalar:
        sshl(vf, rd, rn, rm).SignedSaturate(vf);
        break;
      case NEON_URSHL_scalar:
        ushl(vf, rd, rn, rm).Round(vf);
        break;
      case NEON_SRSHL_scalar:
        sshl(vf, rd, rn, rm).Round(vf);
        break;
      case NEON_UQRSHL_scalar:
        ushl(vf, rd, rn, rm).Round(vf).UnsignedSaturate(vf);
        break;
      case NEON_SQRSHL_scalar:
        sshl(vf, rd, rn, rm).Round(vf).SignedSaturate(vf);
        break;
      default:
        UNIMPLEMENTED();
    }
  }
}
| + | 
| +void Simulator::VisitNEONScalarByIndexedElement(Instruction* instr) { | 
| +  NEONFormatDecoder nfd(instr, NEONFormatDecoder::LongScalarFormatMap()); | 
| +  VectorFormat vf = nfd.GetVectorFormat(); | 
| +  VectorFormat vf_r = nfd.GetVectorFormat(nfd.ScalarFormatMap()); | 
| + | 
| +  SimVRegister& rd = vreg(instr->Rd()); | 
| +  SimVRegister& rn = vreg(instr->Rn()); | 
| +  ByElementOp Op = NULL; | 
| + | 
| +  int rm_reg = instr->Rm(); | 
| +  int index = (instr->NEONH() << 1) | instr->NEONL(); | 
| +  if (instr->NEONSize() == 1) { | 
| +    rm_reg &= 0xf; | 
| +    index = (index << 1) | instr->NEONM(); | 
| +  } | 
| + | 
| +  switch (instr->Mask(NEONScalarByIndexedElementMask)) { | 
| +    case NEON_SQDMULL_byelement_scalar: | 
| +      Op = &Simulator::sqdmull; | 
| +      break; | 
| +    case NEON_SQDMLAL_byelement_scalar: | 
| +      Op = &Simulator::sqdmlal; | 
| +      break; | 
| +    case NEON_SQDMLSL_byelement_scalar: | 
| +      Op = &Simulator::sqdmlsl; | 
| +      break; | 
| +    case NEON_SQDMULH_byelement_scalar: | 
| +      Op = &Simulator::sqdmulh; | 
| +      vf = vf_r; | 
| +      break; | 
| +    case NEON_SQRDMULH_byelement_scalar: | 
| +      Op = &Simulator::sqrdmulh; | 
| +      vf = vf_r; | 
| +      break; | 
| +    default: | 
| +      vf = nfd.GetVectorFormat(nfd.FPScalarFormatMap()); | 
| +      index = instr->NEONH(); | 
| +      if ((instr->FPType() & 1) == 0) { | 
| +        index = (index << 1) | instr->NEONL(); | 
| +      } | 
| +      switch (instr->Mask(NEONScalarByIndexedElementFPMask)) { | 
| +        case NEON_FMUL_byelement_scalar: | 
| +          Op = &Simulator::fmul; | 
| +          break; | 
| +        case NEON_FMLA_byelement_scalar: | 
| +          Op = &Simulator::fmla; | 
| +          break; | 
| +        case NEON_FMLS_byelement_scalar: | 
| +          Op = &Simulator::fmls; | 
| +          break; | 
| +        case NEON_FMULX_byelement_scalar: | 
| +          Op = &Simulator::fmulx; | 
| +          break; | 
| +        default: | 
| +          UNIMPLEMENTED(); | 
| +      } | 
| +  } | 
| + | 
| +  (this->*Op)(vf, rd, rn, vreg(rm_reg), index); | 
| +} | 
| + | 
| +void Simulator::VisitNEONScalarCopy(Instruction* instr) { | 
| +  NEONFormatDecoder nfd(instr, NEONFormatDecoder::TriangularScalarFormatMap()); | 
| +  VectorFormat vf = nfd.GetVectorFormat(); | 
| + | 
| +  SimVRegister& rd = vreg(instr->Rd()); | 
| +  SimVRegister& rn = vreg(instr->Rn()); | 
| + | 
| +  if (instr->Mask(NEONScalarCopyMask) == NEON_DUP_ELEMENT_scalar) { | 
| +    int imm5 = instr->ImmNEON5(); | 
| +    int lsb = LowestSetBitPosition(imm5); | 
| +    int rn_index = imm5 >> lsb; | 
| +    dup_element(vf, rd, rn, rn_index); | 
| +  } else { | 
| +    UNIMPLEMENTED(); | 
| +  } | 
| +} | 
| + | 
| +void Simulator::VisitNEONScalarPairwise(Instruction* instr) { | 
| +  NEONFormatDecoder nfd(instr, NEONFormatDecoder::FPScalarFormatMap()); | 
| +  VectorFormat vf = nfd.GetVectorFormat(); | 
| + | 
| +  SimVRegister& rd = vreg(instr->Rd()); | 
| +  SimVRegister& rn = vreg(instr->Rn()); | 
| +  switch (instr->Mask(NEONScalarPairwiseMask)) { | 
| +    case NEON_ADDP_scalar: | 
| +      addp(vf, rd, rn); | 
| +      break; | 
| +    case NEON_FADDP_scalar: | 
| +      faddp(vf, rd, rn); | 
| +      break; | 
| +    case NEON_FMAXP_scalar: | 
| +      fmaxp(vf, rd, rn); | 
| +      break; | 
| +    case NEON_FMAXNMP_scalar: | 
| +      fmaxnmp(vf, rd, rn); | 
| +      break; | 
| +    case NEON_FMINP_scalar: | 
| +      fminp(vf, rd, rn); | 
| +      break; | 
| +    case NEON_FMINNMP_scalar: | 
| +      fminnmp(vf, rd, rn); | 
| +      break; | 
| +    default: | 
| +      UNIMPLEMENTED(); | 
| +  } | 
| +} | 
| + | 
// Simulate NEON scalar shift-by-immediate instructions: left/right shifts,
// insert shifts, saturating/rounding/accumulating variants, narrowing shifts
// and fixed-point <-> FP conversions.
void Simulator::VisitNEONScalarShiftImmediate(Instruction* instr) {
  SimVRegister& rd = vreg(instr->Rd());
  SimVRegister& rn = vreg(instr->Rn());
  FPRounding fpcr_rounding = static_cast<FPRounding>(fpcr().RMode());

  // Element size is encoded by the highest set bit of immh (bits 22:19):
  // 0001x->B, 001xx->H, 01xxx->S, 1xxxx->D; immh==0000 is undefined.
  static const NEONFormatMap map = {
      {22, 21, 20, 19},
      {NF_UNDEF, NF_B, NF_H, NF_H, NF_S, NF_S, NF_S, NF_S, NF_D, NF_D, NF_D,
       NF_D, NF_D, NF_D, NF_D, NF_D}};
  NEONFormatDecoder nfd(instr, &map);
  VectorFormat vf = nfd.GetVectorFormat();

  // The shift amount is encoded relative to the element size: for an element
  // of 8 << highestSetBit bits, right shifts are (2 * elt_size) - immhimmb
  // and left shifts are immhimmb - elt_size.
  int highestSetBit = HighestSetBitPosition(instr->ImmNEONImmh());
  int immhimmb = instr->ImmNEONImmhImmb();
  int right_shift = (16 << highestSetBit) - immhimmb;
  int left_shift = immhimmb - (8 << highestSetBit);
  switch (instr->Mask(NEONScalarShiftImmediateMask)) {
    case NEON_SHL_scalar:
      shl(vf, rd, rn, left_shift);
      break;
    case NEON_SLI_scalar:
      sli(vf, rd, rn, left_shift);
      break;
    case NEON_SQSHL_imm_scalar:
      sqshl(vf, rd, rn, left_shift);
      break;
    case NEON_UQSHL_imm_scalar:
      uqshl(vf, rd, rn, left_shift);
      break;
    case NEON_SQSHLU_scalar:
      sqshlu(vf, rd, rn, left_shift);
      break;
    case NEON_SRI_scalar:
      sri(vf, rd, rn, right_shift);
      break;
    case NEON_SSHR_scalar:
      sshr(vf, rd, rn, right_shift);
      break;
    case NEON_USHR_scalar:
      ushr(vf, rd, rn, right_shift);
      break;
    // Rounding variants post-process the plain shift result.
    case NEON_SRSHR_scalar:
      sshr(vf, rd, rn, right_shift).Round(vf);
      break;
    case NEON_URSHR_scalar:
      ushr(vf, rd, rn, right_shift).Round(vf);
      break;
    // Accumulating shift-right variants.
    case NEON_SSRA_scalar:
      ssra(vf, rd, rn, right_shift);
      break;
    case NEON_USRA_scalar:
      usra(vf, rd, rn, right_shift);
      break;
    case NEON_SRSRA_scalar:
      srsra(vf, rd, rn, right_shift);
      break;
    case NEON_URSRA_scalar:
      ursra(vf, rd, rn, right_shift);
      break;
    // Narrowing (saturating) shift-right variants.
    case NEON_UQSHRN_scalar:
      uqshrn(vf, rd, rn, right_shift);
      break;
    case NEON_UQRSHRN_scalar:
      uqrshrn(vf, rd, rn, right_shift);
      break;
    case NEON_SQSHRN_scalar:
      sqshrn(vf, rd, rn, right_shift);
      break;
    case NEON_SQRSHRN_scalar:
      sqrshrn(vf, rd, rn, right_shift);
      break;
    case NEON_SQSHRUN_scalar:
      sqshrun(vf, rd, rn, right_shift);
      break;
    case NEON_SQRSHRUN_scalar:
      sqrshrun(vf, rd, rn, right_shift);
      break;
    // Fixed-point conversions; the shift encodes the number of fraction bits.
    case NEON_FCVTZS_imm_scalar:
      fcvts(vf, rd, rn, FPZero, right_shift);
      break;
    case NEON_FCVTZU_imm_scalar:
      fcvtu(vf, rd, rn, FPZero, right_shift);
      break;
    case NEON_SCVTF_imm_scalar:
      scvtf(vf, rd, rn, right_shift, fpcr_rounding);
      break;
    case NEON_UCVTF_imm_scalar:
      ucvtf(vf, rd, rn, right_shift, fpcr_rounding);
      break;
    default:
      UNIMPLEMENTED();
  }
}
| + | 
// Simulate NEON vector shift-by-immediate instructions, including the
// lengthening (SSHLL/USHLL) and narrowing (SHRN etc.) forms, which use the
// long format map and dispatch on Q to the "2" (upper-half) variants.
void Simulator::VisitNEONShiftImmediate(Instruction* instr) {
  SimVRegister& rd = vreg(instr->Rd());
  SimVRegister& rn = vreg(instr->Rn());
  FPRounding fpcr_rounding = static_cast<FPRounding>(fpcr().RMode());

  // 00010->8B, 00011->16B, 001x0->4H, 001x1->8H,
  // 01xx0->2S, 01xx1->4S, 1xxx1->2D, all others undefined.
  static const NEONFormatMap map = {
      {22, 21, 20, 19, 30},
      {NF_UNDEF, NF_UNDEF, NF_8B,    NF_16B, NF_4H,    NF_8H, NF_4H,    NF_8H,
       NF_2S,    NF_4S,    NF_2S,    NF_4S,  NF_2S,    NF_4S, NF_2S,    NF_4S,
       NF_UNDEF, NF_2D,    NF_UNDEF, NF_2D,  NF_UNDEF, NF_2D, NF_UNDEF, NF_2D,
       NF_UNDEF, NF_2D,    NF_UNDEF, NF_2D,  NF_UNDEF, NF_2D, NF_UNDEF, NF_2D}};
  NEONFormatDecoder nfd(instr, &map);
  VectorFormat vf = nfd.GetVectorFormat();

  // 0001->8H, 001x->4S, 01xx->2D, all others undefined.
  static const NEONFormatMap map_l = {
      {22, 21, 20, 19},
      {NF_UNDEF, NF_8H, NF_4S, NF_4S, NF_2D, NF_2D, NF_2D, NF_2D}};
  VectorFormat vf_l = nfd.GetVectorFormat(&map_l);

  // The shift amount is encoded relative to the element size: for an element
  // of 8 << highestSetBit bits, right shifts are (2 * elt_size) - immhimmb
  // and left shifts are immhimmb - elt_size.
  int highestSetBit = HighestSetBitPosition(instr->ImmNEONImmh());
  int immhimmb = instr->ImmNEONImmhImmb();
  int right_shift = (16 << highestSetBit) - immhimmb;
  int left_shift = immhimmb - (8 << highestSetBit);

  switch (instr->Mask(NEONShiftImmediateMask)) {
    case NEON_SHL:
      shl(vf, rd, rn, left_shift);
      break;
    case NEON_SLI:
      sli(vf, rd, rn, left_shift);
      break;
    case NEON_SQSHLU:
      sqshlu(vf, rd, rn, left_shift);
      break;
    case NEON_SRI:
      sri(vf, rd, rn, right_shift);
      break;
    case NEON_SSHR:
      sshr(vf, rd, rn, right_shift);
      break;
    case NEON_USHR:
      ushr(vf, rd, rn, right_shift);
      break;
    // Rounding variants post-process the plain shift result.
    case NEON_SRSHR:
      sshr(vf, rd, rn, right_shift).Round(vf);
      break;
    case NEON_URSHR:
      ushr(vf, rd, rn, right_shift).Round(vf);
      break;
    // Accumulating shift-right variants.
    case NEON_SSRA:
      ssra(vf, rd, rn, right_shift);
      break;
    case NEON_USRA:
      usra(vf, rd, rn, right_shift);
      break;
    case NEON_SRSRA:
      srsra(vf, rd, rn, right_shift);
      break;
    case NEON_URSRA:
      ursra(vf, rd, rn, right_shift);
      break;
    case NEON_SQSHL_imm:
      sqshl(vf, rd, rn, left_shift);
      break;
    case NEON_UQSHL_imm:
      uqshl(vf, rd, rn, left_shift);
      break;
    // Fixed-point conversions; the shift encodes the number of fraction bits.
    case NEON_SCVTF_imm:
      scvtf(vf, rd, rn, right_shift, fpcr_rounding);
      break;
    case NEON_UCVTF_imm:
      ucvtf(vf, rd, rn, right_shift, fpcr_rounding);
      break;
    case NEON_FCVTZS_imm:
      fcvts(vf, rd, rn, FPZero, right_shift);
      break;
    case NEON_FCVTZU_imm:
      fcvtu(vf, rd, rn, FPZero, right_shift);
      break;
    // Lengthening shifts use the long format; Q selects the "2" form, which
    // reads the upper half of the source.
    case NEON_SSHLL:
      vf = vf_l;
      if (instr->Mask(NEON_Q)) {
        sshll2(vf, rd, rn, left_shift);
      } else {
        sshll(vf, rd, rn, left_shift);
      }
      break;
    case NEON_USHLL:
      vf = vf_l;
      if (instr->Mask(NEON_Q)) {
        ushll2(vf, rd, rn, left_shift);
      } else {
        ushll(vf, rd, rn, left_shift);
      }
      break;
    // Narrowing shifts; Q selects the "2" form, which writes the upper half
    // of the destination.
    case NEON_SHRN:
      if (instr->Mask(NEON_Q)) {
        shrn2(vf, rd, rn, right_shift);
      } else {
        shrn(vf, rd, rn, right_shift);
      }
      break;
    case NEON_RSHRN:
      if (instr->Mask(NEON_Q)) {
        rshrn2(vf, rd, rn, right_shift);
      } else {
        rshrn(vf, rd, rn, right_shift);
      }
      break;
    case NEON_UQSHRN:
      if (instr->Mask(NEON_Q)) {
        uqshrn2(vf, rd, rn, right_shift);
      } else {
        uqshrn(vf, rd, rn, right_shift);
      }
      break;
    case NEON_UQRSHRN:
      if (instr->Mask(NEON_Q)) {
        uqrshrn2(vf, rd, rn, right_shift);
      } else {
        uqrshrn(vf, rd, rn, right_shift);
      }
      break;
    case NEON_SQSHRN:
      if (instr->Mask(NEON_Q)) {
        sqshrn2(vf, rd, rn, right_shift);
      } else {
        sqshrn(vf, rd, rn, right_shift);
      }
      break;
    case NEON_SQRSHRN:
      if (instr->Mask(NEON_Q)) {
        sqrshrn2(vf, rd, rn, right_shift);
      } else {
        sqrshrn(vf, rd, rn, right_shift);
      }
      break;
    case NEON_SQSHRUN:
      if (instr->Mask(NEON_Q)) {
        sqshrun2(vf, rd, rn, right_shift);
      } else {
        sqshrun(vf, rd, rn, right_shift);
      }
      break;
    case NEON_SQRSHRUN:
      if (instr->Mask(NEON_Q)) {
        sqrshrun2(vf, rd, rn, right_shift);
      } else {
        sqrshrun(vf, rd, rn, right_shift);
      }
      break;
    default:
      UNIMPLEMENTED();
  }
}
| + | 
| +void Simulator::VisitNEONTable(Instruction* instr) { | 
| +  NEONFormatDecoder nfd(instr, NEONFormatDecoder::LogicalFormatMap()); | 
| +  VectorFormat vf = nfd.GetVectorFormat(); | 
| + | 
| +  SimVRegister& rd = vreg(instr->Rd()); | 
| +  SimVRegister& rn = vreg(instr->Rn()); | 
| +  SimVRegister& rn2 = vreg((instr->Rn() + 1) % kNumberOfVRegisters); | 
| +  SimVRegister& rn3 = vreg((instr->Rn() + 2) % kNumberOfVRegisters); | 
| +  SimVRegister& rn4 = vreg((instr->Rn() + 3) % kNumberOfVRegisters); | 
| +  SimVRegister& rm = vreg(instr->Rm()); | 
| + | 
| +  switch (instr->Mask(NEONTableMask)) { | 
| +    case NEON_TBL_1v: | 
| +      tbl(vf, rd, rn, rm); | 
| +      break; | 
| +    case NEON_TBL_2v: | 
| +      tbl(vf, rd, rn, rn2, rm); | 
| +      break; | 
| +    case NEON_TBL_3v: | 
| +      tbl(vf, rd, rn, rn2, rn3, rm); | 
| +      break; | 
| +    case NEON_TBL_4v: | 
| +      tbl(vf, rd, rn, rn2, rn3, rn4, rm); | 
| +      break; | 
| +    case NEON_TBX_1v: | 
| +      tbx(vf, rd, rn, rm); | 
| +      break; | 
| +    case NEON_TBX_2v: | 
| +      tbx(vf, rd, rn, rn2, rm); | 
| +      break; | 
| +    case NEON_TBX_3v: | 
| +      tbx(vf, rd, rn, rn2, rn3, rm); | 
| +      break; | 
| +    case NEON_TBX_4v: | 
| +      tbx(vf, rd, rn, rn2, rn3, rn4, rm); | 
| +      break; | 
| +    default: | 
| +      UNIMPLEMENTED(); | 
| +  } | 
| +} | 
| + | 
| +void Simulator::VisitNEONPerm(Instruction* instr) { | 
| +  NEONFormatDecoder nfd(instr); | 
| +  VectorFormat vf = nfd.GetVectorFormat(); | 
| + | 
| +  SimVRegister& rd = vreg(instr->Rd()); | 
| +  SimVRegister& rn = vreg(instr->Rn()); | 
| +  SimVRegister& rm = vreg(instr->Rm()); | 
| + | 
| +  switch (instr->Mask(NEONPermMask)) { | 
| +    case NEON_TRN1: | 
| +      trn1(vf, rd, rn, rm); | 
| +      break; | 
| +    case NEON_TRN2: | 
| +      trn2(vf, rd, rn, rm); | 
| +      break; | 
| +    case NEON_UZP1: | 
| +      uzp1(vf, rd, rn, rm); | 
| +      break; | 
| +    case NEON_UZP2: | 
| +      uzp2(vf, rd, rn, rm); | 
| +      break; | 
| +    case NEON_ZIP1: | 
| +      zip1(vf, rd, rn, rm); | 
| +      break; | 
| +    case NEON_ZIP2: | 
| +      zip2(vf, rd, rn, rm); | 
| +      break; | 
| +    default: | 
| +      UNIMPLEMENTED(); | 
| +  } | 
| +} | 
|  | 
| void Simulator::DoPrintf(Instruction* instr) { | 
| DCHECK((instr->Mask(ExceptionMask) == HLT) && | 
|  |