| Index: src/arm/simulator-arm.cc
 | 
| diff --git a/src/arm/simulator-arm.cc b/src/arm/simulator-arm.cc
 | 
| index ad7c4daae19d9323935de5f2adeca98dcea13292..3a3a90225bf0af1fa7ba72f454fca9fd8487eb42 100644
 | 
| --- a/src/arm/simulator-arm.cc
 | 
| +++ b/src/arm/simulator-arm.cc
 | 
| @@ -3988,61 +3988,177 @@ void Simulator::DecodeType6CoprocessorIns(Instruction* instr) {
 | 
|    }
 | 
|  }
 | 
|  
 | 
| +// Templated operations for NEON instructions.
 | 
| +// TODO(bbudge) Add more templates for use in DecodeSpecialCondition.
 | 
| +template <typename T>
 | 
| +int64_t Widen(T value) {
 | 
| +  static_assert(sizeof(int64_t) > sizeof(T), "T must be int32_t or smaller");
 | 
| +  return static_cast<int64_t>(value);
 | 
| +}
 | 
| +
 | 
| +template <typename T>
 | 
| +T Clamp(int64_t value) {
 | 
| +  static_assert(sizeof(int64_t) > sizeof(T), "T must be int32_t or smaller");
 | 
| +  int64_t min = static_cast<int64_t>(std::numeric_limits<T>::min());
 | 
| +  int64_t max = static_cast<int64_t>(std::numeric_limits<T>::max());
 | 
| +  int64_t clamped = std::max(min, std::min(max, value));
 | 
| +  return static_cast<T>(clamped);
 | 
| +}
 | 
| +
 | 
| +template <typename T>
 | 
| +void AddSaturate(Simulator* simulator, int Vd, int Vm, int Vn) {
 | 
| +  static const int kLanes = 16 / sizeof(T);
 | 
| +  T src1[kLanes], src2[kLanes];
 | 
| +  simulator->get_q_register(Vn, src1);
 | 
| +  simulator->get_q_register(Vm, src2);
 | 
| +  for (int i = 0; i < kLanes; i++) {
 | 
| +    src1[i] = Clamp<T>(Widen(src1[i]) + Widen(src2[i]));
 | 
| +  }
 | 
| +  simulator->set_q_register(Vd, src1);
 | 
| +}
 | 
| +
 | 
| +template <typename T>
 | 
| +void SubSaturate(Simulator* simulator, int Vd, int Vm, int Vn) {
 | 
| +  static const int kLanes = 16 / sizeof(T);
 | 
| +  T src1[kLanes], src2[kLanes];
 | 
| +  simulator->get_q_register(Vn, src1);
 | 
| +  simulator->get_q_register(Vm, src2);
 | 
| +  for (int i = 0; i < kLanes; i++) {
 | 
| +    src1[i] = Clamp<T>(Widen(src1[i]) - Widen(src2[i]));
 | 
| +  }
 | 
| +  simulator->set_q_register(Vd, src1);
 | 
| +}
 | 
| +
 | 
|  void Simulator::DecodeSpecialCondition(Instruction* instr) {
 | 
|    switch (instr->SpecialValue()) {
 | 
| -    case 4:
 | 
| -      if (instr->Bits(11, 8) == 1 && instr->Bits(21, 20) == 2 &&
 | 
| -          instr->Bit(4) == 1) {
 | 
| -        // vmov Qd, Qm.
 | 
| -        // vorr, Qd, Qm, Qn.
 | 
| -        int Vd = instr->VFPDRegValue(kSimd128Precision);
 | 
| -        int Vm = instr->VFPMRegValue(kSimd128Precision);
 | 
| -        int Vn = instr->VFPNRegValue(kSimd128Precision);
 | 
| -        uint32_t src1[4];
 | 
| -        get_q_register(Vm, src1);
 | 
| -        if (Vm != Vn) {
 | 
| -          uint32_t src2[4];
 | 
| -          get_q_register(Vn, src2);
 | 
| -          for (int i = 0; i < 4; i++) {
 | 
| -            src1[i] = src1[i] | src2[i];
 | 
| +    case 4: {
 | 
| +      int Vd, Vm, Vn;
 | 
| +      if (instr->Bit(6) == 0) {
 | 
| +        Vd = instr->VFPDRegValue(kDoublePrecision);
 | 
| +        Vm = instr->VFPMRegValue(kDoublePrecision);
 | 
| +        Vn = instr->VFPNRegValue(kDoublePrecision);
 | 
| +      } else {
 | 
| +        Vd = instr->VFPDRegValue(kSimd128Precision);
 | 
| +        Vm = instr->VFPMRegValue(kSimd128Precision);
 | 
| +        Vn = instr->VFPNRegValue(kSimd128Precision);
 | 
| +      }
 | 
| +      switch (instr->Bits(11, 8)) {
 | 
| +        case 0x0: {
 | 
| +          if (instr->Bit(4) == 1) {
 | 
| +            // vqadd.s<size> Qd, Qm, Qn.
 | 
| +            NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
 | 
| +            switch (size) {
 | 
| +              case Neon8:
 | 
| +                AddSaturate<int8_t>(this, Vd, Vm, Vn);
 | 
| +                break;
 | 
| +              case Neon16:
 | 
| +                AddSaturate<int16_t>(this, Vd, Vm, Vn);
 | 
| +                break;
 | 
| +              case Neon32:
 | 
| +                AddSaturate<int32_t>(this, Vd, Vm, Vn);
 | 
| +                break;
 | 
| +              default:
 | 
| +                UNREACHABLE();
 | 
| +                break;
 | 
| +            }
 | 
| +          } else {
 | 
| +            UNIMPLEMENTED();
 | 
|            }
 | 
| +          break;
 | 
|          }
 | 
| -        set_q_register(Vd, src1);
 | 
| -      } else if (instr->Bits(11, 8) == 8) {
 | 
| -        // vadd/vtst
 | 
| -        NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
 | 
| -        int Vd = instr->VFPDRegValue(kSimd128Precision);
 | 
| -        int Vm = instr->VFPMRegValue(kSimd128Precision);
 | 
| -        int Vn = instr->VFPNRegValue(kSimd128Precision);
 | 
| -        if (instr->Bit(4) == 0) {
 | 
| -          // vadd.i<size> Qd, Qm, Qn.
 | 
| +        case 0x1: {
 | 
| +          if (instr->Bits(21, 20) == 2 && instr->Bit(6) == 1 &&
 | 
| +              instr->Bit(4) == 1) {
 | 
| +            // vmov Qd, Qm.
 | 
| +            // vorr, Qd, Qm, Qn.
 | 
| +            uint32_t src1[4];
 | 
| +            get_q_register(Vm, src1);
 | 
| +            if (Vm != Vn) {
 | 
| +              uint32_t src2[4];
 | 
| +              get_q_register(Vn, src2);
 | 
| +              for (int i = 0; i < 4; i++) {
 | 
| +                src1[i] = src1[i] | src2[i];
 | 
| +              }
 | 
| +            }
 | 
| +            set_q_register(Vd, src1);
 | 
| +          } else if (instr->Bits(21, 20) == 0 && instr->Bit(6) == 1 &&
 | 
| +                     instr->Bit(4) == 1) {
 | 
| +            // vand Qd, Qm, Qn.
 | 
| +            uint32_t src1[4], src2[4];
 | 
| +            get_q_register(Vn, src1);
 | 
| +            get_q_register(Vm, src2);
 | 
| +            for (int i = 0; i < 4; i++) {
 | 
| +              src1[i] = src1[i] & src2[i];
 | 
| +            }
 | 
| +            set_q_register(Vd, src1);
 | 
| +          } else {
 | 
| +            UNIMPLEMENTED();
 | 
| +          }
 | 
| +          break;
 | 
| +        }
 | 
| +        case 0x2: {
 | 
| +          if (instr->Bit(4) == 1) {
 | 
| +            // vqsub.s<size> Qd, Qm, Qn.
 | 
| +            NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
 | 
| +            switch (size) {
 | 
| +              case Neon8:
 | 
| +                SubSaturate<int8_t>(this, Vd, Vm, Vn);
 | 
| +                break;
 | 
| +              case Neon16:
 | 
| +                SubSaturate<int16_t>(this, Vd, Vm, Vn);
 | 
| +                break;
 | 
| +              case Neon32:
 | 
| +                SubSaturate<int32_t>(this, Vd, Vm, Vn);
 | 
| +                break;
 | 
| +              default:
 | 
| +                UNREACHABLE();
 | 
| +                break;
 | 
| +            }
 | 
| +          } else {
 | 
| +            UNIMPLEMENTED();
 | 
| +          }
 | 
| +          break;
 | 
| +        }
 | 
| +        case 0x3: {
 | 
| +          // vcge/vcgt.s<size> Qd, Qm, Qn.
 | 
| +          bool ge = instr->Bit(4) == 1;
 | 
| +          NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
 | 
|            switch (size) {
 | 
|              case Neon8: {
 | 
| -              uint8_t src1[16], src2[16];
 | 
| +              int8_t src1[16], src2[16];
 | 
|                get_q_register(Vn, src1);
 | 
|                get_q_register(Vm, src2);
 | 
|                for (int i = 0; i < 16; i++) {
 | 
| -                src1[i] += src2[i];
 | 
| +                if (ge)
 | 
| +                  src1[i] = src1[i] >= src2[i] ? 0xFF : 0;
 | 
| +                else
 | 
| +                  src1[i] = src1[i] > src2[i] ? 0xFF : 0;
 | 
|                }
 | 
|                set_q_register(Vd, src1);
 | 
|                break;
 | 
|              }
 | 
|              case Neon16: {
 | 
| -              uint16_t src1[8], src2[8];
 | 
| +              int16_t src1[8], src2[8];
 | 
|                get_q_register(Vn, src1);
 | 
|                get_q_register(Vm, src2);
 | 
|                for (int i = 0; i < 8; i++) {
 | 
| -                src1[i] += src2[i];
 | 
| +                if (ge)
 | 
| +                  src1[i] = src1[i] >= src2[i] ? 0xFFFF : 0;
 | 
| +                else
 | 
| +                  src1[i] = src1[i] > src2[i] ? 0xFFFF : 0;
 | 
|                }
 | 
|                set_q_register(Vd, src1);
 | 
|                break;
 | 
|              }
 | 
|              case Neon32: {
 | 
| -              uint32_t src1[4], src2[4];
 | 
| +              int32_t src1[4], src2[4];
 | 
|                get_q_register(Vn, src1);
 | 
|                get_q_register(Vm, src2);
 | 
|                for (int i = 0; i < 4; i++) {
 | 
| -                src1[i] += src2[i];
 | 
| +                if (ge)
 | 
| +                  src1[i] = src1[i] >= src2[i] ? 0xFFFFFFFF : 0;
 | 
| +                else
 | 
| +                  src1[i] = src1[i] > src2[i] ? 0xFFFFFFFF : 0;
 | 
|                }
 | 
|                set_q_register(Vd, src1);
 | 
|                break;
 | 
| @@ -4051,35 +4167,48 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
 | 
|                UNREACHABLE();
 | 
|                break;
 | 
|            }
 | 
| -        } else {
 | 
| -          // vtst.i<size> Qd, Qm, Qn.
 | 
| +          break;
 | 
| +        }
 | 
| +        case 0x6: {
 | 
| +          // vmin/vmax.s<size> Qd, Qm, Qn.
 | 
| +          NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
 | 
| +          bool min = instr->Bit(4) != 0;
 | 
|            switch (size) {
 | 
|              case Neon8: {
 | 
| -              uint8_t src1[16], src2[16];
 | 
| +              int8_t src1[16], src2[16];
 | 
|                get_q_register(Vn, src1);
 | 
|                get_q_register(Vm, src2);
 | 
|                for (int i = 0; i < 16; i++) {
 | 
| -                src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFu : 0;
 | 
| +                if (min)
 | 
| +                  src1[i] = std::min(src1[i], src2[i]);
 | 
| +                else
 | 
| +                  src1[i] = std::max(src1[i], src2[i]);
 | 
|                }
 | 
|                set_q_register(Vd, src1);
 | 
|                break;
 | 
|              }
 | 
|              case Neon16: {
 | 
| -              uint16_t src1[8], src2[8];
 | 
| +              int16_t src1[8], src2[8];
 | 
|                get_q_register(Vn, src1);
 | 
|                get_q_register(Vm, src2);
 | 
|                for (int i = 0; i < 8; i++) {
 | 
| -                src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFFFu : 0;
 | 
| +                if (min)
 | 
| +                  src1[i] = std::min(src1[i], src2[i]);
 | 
| +                else
 | 
| +                  src1[i] = std::max(src1[i], src2[i]);
 | 
|                }
 | 
|                set_q_register(Vd, src1);
 | 
|                break;
 | 
|              }
 | 
|              case Neon32: {
 | 
| -              uint32_t src1[4], src2[4];
 | 
| +              int32_t src1[4], src2[4];
 | 
|                get_q_register(Vn, src1);
 | 
|                get_q_register(Vm, src2);
 | 
|                for (int i = 0; i < 4; i++) {
 | 
| -                src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFFFFFFFu : 0;
 | 
| +                if (min)
 | 
| +                  src1[i] = std::min(src1[i], src2[i]);
 | 
| +                else
 | 
| +                  src1[i] = std::max(src1[i], src2[i]);
 | 
|                }
 | 
|                set_q_register(Vd, src1);
 | 
|                break;
 | 
| @@ -4088,234 +4217,210 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
 | 
|                UNREACHABLE();
 | 
|                break;
 | 
|            }
 | 
| +          break;
 | 
|          }
 | 
| -      } else if (instr->Bits(11, 8) == 0xd && instr->Bit(20) == 0 &&
 | 
| -                 instr->Bit(4) == 0) {
 | 
| -        int Vd = instr->VFPDRegValue(kSimd128Precision);
 | 
| -        int Vm = instr->VFPMRegValue(kSimd128Precision);
 | 
| -        int Vn = instr->VFPNRegValue(kSimd128Precision);
 | 
| -        float src1[4], src2[4];
 | 
| -        get_q_register(Vn, src1);
 | 
| -        get_q_register(Vm, src2);
 | 
| -        for (int i = 0; i < 4; i++) {
 | 
| -          if (instr->Bit(21) == 0) {
 | 
| -            // vadd.f32 Qd, Qm, Qn.
 | 
| -            src1[i] = src1[i] + src2[i];
 | 
| +        case 0x8: {
 | 
| +          // vadd/vtst
 | 
| +          NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
 | 
| +          if (instr->Bit(4) == 0) {
 | 
| +            // vadd.i<size> Qd, Qm, Qn.
 | 
| +            switch (size) {
 | 
| +              case Neon8: {
 | 
| +                uint8_t src1[16], src2[16];
 | 
| +                get_q_register(Vn, src1);
 | 
| +                get_q_register(Vm, src2);
 | 
| +                for (int i = 0; i < 16; i++) {
 | 
| +                  src1[i] += src2[i];
 | 
| +                }
 | 
| +                set_q_register(Vd, src1);
 | 
| +                break;
 | 
| +              }
 | 
| +              case Neon16: {
 | 
| +                uint16_t src1[8], src2[8];
 | 
| +                get_q_register(Vn, src1);
 | 
| +                get_q_register(Vm, src2);
 | 
| +                for (int i = 0; i < 8; i++) {
 | 
| +                  src1[i] += src2[i];
 | 
| +                }
 | 
| +                set_q_register(Vd, src1);
 | 
| +                break;
 | 
| +              }
 | 
| +              case Neon32: {
 | 
| +                uint32_t src1[4], src2[4];
 | 
| +                get_q_register(Vn, src1);
 | 
| +                get_q_register(Vm, src2);
 | 
| +                for (int i = 0; i < 4; i++) {
 | 
| +                  src1[i] += src2[i];
 | 
| +                }
 | 
| +                set_q_register(Vd, src1);
 | 
| +                break;
 | 
| +              }
 | 
| +              default:
 | 
| +                UNREACHABLE();
 | 
| +                break;
 | 
| +            }
 | 
|            } else {
 | 
| -            // vsub.f32 Qd, Qm, Qn.
 | 
| -            src1[i] = src1[i] - src2[i];
 | 
| -          }
 | 
| -        }
 | 
| -        set_q_register(Vd, src1);
 | 
| -      } else if (instr->Bits(11, 8) == 0x9 && instr->Bit(6) == 1 &&
 | 
| -                 instr->Bit(4) == 1) {
 | 
| -        // vmul.i<size> Qd, Qm, Qn.
 | 
| -        NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
 | 
| -        int Vd = instr->VFPDRegValue(kSimd128Precision);
 | 
| -        int Vm = instr->VFPMRegValue(kSimd128Precision);
 | 
| -        int Vn = instr->VFPNRegValue(kSimd128Precision);
 | 
| -        switch (size) {
 | 
| -          case Neon8: {
 | 
| -            uint8_t src1[16], src2[16];
 | 
| -            get_q_register(Vn, src1);
 | 
| -            get_q_register(Vm, src2);
 | 
| -            for (int i = 0; i < 16; i++) {
 | 
| -              src1[i] *= src2[i];
 | 
| +            // vtst.i<size> Qd, Qm, Qn.
 | 
| +            switch (size) {
 | 
| +              case Neon8: {
 | 
| +                uint8_t src1[16], src2[16];
 | 
| +                get_q_register(Vn, src1);
 | 
| +                get_q_register(Vm, src2);
 | 
| +                for (int i = 0; i < 16; i++) {
 | 
| +                  src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFu : 0;
 | 
| +                }
 | 
| +                set_q_register(Vd, src1);
 | 
| +                break;
 | 
| +              }
 | 
| +              case Neon16: {
 | 
| +                uint16_t src1[8], src2[8];
 | 
| +                get_q_register(Vn, src1);
 | 
| +                get_q_register(Vm, src2);
 | 
| +                for (int i = 0; i < 8; i++) {
 | 
| +                  src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFFFu : 0;
 | 
| +                }
 | 
| +                set_q_register(Vd, src1);
 | 
| +                break;
 | 
| +              }
 | 
| +              case Neon32: {
 | 
| +                uint32_t src1[4], src2[4];
 | 
| +                get_q_register(Vn, src1);
 | 
| +                get_q_register(Vm, src2);
 | 
| +                for (int i = 0; i < 4; i++) {
 | 
| +                  src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFFFFFFFu : 0;
 | 
| +                }
 | 
| +                set_q_register(Vd, src1);
 | 
| +                break;
 | 
| +              }
 | 
| +              default:
 | 
| +                UNREACHABLE();
 | 
| +                break;
 | 
|              }
 | 
| -            set_q_register(Vd, src1);
 | 
| -            break;
 | 
|            }
 | 
| -          case Neon16: {
 | 
| -            uint16_t src1[8], src2[8];
 | 
| -            get_q_register(Vn, src1);
 | 
| -            get_q_register(Vm, src2);
 | 
| -            for (int i = 0; i < 8; i++) {
 | 
| -              src1[i] *= src2[i];
 | 
| +          break;
 | 
| +        }
 | 
| +        case 0x9: {
 | 
| +          if (instr->Bit(6) == 1 && instr->Bit(4) == 1) {
 | 
| +            // vmul.i<size> Qd, Qm, Qn.
 | 
| +            NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
 | 
| +            switch (size) {
 | 
| +              case Neon8: {
 | 
| +                uint8_t src1[16], src2[16];
 | 
| +                get_q_register(Vn, src1);
 | 
| +                get_q_register(Vm, src2);
 | 
| +                for (int i = 0; i < 16; i++) {
 | 
| +                  src1[i] *= src2[i];
 | 
| +                }
 | 
| +                set_q_register(Vd, src1);
 | 
| +                break;
 | 
| +              }
 | 
| +              case Neon16: {
 | 
| +                uint16_t src1[8], src2[8];
 | 
| +                get_q_register(Vn, src1);
 | 
| +                get_q_register(Vm, src2);
 | 
| +                for (int i = 0; i < 8; i++) {
 | 
| +                  src1[i] *= src2[i];
 | 
| +                }
 | 
| +                set_q_register(Vd, src1);
 | 
| +                break;
 | 
| +              }
 | 
| +              case Neon32: {
 | 
| +                uint32_t src1[4], src2[4];
 | 
| +                get_q_register(Vn, src1);
 | 
| +                get_q_register(Vm, src2);
 | 
| +                for (int i = 0; i < 4; i++) {
 | 
| +                  src1[i] *= src2[i];
 | 
| +                }
 | 
| +                set_q_register(Vd, src1);
 | 
| +                break;
 | 
| +              }
 | 
| +              default:
 | 
| +                UNREACHABLE();
 | 
| +                break;
 | 
|              }
 | 
| -            set_q_register(Vd, src1);
 | 
| -            break;
 | 
| +          } else {
 | 
| +            UNIMPLEMENTED();
 | 
|            }
 | 
| -          case Neon32: {
 | 
| -            uint32_t src1[4], src2[4];
 | 
| +          break;
 | 
| +        }
 | 
| +        case 0xd: {
 | 
| +          if (instr->Bit(4) == 0) {
 | 
| +            float src1[4], src2[4];
 | 
|              get_q_register(Vn, src1);
 | 
|              get_q_register(Vm, src2);
 | 
|              for (int i = 0; i < 4; i++) {
 | 
| -              src1[i] *= src2[i];
 | 
| +              if (instr->Bit(21) == 0) {
 | 
| +                // vadd.f32 Qd, Qm, Qn.
 | 
| +                src1[i] = src1[i] + src2[i];
 | 
| +              } else {
 | 
| +                // vsub.f32 Qd, Qm, Qn.
 | 
| +                src1[i] = src1[i] - src2[i];
 | 
| +              }
 | 
|              }
 | 
|              set_q_register(Vd, src1);
 | 
| -            break;
 | 
| -          }
 | 
| -          default:
 | 
| +          } else {
 | 
|              UNIMPLEMENTED();
 | 
| -            break;
 | 
| -        }
 | 
| -      } else if (instr->Bits(11, 8) == 0xe && instr->Bits(21, 20) == 0 &&
 | 
| -                 instr->Bit(4) == 0) {
 | 
| -        // vceq.f32.
 | 
| -        int Vd = instr->VFPDRegValue(kSimd128Precision);
 | 
| -        int Vm = instr->VFPMRegValue(kSimd128Precision);
 | 
| -        int Vn = instr->VFPNRegValue(kSimd128Precision);
 | 
| -        float src1[4], src2[4];
 | 
| -        get_q_register(Vn, src1);
 | 
| -        get_q_register(Vm, src2);
 | 
| -        uint32_t dst[4];
 | 
| -        for (int i = 0; i < 4; i++) {
 | 
| -          dst[i] = (src1[i] == src2[i]) ? 0xFFFFFFFF : 0;
 | 
| -        }
 | 
| -        set_q_register(Vd, dst);
 | 
| -      } else if (instr->Bits(11, 8) == 1 && instr->Bits(21, 20) == 0 &&
 | 
| -                 instr->Bit(6) == 1 && instr->Bit(4) == 1) {
 | 
| -        int Vd = instr->VFPDRegValue(kSimd128Precision);
 | 
| -        int Vm = instr->VFPMRegValue(kSimd128Precision);
 | 
| -        int Vn = instr->VFPNRegValue(kSimd128Precision);
 | 
| -        // vand Qd, Qm, Qn.
 | 
| -        uint32_t src1[4], src2[4];
 | 
| -        get_q_register(Vn, src1);
 | 
| -        get_q_register(Vm, src2);
 | 
| -        for (int i = 0; i < 4; i++) {
 | 
| -          src1[i] = src1[i] & src2[i];
 | 
| -        }
 | 
| -        set_q_register(Vd, src1);
 | 
| -      } else if (instr->Bits(11, 8) == 0x3) {
 | 
| -        // vcge/vcgt.s<size> Qd, Qm, Qn.
 | 
| -        bool ge = instr->Bit(4) == 1;
 | 
| -        NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
 | 
| -        int Vd = instr->VFPDRegValue(kSimd128Precision);
 | 
| -        int Vm = instr->VFPMRegValue(kSimd128Precision);
 | 
| -        int Vn = instr->VFPNRegValue(kSimd128Precision);
 | 
| -        switch (size) {
 | 
| -          case Neon8: {
 | 
| -            int8_t src1[16], src2[16];
 | 
| -            get_q_register(Vn, src1);
 | 
| -            get_q_register(Vm, src2);
 | 
| -            for (int i = 0; i < 16; i++) {
 | 
| -              if (ge)
 | 
| -                src1[i] = src1[i] >= src2[i] ? 0xFF : 0;
 | 
| -              else
 | 
| -                src1[i] = src1[i] > src2[i] ? 0xFF : 0;
 | 
| -            }
 | 
| -            set_q_register(Vd, src1);
 | 
| -            break;
 | 
| -          }
 | 
| -          case Neon16: {
 | 
| -            int16_t src1[8], src2[8];
 | 
| -            get_q_register(Vn, src1);
 | 
| -            get_q_register(Vm, src2);
 | 
| -            for (int i = 0; i < 8; i++) {
 | 
| -              if (ge)
 | 
| -                src1[i] = src1[i] >= src2[i] ? 0xFFFF : 0;
 | 
| -              else
 | 
| -                src1[i] = src1[i] > src2[i] ? 0xFFFF : 0;
 | 
| -            }
 | 
| -            set_q_register(Vd, src1);
 | 
| -            break;
 | 
|            }
 | 
| -          case Neon32: {
 | 
| -            int32_t src1[4], src2[4];
 | 
| +          break;
 | 
| +        }
 | 
| +        case 0xe: {
 | 
| +          if (instr->Bits(21, 20) == 0 && instr->Bit(4) == 0) {
 | 
| +            // vceq.f32.
 | 
| +            float src1[4], src2[4];
 | 
|              get_q_register(Vn, src1);
 | 
|              get_q_register(Vm, src2);
 | 
| +            uint32_t dst[4];
 | 
|              for (int i = 0; i < 4; i++) {
 | 
| -              if (ge)
 | 
| -                src1[i] = src1[i] >= src2[i] ? 0xFFFFFFFF : 0;
 | 
| -              else
 | 
| -                src1[i] = src1[i] > src2[i] ? 0xFFFFFFFF : 0;
 | 
| -            }
 | 
| -            set_q_register(Vd, src1);
 | 
| -            break;
 | 
| -          }
 | 
| -          default:
 | 
| -            UNREACHABLE();
 | 
| -            break;
 | 
| -        }
 | 
| -      } else if (instr->Bits(11, 8) == 0xf && instr->Bit(20) == 0 &&
 | 
| -                 instr->Bit(6) == 1) {
 | 
| -        int Vd = instr->VFPDRegValue(kSimd128Precision);
 | 
| -        int Vm = instr->VFPMRegValue(kSimd128Precision);
 | 
| -        int Vn = instr->VFPNRegValue(kSimd128Precision);
 | 
| -        float src1[4], src2[4];
 | 
| -        get_q_register(Vn, src1);
 | 
| -        get_q_register(Vm, src2);
 | 
| -        if (instr->Bit(4) == 1) {
 | 
| -          if (instr->Bit(21) == 0) {
 | 
| -            // vrecps.f32 Qd, Qm, Qn.
 | 
| -            for (int i = 0; i < 4; i++) {
 | 
| -              src1[i] = 2.0f - src1[i] * src2[i];
 | 
| -            }
 | 
| -          } else {
 | 
| -            // vrsqrts.f32 Qd, Qm, Qn.
 | 
| -            for (int i = 0; i < 4; i++) {
 | 
| -              src1[i] = (3.0f - src1[i] * src2[i]) * 0.5f;
 | 
| -            }
 | 
| -          }
 | 
| -        } else {
 | 
| -          if (instr->Bit(21) == 1) {
 | 
| -            // vmin.f32 Qd, Qm, Qn.
 | 
| -            for (int i = 0; i < 4; i++) {
 | 
| -              src1[i] = std::min(src1[i], src2[i]);
 | 
| +              dst[i] = (src1[i] == src2[i]) ? 0xFFFFFFFF : 0;
 | 
|              }
 | 
| +            set_q_register(Vd, dst);
 | 
|            } else {
 | 
| -            // vmax.f32 Qd, Qm, Qn.
 | 
| -            for (int i = 0; i < 4; i++) {
 | 
| -              src1[i] = std::max(src1[i], src2[i]);
 | 
| -            }
 | 
| +            UNIMPLEMENTED();
 | 
|            }
 | 
| +          break;
 | 
|          }
 | 
| -        set_q_register(Vd, src1);
 | 
| -      } else if (instr->Bits(11, 8) == 0x6) {
 | 
| -        // vmin/vmax.s<size> Qd, Qm, Qn.
 | 
| -        NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
 | 
| -        int Vd = instr->VFPDRegValue(kSimd128Precision);
 | 
| -        int Vm = instr->VFPMRegValue(kSimd128Precision);
 | 
| -        int Vn = instr->VFPNRegValue(kSimd128Precision);
 | 
| -        bool min = instr->Bit(4) != 0;
 | 
| -        switch (size) {
 | 
| -          case Neon8: {
 | 
| -            int8_t src1[16], src2[16];
 | 
| -            get_q_register(Vn, src1);
 | 
| -            get_q_register(Vm, src2);
 | 
| -            for (int i = 0; i < 16; i++) {
 | 
| -              if (min)
 | 
| -                src1[i] = std::min(src1[i], src2[i]);
 | 
| -              else
 | 
| -                src1[i] = std::max(src1[i], src2[i]);
 | 
| -            }
 | 
| -            set_q_register(Vd, src1);
 | 
| -            break;
 | 
| -          }
 | 
| -          case Neon16: {
 | 
| -            int16_t src1[8], src2[8];
 | 
| -            get_q_register(Vn, src1);
 | 
| -            get_q_register(Vm, src2);
 | 
| -            for (int i = 0; i < 8; i++) {
 | 
| -              if (min)
 | 
| -                src1[i] = std::min(src1[i], src2[i]);
 | 
| -              else
 | 
| -                src1[i] = std::max(src1[i], src2[i]);
 | 
| -            }
 | 
| -            set_q_register(Vd, src1);
 | 
| -            break;
 | 
| -          }
 | 
| -          case Neon32: {
 | 
| -            int32_t src1[4], src2[4];
 | 
| +        case 0xf: {
 | 
| +          if (instr->Bit(20) == 0 && instr->Bit(6) == 1) {
 | 
| +            float src1[4], src2[4];
 | 
|              get_q_register(Vn, src1);
 | 
|              get_q_register(Vm, src2);
 | 
| -            for (int i = 0; i < 4; i++) {
 | 
| -              if (min)
 | 
| -                src1[i] = std::min(src1[i], src2[i]);
 | 
| -              else
 | 
| -                src1[i] = std::max(src1[i], src2[i]);
 | 
| +            if (instr->Bit(4) == 1) {
 | 
| +              if (instr->Bit(21) == 0) {
 | 
| +                // vrecps.f32 Qd, Qm, Qn.
 | 
| +                for (int i = 0; i < 4; i++) {
 | 
| +                  src1[i] = 2.0f - src1[i] * src2[i];
 | 
| +                }
 | 
| +              } else {
 | 
| +                // vrsqrts.f32 Qd, Qm, Qn.
 | 
| +                for (int i = 0; i < 4; i++) {
 | 
| +                  src1[i] = (3.0f - src1[i] * src2[i]) * 0.5f;
 | 
| +                }
 | 
| +              }
 | 
| +            } else {
 | 
| +              if (instr->Bit(21) == 1) {
 | 
| +                // vmin.f32 Qd, Qm, Qn.
 | 
| +                for (int i = 0; i < 4; i++) {
 | 
| +                  src1[i] = std::min(src1[i], src2[i]);
 | 
| +                }
 | 
| +              } else {
 | 
| +                // vmax.f32 Qd, Qm, Qn.
 | 
| +                for (int i = 0; i < 4; i++) {
 | 
| +                  src1[i] = std::max(src1[i], src2[i]);
 | 
| +                }
 | 
| +              }
 | 
|              }
 | 
|              set_q_register(Vd, src1);
 | 
| -            break;
 | 
| +          } else {
 | 
| +            UNIMPLEMENTED();
 | 
|            }
 | 
| -          default:
 | 
| -            UNREACHABLE();
 | 
| -            break;
 | 
| +          break;
 | 
|          }
 | 
| -      } else {
 | 
| -        UNIMPLEMENTED();
 | 
| +        default:
 | 
| +          UNIMPLEMENTED();
 | 
| +          break;
 | 
|        }
 | 
|        break;
 | 
| +    }
 | 
|      case 5:
 | 
|        if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) &&
 | 
|            (instr->Bit(4) == 1)) {
 | 
| @@ -4436,265 +4541,319 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
 | 
|          UNIMPLEMENTED();
 | 
|        }
 | 
|        break;
 | 
| -    case 6:
 | 
| -      if (instr->Bits(11, 8) == 8 && instr->Bit(4) == 0) {
 | 
| -        // vsub.size Qd, Qm, Qn.
 | 
| -        NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
 | 
| -        int Vd = instr->VFPDRegValue(kSimd128Precision);
 | 
| -        int Vm = instr->VFPMRegValue(kSimd128Precision);
 | 
| -        int Vn = instr->VFPNRegValue(kSimd128Precision);
 | 
| -        switch (size) {
 | 
| -          case Neon8: {
 | 
| -            uint8_t src1[16], src2[16];
 | 
| -            get_q_register(Vn, src1);
 | 
| -            get_q_register(Vm, src2);
 | 
| -            for (int i = 0; i < 16; i++) {
 | 
| -              src1[i] -= src2[i];
 | 
| -            }
 | 
| -            set_q_register(Vd, src1);
 | 
| -            break;
 | 
| -          }
 | 
| -          case Neon16: {
 | 
| -            uint16_t src1[8], src2[8];
 | 
| -            get_q_register(Vn, src1);
 | 
| -            get_q_register(Vm, src2);
 | 
| -            for (int i = 0; i < 8; i++) {
 | 
| -              src1[i] -= src2[i];
 | 
| +    case 6: {
 | 
| +      int Vd, Vm, Vn;
 | 
| +      if (instr->Bit(6) == 0) {
 | 
| +        Vd = instr->VFPDRegValue(kDoublePrecision);
 | 
| +        Vm = instr->VFPMRegValue(kDoublePrecision);
 | 
| +        Vn = instr->VFPNRegValue(kDoublePrecision);
 | 
| +      } else {
 | 
| +        Vd = instr->VFPDRegValue(kSimd128Precision);
 | 
| +        Vm = instr->VFPMRegValue(kSimd128Precision);
 | 
| +        Vn = instr->VFPNRegValue(kSimd128Precision);
 | 
| +      }
 | 
| +      switch (instr->Bits(11, 8)) {
 | 
| +        case 0x0: {
 | 
| +          if (instr->Bit(4) == 1) {
 | 
| +            // vqadd.u<size> Qd, Qm, Qn.
 | 
| +            NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
 | 
| +            switch (size) {
 | 
| +              case Neon8:
 | 
| +                AddSaturate<uint8_t>(this, Vd, Vm, Vn);
 | 
| +                break;
 | 
| +              case Neon16:
 | 
| +                AddSaturate<uint16_t>(this, Vd, Vm, Vn);
 | 
| +                break;
 | 
| +              case Neon32:
 | 
| +                AddSaturate<uint32_t>(this, Vd, Vm, Vn);
 | 
| +                break;
 | 
| +              default:
 | 
| +                UNREACHABLE();
 | 
| +                break;
 | 
|              }
 | 
| -            set_q_register(Vd, src1);
 | 
| -            break;
 | 
| +          } else {
 | 
| +            UNIMPLEMENTED();
 | 
|            }
 | 
| -          case Neon32: {
 | 
| -            uint32_t src1[4], src2[4];
 | 
| +          break;
 | 
| +        }
 | 
| +        case 0x1: {
 | 
| +          if (instr->Bits(21, 20) == 1 && instr->Bit(4) == 1) {
 | 
| +            // vbsl.size Qd, Qm, Qn.
 | 
| +            uint32_t dst[4], src1[4], src2[4];
 | 
| +            get_q_register(Vd, dst);
 | 
|              get_q_register(Vn, src1);
 | 
|              get_q_register(Vm, src2);
 | 
|              for (int i = 0; i < 4; i++) {
 | 
| -              src1[i] -= src2[i];
 | 
| +              dst[i] = (dst[i] & src1[i]) | (~dst[i] & src2[i]);
 | 
|              }
 | 
| -            set_q_register(Vd, src1);
 | 
| -            break;
 | 
| +            set_q_register(Vd, dst);
 | 
| +          } else if (instr->Bits(21, 20) == 0 && instr->Bit(4) == 1) {
 | 
| +            if (instr->Bit(6) == 0) {
 | 
| +              // veor Dd, Dn, Dm
 | 
| +              uint64_t src1, src2;
 | 
| +              get_d_register(Vn, &src1);
 | 
| +              get_d_register(Vm, &src2);
 | 
| +              src1 ^= src2;
 | 
| +              set_d_register(Vd, &src1);
 | 
| +
 | 
| +            } else {
 | 
| +              // veor Qd, Qn, Qm
 | 
| +              uint32_t src1[4], src2[4];
 | 
| +              get_q_register(Vn, src1);
 | 
| +              get_q_register(Vm, src2);
 | 
| +              for (int i = 0; i < 4; i++) src1[i] ^= src2[i];
 | 
| +              set_q_register(Vd, src1);
 | 
| +            }
 | 
| +          } else {
 | 
| +            UNIMPLEMENTED();
 | 
|            }
 | 
| -          default:
 | 
| -            UNREACHABLE();
 | 
| -            break;
 | 
| +          break;
 | 
|          }
 | 
| -      } else if (instr->Bits(11, 8) == 8 && instr->Bit(4) == 1) {
 | 
| -        // vceq.size Qd, Qm, Qn.
 | 
| -        NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
 | 
| -        int Vd = instr->VFPDRegValue(kSimd128Precision);
 | 
| -        int Vm = instr->VFPMRegValue(kSimd128Precision);
 | 
| -        int Vn = instr->VFPNRegValue(kSimd128Precision);
 | 
| -        switch (size) {
 | 
| -          case Neon8: {
 | 
| -            uint8_t src1[16], src2[16];
 | 
| -            get_q_register(Vn, src1);
 | 
| -            get_q_register(Vm, src2);
 | 
| -            for (int i = 0; i < 16; i++) {
 | 
| -              src1[i] = (src1[i] == src2[i]) ? 0xFFu : 0;
 | 
| +        case 0x2: {
 | 
| +          if (instr->Bit(4) == 1) {
 | 
| +            // vqsub.u<size> Qd, Qm, Qn.
 | 
| +            NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
 | 
| +            switch (size) {
 | 
| +              case Neon8:
 | 
| +                SubSaturate<uint8_t>(this, Vd, Vm, Vn);
 | 
| +                break;
 | 
| +              case Neon16:
 | 
| +                SubSaturate<uint16_t>(this, Vd, Vm, Vn);
 | 
| +                break;
 | 
| +              case Neon32:
 | 
| +                SubSaturate<uint32_t>(this, Vd, Vm, Vn);
 | 
| +                break;
 | 
| +              default:
 | 
| +                UNREACHABLE();
 | 
| +                break;
 | 
|              }
 | 
| -            set_q_register(Vd, src1);
 | 
| -            break;
 | 
| +          } else {
 | 
| +            UNIMPLEMENTED();
 | 
|            }
 | 
| -          case Neon16: {
 | 
| -            uint16_t src1[8], src2[8];
 | 
| -            get_q_register(Vn, src1);
 | 
| -            get_q_register(Vm, src2);
 | 
| -            for (int i = 0; i < 8; i++) {
 | 
| -              src1[i] = (src1[i] == src2[i]) ? 0xFFFFu : 0;
 | 
| +          break;
 | 
| +        }
 | 
| +        case 0x3: {
 | 
| +          // vcge/vcgt.u<size> Qd, Qm, Qn.
 | 
| +          bool ge = instr->Bit(4) == 1;
 | 
| +          NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
 | 
| +          switch (size) {
 | 
| +            case Neon8: {
 | 
| +              uint8_t src1[16], src2[16];
 | 
| +              get_q_register(Vn, src1);
 | 
| +              get_q_register(Vm, src2);
 | 
| +              for (int i = 0; i < 16; i++) {
 | 
| +                if (ge)
 | 
| +                  src1[i] = src1[i] >= src2[i] ? 0xFFu : 0;
 | 
| +                else
 | 
| +                  src1[i] = src1[i] > src2[i] ? 0xFFu : 0;
 | 
| +              }
 | 
| +              set_q_register(Vd, src1);
 | 
| +              break;
 | 
|              }
 | 
| -            set_q_register(Vd, src1);
 | 
| -            break;
 | 
| -          }
 | 
| -          case Neon32: {
 | 
| -            uint32_t src1[4], src2[4];
 | 
| -            get_q_register(Vn, src1);
 | 
| -            get_q_register(Vm, src2);
 | 
| -            for (int i = 0; i < 4; i++) {
 | 
| -              src1[i] = (src1[i] == src2[i]) ? 0xFFFFFFFFu : 0;
 | 
| +            case Neon16: {
 | 
| +              uint16_t src1[8], src2[8];
 | 
| +              get_q_register(Vn, src1);
 | 
| +              get_q_register(Vm, src2);
 | 
| +              for (int i = 0; i < 8; i++) {
 | 
| +                if (ge)
 | 
| +                  src1[i] = src1[i] >= src2[i] ? 0xFFFFu : 0;
 | 
| +                else
 | 
| +                  src1[i] = src1[i] > src2[i] ? 0xFFFFu : 0;
 | 
| +              }
 | 
| +              set_q_register(Vd, src1);
 | 
| +              break;
 | 
|              }
 | 
| -            set_q_register(Vd, src1);
 | 
| -            break;
 | 
| -          }
 | 
| -          default:
 | 
| -            UNREACHABLE();
 | 
| -            break;
 | 
| -        }
 | 
| -      } else if (instr->Bits(11, 8) == 1 && instr->Bits(21, 20) == 1 &&
 | 
| -                 instr->Bit(4) == 1) {
 | 
| -        // vbsl.size Qd, Qm, Qn.
 | 
| -        int Vd = instr->VFPDRegValue(kSimd128Precision);
 | 
| -        int Vm = instr->VFPMRegValue(kSimd128Precision);
 | 
| -        int Vn = instr->VFPNRegValue(kSimd128Precision);
 | 
| -        uint32_t dst[4], src1[4], src2[4];
 | 
| -        get_q_register(Vd, dst);
 | 
| -        get_q_register(Vn, src1);
 | 
| -        get_q_register(Vm, src2);
 | 
| -        for (int i = 0; i < 4; i++) {
 | 
| -          dst[i] = (dst[i] & src1[i]) | (~dst[i] & src2[i]);
 | 
| -        }
 | 
| -        set_q_register(Vd, dst);
 | 
| -      } else if (instr->Bits(11, 8) == 1 && instr->Bits(21, 20) == 0 &&
 | 
| -                 instr->Bit(4) == 1) {
 | 
| -        if (instr->Bit(6) == 0) {
 | 
| -          // veor Dd, Dn, Dm
 | 
| -          int Vd = instr->VFPDRegValue(kDoublePrecision);
 | 
| -          int Vn = instr->VFPNRegValue(kDoublePrecision);
 | 
| -          int Vm = instr->VFPMRegValue(kDoublePrecision);
 | 
| -          uint64_t src1, src2;
 | 
| -          get_d_register(Vn, &src1);
 | 
| -          get_d_register(Vm, &src2);
 | 
| -          src1 ^= src2;
 | 
| -          set_d_register(Vd, &src1);
 | 
| -
 | 
| -        } else {
 | 
| -          // veor Qd, Qn, Qm
 | 
| -          int Vd = instr->VFPDRegValue(kSimd128Precision);
 | 
| -          int Vn = instr->VFPNRegValue(kSimd128Precision);
 | 
| -          int Vm = instr->VFPMRegValue(kSimd128Precision);
 | 
| -          uint32_t src1[4], src2[4];
 | 
| -          get_q_register(Vn, src1);
 | 
| -          get_q_register(Vm, src2);
 | 
| -          for (int i = 0; i < 4; i++) src1[i] ^= src2[i];
 | 
| -          set_q_register(Vd, src1);
 | 
| -        }
 | 
| -      } else if (instr->Bits(11, 8) == 0xd && instr->Bit(21) == 0 &&
 | 
| -                 instr->Bit(6) == 1 && instr->Bit(4) == 1) {
 | 
| -        // vmul.f32 Qd, Qn, Qm
 | 
| -        int Vd = instr->VFPDRegValue(kSimd128Precision);
 | 
| -        int Vn = instr->VFPNRegValue(kSimd128Precision);
 | 
| -        int Vm = instr->VFPMRegValue(kSimd128Precision);
 | 
| -        float src1[4], src2[4];
 | 
| -        get_q_register(Vn, src1);
 | 
| -        get_q_register(Vm, src2);
 | 
| -        for (int i = 0; i < 4; i++) {
 | 
| -          src1[i] = src1[i] * src2[i];
 | 
| -        }
 | 
| -        set_q_register(Vd, src1);
 | 
| -      } else if (instr->Bits(11, 8) == 0xe && instr->Bit(20) == 0 &&
 | 
| -                 instr->Bit(4) == 0) {
 | 
| -        // vcge/vcgt.f32 Qd, Qm, Qn
 | 
| -        bool ge = instr->Bit(21) == 0;
 | 
| -        int Vd = instr->VFPDRegValue(kSimd128Precision);
 | 
| -        int Vm = instr->VFPMRegValue(kSimd128Precision);
 | 
| -        int Vn = instr->VFPNRegValue(kSimd128Precision);
 | 
| -        float src1[4], src2[4];
 | 
| -        get_q_register(Vn, src1);
 | 
| -        get_q_register(Vm, src2);
 | 
| -        uint32_t dst[4];
 | 
| -        for (int i = 0; i < 4; i++) {
 | 
| -          if (ge) {
 | 
| -            dst[i] = src1[i] >= src2[i] ? 0xFFFFFFFFu : 0;
 | 
| -          } else {
 | 
| -            dst[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0;
 | 
| +            case Neon32: {
 | 
| +              uint32_t src1[4], src2[4];
 | 
| +              get_q_register(Vn, src1);
 | 
| +              get_q_register(Vm, src2);
 | 
| +              for (int i = 0; i < 4; i++) {
 | 
| +                if (ge)
 | 
| +                  src1[i] = src1[i] >= src2[i] ? 0xFFFFFFFFu : 0;
 | 
| +                else
 | 
| +                  src1[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0;
 | 
| +              }
 | 
| +              set_q_register(Vd, src1);
 | 
| +              break;
 | 
| +            }
 | 
| +            default:
 | 
| +              UNREACHABLE();
 | 
| +              break;
 | 
|            }
 | 
| +          break;
 | 
|          }
 | 
| -        set_q_register(Vd, dst);
 | 
| -      } else if (instr->Bits(11, 8) == 0x3) {
 | 
| -        // vcge/vcgt.u<size> Qd, Qm, Qn.
 | 
| -        bool ge = instr->Bit(4) == 1;
 | 
| -        NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
 | 
| -        int Vd = instr->VFPDRegValue(kSimd128Precision);
 | 
| -        int Vm = instr->VFPMRegValue(kSimd128Precision);
 | 
| -        int Vn = instr->VFPNRegValue(kSimd128Precision);
 | 
| -        switch (size) {
 | 
| -          case Neon8: {
 | 
| -            uint8_t src1[16], src2[16];
 | 
| -            get_q_register(Vn, src1);
 | 
| -            get_q_register(Vm, src2);
 | 
| -            for (int i = 0; i < 16; i++) {
 | 
| -              if (ge)
 | 
| -                src1[i] = src1[i] >= src2[i] ? 0xFFu : 0;
 | 
| -              else
 | 
| -                src1[i] = src1[i] > src2[i] ? 0xFFu : 0;
 | 
| +        case 0x6: {
 | 
| +          // vmin/vmax.u<size> Qd, Qm, Qn.
 | 
| +          NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
 | 
| +          bool min = instr->Bit(4) != 0;
 | 
| +          switch (size) {
 | 
| +            case Neon8: {
 | 
| +              uint8_t src1[16], src2[16];
 | 
| +              get_q_register(Vn, src1);
 | 
| +              get_q_register(Vm, src2);
 | 
| +              for (int i = 0; i < 16; i++) {
 | 
| +                if (min)
 | 
| +                  src1[i] = std::min(src1[i], src2[i]);
 | 
| +                else
 | 
| +                  src1[i] = std::max(src1[i], src2[i]);
 | 
| +              }
 | 
| +              set_q_register(Vd, src1);
 | 
| +              break;
 | 
|              }
 | 
| -            set_q_register(Vd, src1);
 | 
| -            break;
 | 
| -          }
 | 
| -          case Neon16: {
 | 
| -            uint16_t src1[8], src2[8];
 | 
| -            get_q_register(Vn, src1);
 | 
| -            get_q_register(Vm, src2);
 | 
| -            for (int i = 0; i < 8; i++) {
 | 
| -              if (ge)
 | 
| -                src1[i] = src1[i] >= src2[i] ? 0xFFFFu : 0;
 | 
| -              else
 | 
| -                src1[i] = src1[i] > src2[i] ? 0xFFFFu : 0;
 | 
| +            case Neon16: {
 | 
| +              uint16_t src1[8], src2[8];
 | 
| +              get_q_register(Vn, src1);
 | 
| +              get_q_register(Vm, src2);
 | 
| +              for (int i = 0; i < 8; i++) {
 | 
| +                if (min)
 | 
| +                  src1[i] = std::min(src1[i], src2[i]);
 | 
| +                else
 | 
| +                  src1[i] = std::max(src1[i], src2[i]);
 | 
| +              }
 | 
| +              set_q_register(Vd, src1);
 | 
| +              break;
 | 
|              }
 | 
| -            set_q_register(Vd, src1);
 | 
| -            break;
 | 
| -          }
 | 
| -          case Neon32: {
 | 
| -            uint32_t src1[4], src2[4];
 | 
| -            get_q_register(Vn, src1);
 | 
| -            get_q_register(Vm, src2);
 | 
| -            for (int i = 0; i < 4; i++) {
 | 
| -              if (ge)
 | 
| -                src1[i] = src1[i] >= src2[i] ? 0xFFFFFFFFu : 0;
 | 
| -              else
 | 
| -                src1[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0;
 | 
| +            case Neon32: {
 | 
| +              uint32_t src1[4], src2[4];
 | 
| +              get_q_register(Vn, src1);
 | 
| +              get_q_register(Vm, src2);
 | 
| +              for (int i = 0; i < 4; i++) {
 | 
| +                if (min)
 | 
| +                  src1[i] = std::min(src1[i], src2[i]);
 | 
| +                else
 | 
| +                  src1[i] = std::max(src1[i], src2[i]);
 | 
| +              }
 | 
| +              set_q_register(Vd, src1);
 | 
| +              break;
 | 
|              }
 | 
| -            set_q_register(Vd, src1);
 | 
| -            break;
 | 
| +            default:
 | 
| +              UNREACHABLE();
 | 
| +              break;
 | 
|            }
 | 
| -          default:
 | 
| -            UNREACHABLE();
 | 
| -            break;
 | 
| +          break;
 | 
|          }
 | 
| -      } else if (instr->Bits(11, 8) == 0x6) {
 | 
| -        // vmin/vmax.u<size> Qd, Qm, Qn.
 | 
| -        NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
 | 
| -        int Vd = instr->VFPDRegValue(kSimd128Precision);
 | 
| -        int Vm = instr->VFPMRegValue(kSimd128Precision);
 | 
| -        int Vn = instr->VFPNRegValue(kSimd128Precision);
 | 
| -        bool min = instr->Bit(4) != 0;
 | 
| -        switch (size) {
 | 
| -          case Neon8: {
 | 
| -            uint8_t src1[16], src2[16];
 | 
| -            get_q_register(Vn, src1);
 | 
| -            get_q_register(Vm, src2);
 | 
| -            for (int i = 0; i < 16; i++) {
 | 
| -              if (min)
 | 
| -                src1[i] = std::min(src1[i], src2[i]);
 | 
| -              else
 | 
| -                src1[i] = std::max(src1[i], src2[i]);
 | 
| +        case 0x8: {
 | 
| +          if (instr->Bit(4) == 0) {
 | 
| +            // vsub.size Qd, Qm, Qn.
 | 
| +            NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
 | 
| +            switch (size) {
 | 
| +              case Neon8: {
 | 
| +                uint8_t src1[16], src2[16];
 | 
| +                get_q_register(Vn, src1);
 | 
| +                get_q_register(Vm, src2);
 | 
| +                for (int i = 0; i < 16; i++) {
 | 
| +                  src1[i] -= src2[i];
 | 
| +                }
 | 
| +                set_q_register(Vd, src1);
 | 
| +                break;
 | 
| +              }
 | 
| +              case Neon16: {
 | 
| +                uint16_t src1[8], src2[8];
 | 
| +                get_q_register(Vn, src1);
 | 
| +                get_q_register(Vm, src2);
 | 
| +                for (int i = 0; i < 8; i++) {
 | 
| +                  src1[i] -= src2[i];
 | 
| +                }
 | 
| +                set_q_register(Vd, src1);
 | 
| +                break;
 | 
| +              }
 | 
| +              case Neon32: {
 | 
| +                uint32_t src1[4], src2[4];
 | 
| +                get_q_register(Vn, src1);
 | 
| +                get_q_register(Vm, src2);
 | 
| +                for (int i = 0; i < 4; i++) {
 | 
| +                  src1[i] -= src2[i];
 | 
| +                }
 | 
| +                set_q_register(Vd, src1);
 | 
| +                break;
 | 
| +              }
 | 
| +              default:
 | 
| +                UNREACHABLE();
 | 
| +                break;
 | 
| +            }
 | 
| +          } else {
 | 
| +            // vceq.size Qd, Qm, Qn.
 | 
| +            NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
 | 
| +            switch (size) {
 | 
| +              case Neon8: {
 | 
| +                uint8_t src1[16], src2[16];
 | 
| +                get_q_register(Vn, src1);
 | 
| +                get_q_register(Vm, src2);
 | 
| +                for (int i = 0; i < 16; i++) {
 | 
| +                  src1[i] = (src1[i] == src2[i]) ? 0xFFu : 0;
 | 
| +                }
 | 
| +                set_q_register(Vd, src1);
 | 
| +                break;
 | 
| +              }
 | 
| +              case Neon16: {
 | 
| +                uint16_t src1[8], src2[8];
 | 
| +                get_q_register(Vn, src1);
 | 
| +                get_q_register(Vm, src2);
 | 
| +                for (int i = 0; i < 8; i++) {
 | 
| +                  src1[i] = (src1[i] == src2[i]) ? 0xFFFFu : 0;
 | 
| +                }
 | 
| +                set_q_register(Vd, src1);
 | 
| +                break;
 | 
| +              }
 | 
| +              case Neon32: {
 | 
| +                uint32_t src1[4], src2[4];
 | 
| +                get_q_register(Vn, src1);
 | 
| +                get_q_register(Vm, src2);
 | 
| +                for (int i = 0; i < 4; i++) {
 | 
| +                  src1[i] = (src1[i] == src2[i]) ? 0xFFFFFFFFu : 0;
 | 
| +                }
 | 
| +                set_q_register(Vd, src1);
 | 
| +                break;
 | 
| +              }
 | 
| +              default:
 | 
| +                UNREACHABLE();
 | 
| +                break;
 | 
|              }
 | 
| -            set_q_register(Vd, src1);
 | 
| -            break;
 | 
|            }
 | 
| -          case Neon16: {
 | 
| -            uint16_t src1[8], src2[8];
 | 
| +          break;
 | 
| +        }
 | 
| +        case 0xd: {
 | 
| +          if (instr->Bit(21) == 0 && instr->Bit(6) == 1 && instr->Bit(4) == 1) {
 | 
| +            // vmul.f32 Qd, Qn, Qm
 | 
| +            float src1[4], src2[4];
 | 
|              get_q_register(Vn, src1);
 | 
|              get_q_register(Vm, src2);
 | 
| -            for (int i = 0; i < 8; i++) {
 | 
| -              if (min)
 | 
| -                src1[i] = std::min(src1[i], src2[i]);
 | 
| -              else
 | 
| -                src1[i] = std::max(src1[i], src2[i]);
 | 
| +            for (int i = 0; i < 4; i++) {
 | 
| +              src1[i] = src1[i] * src2[i];
 | 
|              }
 | 
|              set_q_register(Vd, src1);
 | 
| -            break;
 | 
| +          } else {
 | 
| +            UNIMPLEMENTED();
 | 
|            }
 | 
| -          case Neon32: {
 | 
| -            uint32_t src1[4], src2[4];
 | 
| +          break;
 | 
| +        }
 | 
| +        case 0xe: {
 | 
| +          if (instr->Bit(20) == 0 && instr->Bit(4) == 0) {
 | 
| +            // vcge/vcgt.f32 Qd, Qm, Qn
 | 
| +            bool ge = instr->Bit(21) == 0;
 | 
| +            float src1[4], src2[4];
 | 
|              get_q_register(Vn, src1);
 | 
|              get_q_register(Vm, src2);
 | 
| +            uint32_t dst[4];
 | 
|              for (int i = 0; i < 4; i++) {
 | 
| -              if (min)
 | 
| -                src1[i] = std::min(src1[i], src2[i]);
 | 
| -              else
 | 
| -                src1[i] = std::max(src1[i], src2[i]);
 | 
| +              if (ge) {
 | 
| +                dst[i] = src1[i] >= src2[i] ? 0xFFFFFFFFu : 0;
 | 
| +              } else {
 | 
| +                dst[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0;
 | 
| +              }
 | 
|              }
 | 
| -            set_q_register(Vd, src1);
 | 
| -            break;
 | 
| +            set_q_register(Vd, dst);
 | 
| +          } else {
 | 
| +            UNIMPLEMENTED();
 | 
|            }
 | 
| -          default:
 | 
| -            UNREACHABLE();
 | 
| -            break;
 | 
| +          break;
 | 
|          }
 | 
| -      } else {
 | 
| -        UNIMPLEMENTED();
 | 
| +        default:
 | 
| +          UNREACHABLE();
 | 
| +          break;
 | 
|        }
 | 
|        break;
 | 
| +    }
 | 
|      case 7:
 | 
|        if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) &&
 | 
|            (instr->Bit(4) == 1)) {
 | 
| 
 |