| Index: src/IceTargetLoweringX8632.cpp
 | 
| diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
 | 
| index 19a1256ea38df0a24b6041707338a4394a1308dd..2b14a65c96a243bf997cad303ca7e05f4ddb7881 100644
 | 
| --- a/src/IceTargetLoweringX8632.cpp
 | 
| +++ b/src/IceTargetLoweringX8632.cpp
 | 
| @@ -90,6 +90,20 @@ const unsigned X86_MAX_XMM_ARGS = 4;
 | 
|  // The number of bits in a byte
 | 
|  const unsigned X86_CHAR_BIT = 8;
 | 
|  
 | 
| +// Return a string representation of the type that is suitable for use
 | 
| +// in an identifier.
 | 
| +IceString typeIdentString(const Type Ty) {
 | 
| +  IceString Str;
 | 
| +  llvm::raw_string_ostream BaseOS(Str);
 | 
| +  Ostream OS(&BaseOS);
 | 
| +  if (isVectorType(Ty)) {
 | 
| +    OS << "v" << typeNumElements(Ty) << typeElementType(Ty);
 | 
| +  } else {
 | 
| +    OS << Ty;
 | 
| +  }
 | 
| +  return BaseOS.str();
 | 
| +}
 | 
| +
 | 
|  // In some cases, there are x-macros tables for both high-level and
 | 
|  // low-level instructions/operands that use the same enum key value.
 | 
|  // The tables are kept separate to maintain a proper separation
 | 
| @@ -1139,58 +1153,206 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
 | 
|        break;
 | 
|      }
 | 
|    } else if (isVectorType(Dest->getType())) {
 | 
| +    // TODO: Trap on integer divide and integer modulo by zero.
 | 
| +    // See: https://code.google.com/p/nativeclient/issues/detail?id=3899
 | 
| +    //
 | 
| +    // TODO(wala): ALIGNHACK: All vector arithmetic is currently done in
 | 
| +    // registers.  This is a workaround of the fact that there is no
 | 
| +    // support for aligning stack operands.  Once there is support,
 | 
| +    // remove LEGAL_HACK.
 | 
| +#define LEGAL_HACK(s) legalizeToVar((s))
 | 
|      switch (Inst->getOp()) {
 | 
|      case InstArithmetic::_num:
 | 
|        llvm_unreachable("Unknown arithmetic operator");
 | 
|        break;
 | 
| -    case InstArithmetic::Add:
 | 
| -    case InstArithmetic::And:
 | 
| -    case InstArithmetic::Or:
 | 
| -    case InstArithmetic::Xor:
 | 
| -    case InstArithmetic::Sub:
 | 
| -    case InstArithmetic::Mul:
 | 
| -    case InstArithmetic::Shl:
 | 
| -    case InstArithmetic::Lshr:
 | 
| -    case InstArithmetic::Ashr:
 | 
| -    case InstArithmetic::Udiv:
 | 
| -    case InstArithmetic::Sdiv:
 | 
| -    case InstArithmetic::Urem:
 | 
| -    case InstArithmetic::Srem:
 | 
| -      // TODO(wala): Handle these.
 | 
| -      Func->setError("Unhandled instruction");
 | 
| -      break;
 | 
| +    case InstArithmetic::Add: {
 | 
| +      Variable *T = makeReg(Dest->getType());
 | 
| +      _movp(T, Src0);
 | 
| +      _padd(T, LEGAL_HACK(Src1));
 | 
| +      _movp(Dest, T);
 | 
| +    } break;
 | 
| +    case InstArithmetic::And: {
 | 
| +      Variable *T = makeReg(Dest->getType());
 | 
| +      _movp(T, Src0);
 | 
| +      _pand(T, LEGAL_HACK(Src1));
 | 
| +      _movp(Dest, T);
 | 
| +    } break;
 | 
| +    case InstArithmetic::Or: {
 | 
| +      Variable *T = makeReg(Dest->getType());
 | 
| +      _movp(T, Src0);
 | 
| +      _por(T, LEGAL_HACK(Src1));
 | 
| +      _movp(Dest, T);
 | 
| +    } break;
 | 
| +    case InstArithmetic::Xor: {
 | 
| +      Variable *T = makeReg(Dest->getType());
 | 
| +      _movp(T, Src0);
 | 
| +      _pxor(T, LEGAL_HACK(Src1));
 | 
| +      _movp(Dest, T);
 | 
| +    } break;
 | 
| +    case InstArithmetic::Sub: {
 | 
| +      Variable *T = makeReg(Dest->getType());
 | 
| +      _movp(T, Src0);
 | 
| +      _psub(T, LEGAL_HACK(Src1));
 | 
| +      _movp(Dest, T);
 | 
| +    } break;
 | 
| +    case InstArithmetic::Mul: {
 | 
| +      if (Dest->getType() == IceType_v4i32) {
 | 
| +        // Lowering sequence:
 | 
| +        // Note: The mask arguments have index 0 on the left.
 | 
| +        //
 | 
| +        // movups  T1, Src0
 | 
| +        // pshufd  T2, Src0, {1,0,3,0}
 | 
| +        // pshufd  T3, Src1, {1,0,3,0}
 | 
| +        // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]}
 | 
| +        // pmuludq T1, Src1
 | 
| +        // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]}
 | 
| +        // pmuludq T2, T3
 | 
| +        // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])}
 | 
| +        // shufps  T1, T2, {0,2,0,2}
 | 
| +        // pshufd  T4, T1, {0,2,1,3}
 | 
| +        // movups  Dest, T4
 | 
| +        //
 | 
| +        // TODO(wala): SSE4.1 has pmulld.
 | 
| +
 | 
| +        // Mask that directs pshufd to create a vector with entries
 | 
| +        // Src[1, 0, 3, 0]
 | 
| +        const unsigned Constant1030 = 0x31;
 | 
| +        Constant *Mask1030 = Ctx->getConstantInt(IceType_i8, Constant1030);
 | 
| +        // Mask that directs shufps to create a vector with entries
 | 
| +        // Dest[0, 2], Src[0, 2]
 | 
| +        const unsigned Mask0202 = 0x88;
 | 
| +        // Mask that directs pshufd to create a vector with entries
 | 
| +        // Src[0, 2, 1, 3]
 | 
| +        const unsigned Mask0213 = 0xd8;
 | 
| +        Variable *T1 = makeReg(IceType_v4i32);
 | 
| +        Variable *T2 = makeReg(IceType_v4i32);
 | 
| +        Variable *T3 = makeReg(IceType_v4i32);
 | 
| +        Variable *T4 = makeReg(IceType_v4i32);
 | 
| +        _movp(T1, Src0);
 | 
| +        // TODO(wala): ALIGHNHACK: Replace Src0R with Src0 and Src1R
 | 
| +        // with Src1 after stack operand alignment support is
 | 
| +        // implemented.
 | 
| +        Variable *Src0R = LEGAL_HACK(Src0);
 | 
| +        Variable *Src1R = LEGAL_HACK(Src1);
 | 
| +        _pshufd(T2, Src0R, Mask1030);
 | 
| +        _pshufd(T3, Src1R, Mask1030);
 | 
| +        _pmuludq(T1, Src1R);
 | 
| +        _pmuludq(T2, T3);
 | 
| +        _shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202));
 | 
| +        _pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213));
 | 
| +        _movp(Dest, T4);
 | 
| +      } else if (Dest->getType() == IceType_v8i16) {
 | 
| +        Variable *T = makeReg(IceType_v8i16);
 | 
| +        _movp(T, Src0);
 | 
| +        _pmullw(T, legalizeToVar(Src1));
 | 
| +        _movp(Dest, T);
 | 
| +      } else {
 | 
| +        assert(Dest->getType() == IceType_v16i8);
 | 
| +        // Sz_mul_v16i8
 | 
| +        const IceString Helper = "Sz_mul_v16i8";
 | 
| +        const SizeT MaxSrcs = 2;
 | 
| +        InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
 | 
| +        Call->addArg(Src0);
 | 
| +        Call->addArg(Src1);
 | 
| +        lowerCall(Call);
 | 
| +      }
 | 
| +    } break;
 | 
| +    case InstArithmetic::Shl: {
 | 
| +      // Sz_shl_v4i32, Sz_shl_v8i16, Sz_shl_v16i8
 | 
| +      const IceString Helper = "Sz_shl_" + typeIdentString(Dest->getType());
 | 
| +      const SizeT MaxSrcs = 2;
 | 
| +      InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
 | 
| +      Call->addArg(Src0);
 | 
| +      Call->addArg(Src1);
 | 
| +      lowerCall(Call);
 | 
| +    } break;
 | 
| +    case InstArithmetic::Lshr: {
 | 
| +      // Sz_lshr_v4i32, Sz_lshr_v8i16, Sz_lshr_v16i8
 | 
| +      const IceString Helper = "Sz_lshr_" + typeIdentString(Dest->getType());
 | 
| +      const SizeT MaxSrcs = 2;
 | 
| +      InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
 | 
| +      Call->addArg(Src0);
 | 
| +      Call->addArg(Src1);
 | 
| +      lowerCall(Call);
 | 
| +    } break;
 | 
| +    case InstArithmetic::Ashr: {
 | 
| +      // Sz_ashr_v4i32, Sz_ashr_v8i16, Sz_ashr_v16i8
 | 
| +      const IceString Helper = "Sz_ashr_" + typeIdentString(Dest->getType());
 | 
| +      const SizeT MaxSrcs = 2;
 | 
| +      InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
 | 
| +      Call->addArg(Src0);
 | 
| +      Call->addArg(Src1);
 | 
| +      lowerCall(Call);
 | 
| +    } break;
 | 
| +    case InstArithmetic::Udiv: {
 | 
| +      // Sz_udiv_v4i32, Sz_udiv_v8i16, Sz_udiv_v16i8
 | 
| +      const IceString Helper = "Sz_udiv_" + typeIdentString(Dest->getType());
 | 
| +      const SizeT MaxSrcs = 2;
 | 
| +      InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
 | 
| +      Call->addArg(Src0);
 | 
| +      Call->addArg(Src1);
 | 
| +      lowerCall(Call);
 | 
| +    } break;
 | 
| +    case InstArithmetic::Sdiv: {
 | 
| +      // Sz_sdiv_v4i32, Sz_sdiv_v8i16, Sz_sdiv_v16i8
 | 
| +      const IceString Helper = "Sz_sdiv_" + typeIdentString(Dest->getType());
 | 
| +      const SizeT MaxSrcs = 2;
 | 
| +      InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
 | 
| +      Call->addArg(Src0);
 | 
| +      Call->addArg(Src1);
 | 
| +      lowerCall(Call);
 | 
| +    } break;
 | 
| +    case InstArithmetic::Urem: {
 | 
| +      // Sz_urem_v4i32, Sz_urem_v8i16, Sz_urem_v16i8
 | 
| +      const IceString Helper = "Sz_urem_" + typeIdentString(Dest->getType());
 | 
| +      const SizeT MaxSrcs = 2;
 | 
| +      InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
 | 
| +      Call->addArg(Src0);
 | 
| +      Call->addArg(Src1);
 | 
| +      lowerCall(Call);
 | 
| +    } break;
 | 
| +    case InstArithmetic::Srem: {
 | 
| +      // Sz_srem_v4i32, Sz_srem_v8i16, Sz_srem_v16i8
 | 
| +      const IceString Helper = "Sz_srem_" + typeIdentString(Dest->getType());
 | 
| +      const SizeT MaxSrcs = 2;
 | 
| +      InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
 | 
| +      Call->addArg(Src0);
 | 
| +      Call->addArg(Src1);
 | 
| +      lowerCall(Call);
 | 
| +    } break;
 | 
|      case InstArithmetic::Fadd: {
 | 
|        Variable *T = makeReg(Dest->getType());
 | 
|        _movp(T, Src0);
 | 
| -      _addps(T, Src1);
 | 
| +      _addps(T, LEGAL_HACK(Src1));
 | 
|        _movp(Dest, T);
 | 
|      } break;
 | 
|      case InstArithmetic::Fsub: {
 | 
|        Variable *T = makeReg(Dest->getType());
 | 
|        _movp(T, Src0);
 | 
| -      _subps(T, Src1);
 | 
| +      _subps(T, LEGAL_HACK(Src1));
 | 
|        _movp(Dest, T);
 | 
|      } break;
 | 
|      case InstArithmetic::Fmul: {
 | 
|        Variable *T = makeReg(Dest->getType());
 | 
|        _movp(T, Src0);
 | 
| -      _mulps(T, Src1);
 | 
| +      _mulps(T, LEGAL_HACK(Src1));
 | 
|        _movp(Dest, T);
 | 
|      } break;
 | 
|      case InstArithmetic::Fdiv: {
 | 
|        Variable *T = makeReg(Dest->getType());
 | 
|        _movp(T, Src0);
 | 
| -      _divps(T, Src1);
 | 
| +      _divps(T, LEGAL_HACK(Src1));
 | 
|        _movp(Dest, T);
 | 
|      } break;
 | 
|      case InstArithmetic::Frem: {
 | 
|        const SizeT MaxSrcs = 2;
 | 
| -      InstCall *Call = makeHelperCall("__frem_v4f32", Dest, MaxSrcs);
 | 
| +      InstCall *Call = makeHelperCall("Sz_frem_v4f32", Dest, MaxSrcs);
 | 
|        Call->addArg(Src0);
 | 
|        Call->addArg(Src1);
 | 
|        lowerCall(Call);
 | 
|      } break;
 | 
|      }
 | 
| +#undef LEGAL_HACK
 | 
|    } else { // Dest->getType() is non-i64 scalar
 | 
|      Variable *T_edx = NULL;
 | 
|      Variable *T = NULL;
 | 
| 
 |