OLD | NEW |
---|---|
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 // | 9 // |
10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX8632 class, which |
(...skipping 4058 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
4069 // function. Otherwise post-call esp adjustments get dead-code | 4069 // function. Otherwise post-call esp adjustments get dead-code |
4070 // eliminated. TODO: Are there more places where the fake use | 4070 // eliminated. TODO: Are there more places where the fake use |
4071 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not | 4071 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not |
4072 // have a ret instruction. | 4072 // have a ret instruction. |
4073 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); | 4073 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); |
4074 Context.insert(InstFakeUse::create(Func, esp)); | 4074 Context.insert(InstFakeUse::create(Func, esp)); |
4075 } | 4075 } |
4076 | 4076 |
4077 void TargetX8632::lowerSelect(const InstSelect *Inst) { | 4077 void TargetX8632::lowerSelect(const InstSelect *Inst) { |
4078 Variable *Dest = Inst->getDest(); | 4078 Variable *Dest = Inst->getDest(); |
4079 Type DestTy = Dest->getType(); | |
4079 Operand *SrcT = Inst->getTrueOperand(); | 4080 Operand *SrcT = Inst->getTrueOperand(); |
4080 Operand *SrcF = Inst->getFalseOperand(); | 4081 Operand *SrcF = Inst->getFalseOperand(); |
4081 Operand *Condition = Inst->getCondition(); | 4082 Operand *Condition = Inst->getCondition(); |
4082 | 4083 |
4083 if (isVectorType(Dest->getType())) { | 4084 if (isVectorType(DestTy)) { |
4084 Type SrcTy = SrcT->getType(); | 4085 Type SrcTy = SrcT->getType(); |
4085 Variable *T = makeReg(SrcTy); | 4086 Variable *T = makeReg(SrcTy); |
4086 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); | 4087 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); |
4087 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); | 4088 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); |
4088 if (InstructionSet >= SSE4_1) { | 4089 if (InstructionSet >= SSE4_1) { |
4089 // TODO(wala): If the condition operand is a constant, use blendps | 4090 // TODO(wala): If the condition operand is a constant, use blendps |
4090 // or pblendw. | 4091 // or pblendw. |
4091 // | 4092 // |
4092 // Use blendvps or pblendvb to implement select. | 4093 // Use blendvps or pblendvb to implement select. |
4093 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || | 4094 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
4131 } | 4132 } |
4132 _movp(T2, T); | 4133 _movp(T2, T); |
4133 _pand(T, SrcTRM); | 4134 _pand(T, SrcTRM); |
4134 _pandn(T2, SrcFRM); | 4135 _pandn(T2, SrcFRM); |
4135 _por(T, T2); | 4136 _por(T, T2); |
4136 _movp(Dest, T); | 4137 _movp(Dest, T); |
4137 | 4138 |
4138 return; | 4139 return; |
4139 } | 4140 } |
4140 | 4141 |
4142 CondX86::BrCond Cond = CondX86::Br_ne; | |
4143 Operand *CmpOpnd0 = nullptr; | |
4144 Operand *CmpOpnd1 = nullptr; | |
4141 // Handle folding opportunities. | 4145 // Handle folding opportunities. |
4142 if (const class Inst *Producer = FoldingInfo.getProducerFor(Condition)) { | 4146 if (const class Inst *Producer = FoldingInfo.getProducerFor(Condition)) { |
4143 assert(Producer->isDeleted()); | 4147 assert(Producer->isDeleted()); |
4144 switch (BoolFolding::getProducerKind(Producer)) { | 4148 switch (BoolFolding::getProducerKind(Producer)) { |
4145 default: | 4149 default: |
4146 break; | 4150 break; |
4147 case BoolFolding::PK_Icmp32: { | 4151 case BoolFolding::PK_Icmp32: { |
4148 // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1: | |
4149 auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer); | 4152 auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer); |
4150 InstX8632Label *Label = InstX8632Label::create(Func, this); | 4153 Cond = getIcmp32Mapping(Cmp->getCondition()); |
4151 Operand *Src0 = Producer->getSrc(0); | 4154 CmpOpnd1 = legalize(Producer->getSrc(1)); |
4152 Operand *Src1 = legalize(Producer->getSrc(1)); | 4155 CmpOpnd0 = legalizeSrc0ForCmp(Producer->getSrc(0), CmpOpnd1); |
4153 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1); | 4156 } break; |
4154 _cmp(Src0RM, Src1); | |
4155 // This is the same code as below (for both i64 and non-i64), | |
4156 // except without the _cmp instruction and with a different | |
4157 // branch condition. TODO(stichnot): refactor. | |
4158 if (Dest->getType() == IceType_i64) { | |
4159 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
4160 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
4161 Operand *SrcLoRI = legalize(loOperand(SrcT), Legal_Reg | Legal_Imm); | |
4162 Operand *SrcHiRI = legalize(hiOperand(SrcT), Legal_Reg | Legal_Imm); | |
4163 _mov(DestLo, SrcLoRI); | |
4164 _mov(DestHi, SrcHiRI); | |
4165 _br(getIcmp32Mapping(Cmp->getCondition()), Label); | |
4166 Operand *SrcFLo = loOperand(SrcF); | |
4167 Operand *SrcFHi = hiOperand(SrcF); | |
4168 SrcLoRI = legalize(SrcFLo, Legal_Reg | Legal_Imm); | |
4169 SrcHiRI = legalize(SrcFHi, Legal_Reg | Legal_Imm); | |
4170 _mov_nonkillable(DestLo, SrcLoRI); | |
4171 _mov_nonkillable(DestHi, SrcHiRI); | |
4172 } else { | |
4173 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm); | |
4174 _mov(Dest, SrcT); | |
4175 _br(getIcmp32Mapping(Cmp->getCondition()), Label); | |
4176 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm); | |
4177 _mov_nonkillable(Dest, SrcF); | |
4178 } | |
4179 Context.insert(Label); | |
4180 return; | |
4181 } | |
4182 } | 4157 } |
4183 } | 4158 } |
4159 if (CmpOpnd0 == nullptr) { | |
4160 CmpOpnd0 = legalize(Condition, Legal_Reg | Legal_Mem); | |
4161 CmpOpnd1 = Ctx->getConstantZero(IceType_i32); | |
4162 } | |
4163 assert(CmpOpnd0); | |
4164 assert(CmpOpnd1); | |
4184 | 4165 |
4185 // a=d?b:c ==> cmp d,0; a=b; jne L1; a=c; L1: | 4166 _cmp(CmpOpnd0, CmpOpnd1); |
4186 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); | 4167 if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) { |
4187 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 4168 // The cmov instruction doesn't allow 8-bit or FP operands, so |
4188 InstX8632Label *Label = InstX8632Label::create(Func, this); | 4169 // we need explicit control flow. |
4189 | 4170 // XXX test select of i1 src operands, also f32/f64. |
jvoung (off chromium)
2015/05/19 16:25:37
Update the XXX comment?
Jim Stichnoth
2015/05/19 20:41:51
Done. (Forgot to clean up my notes...)
| |
4190 if (Dest->getType() == IceType_i64) { | 4171 // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1: |
4191 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 4172 InstX8632Label *Label = InstX8632Label::create(Func, this); |
4192 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
4193 Operand *SrcLoRI = legalize(loOperand(SrcT), Legal_Reg | Legal_Imm); | |
4194 Operand *SrcHiRI = legalize(hiOperand(SrcT), Legal_Reg | Legal_Imm); | |
4195 _cmp(ConditionRM, Zero); | |
4196 _mov(DestLo, SrcLoRI); | |
4197 _mov(DestHi, SrcHiRI); | |
4198 _br(CondX86::Br_ne, Label); | |
4199 Operand *SrcFLo = loOperand(SrcF); | |
4200 Operand *SrcFHi = hiOperand(SrcF); | |
4201 SrcLoRI = legalize(SrcFLo, Legal_Reg | Legal_Imm); | |
4202 SrcHiRI = legalize(SrcFHi, Legal_Reg | Legal_Imm); | |
4203 _mov_nonkillable(DestLo, SrcLoRI); | |
4204 _mov_nonkillable(DestHi, SrcHiRI); | |
4205 } else { | |
4206 _cmp(ConditionRM, Zero); | |
4207 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm); | 4173 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm); |
4208 _mov(Dest, SrcT); | 4174 _mov(Dest, SrcT); |
4209 _br(CondX86::Br_ne, Label); | 4175 _br(Cond, Label); |
4210 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm); | 4176 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm); |
4211 _mov_nonkillable(Dest, SrcF); | 4177 _mov_nonkillable(Dest, SrcF); |
4178 Context.insert(Label); | |
4179 return; | |
4180 } | |
4181 // mov t, SrcF; cmov_cond t, SrcT; mov dest, t | |
4182 // But if SrcT is immediate, we might be able to do better, as | |
4183 // the cmov instruction doesn't allow an immediate operand: | |
4184 // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t | |
jvoung (off chromium)
2015/05/19 16:25:38
Is it often that both are immediates (does that ha
Jim Stichnoth
2015/05/19 20:41:51
I counted 1115 instances across all of spec2k:
e
| |
4185 if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) { | |
4186 std::swap(SrcT, SrcF); | |
4187 Cond = InstX8632::getOppositeCondition(Cond); | |
4188 } | |
4189 if (DestTy == IceType_i64) { | |
4190 // Set the low portion. | |
4191 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
4192 Variable *TLo = nullptr; | |
4193 Operand *SrcFLo = legalize(loOperand(SrcF)); | |
4194 _mov(TLo, SrcFLo); | |
4195 Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Mem); | |
4196 _cmov(TLo, SrcTLo, Cond); | |
4197 _mov(DestLo, TLo); | |
4198 // Set the high portion. | |
4199 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
4200 Variable *THi = nullptr; | |
4201 Operand *SrcFHi = legalize(hiOperand(SrcF)); | |
4202 _mov(THi, SrcFHi); | |
4203 Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Mem); | |
4204 _cmov(THi, SrcTHi, Cond); | |
4205 _mov(DestHi, THi); | |
4206 return; | |
4212 } | 4207 } |
4213 | 4208 |
4214 Context.insert(Label); | 4209 assert(DestTy == IceType_i16 || DestTy == IceType_i32); |
4210 Variable *T = nullptr; | |
4211 SrcF = legalize(SrcF); | |
4212 _mov(T, SrcF); | |
4213 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem); | |
4214 _cmov(T, SrcT, Cond); | |
4215 _mov(Dest, T); | |
4215 } | 4216 } |
4216 | 4217 |
4217 void TargetX8632::lowerStore(const InstStore *Inst) { | 4218 void TargetX8632::lowerStore(const InstStore *Inst) { |
4218 Operand *Value = Inst->getData(); | 4219 Operand *Value = Inst->getData(); |
4219 Operand *Addr = Inst->getAddr(); | 4220 Operand *Addr = Inst->getAddr(); |
4220 OperandX8632Mem *NewAddr = FormMemoryOperand(Addr, Value->getType()); | 4221 OperandX8632Mem *NewAddr = FormMemoryOperand(Addr, Value->getType()); |
4221 Type Ty = NewAddr->getType(); | 4222 Type Ty = NewAddr->getType(); |
4222 | 4223 |
4223 if (Ty == IceType_i64) { | 4224 if (Ty == IceType_i64) { |
4224 Value = legalize(Value); | 4225 Value = legalize(Value); |
(...skipping 810 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
5035 case FT_Asm: | 5036 case FT_Asm: |
5036 case FT_Iasm: { | 5037 case FT_Iasm: { |
5037 OstreamLocker L(Ctx); | 5038 OstreamLocker L(Ctx); |
5038 emitConstantPool<PoolTypeConverter<float>>(Ctx); | 5039 emitConstantPool<PoolTypeConverter<float>>(Ctx); |
5039 emitConstantPool<PoolTypeConverter<double>>(Ctx); | 5040 emitConstantPool<PoolTypeConverter<double>>(Ctx); |
5040 } break; | 5041 } break; |
5041 } | 5042 } |
5042 } | 5043 } |
5043 | 5044 |
5044 } // end of namespace Ice | 5045 } // end of namespace Ice |
OLD | NEW |