Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(652)

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 1125323004: Subzero: Use cmov to improve lowering for the select instruction. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Remove a rogue comment Created 5 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceInstX8632.cpp ('k') | src/assembler_ia32.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file implements the TargetLoweringX8632 class, which 10 // This file implements the TargetLoweringX8632 class, which
(...skipping 4058 matching lines...) Expand 10 before | Expand all | Expand 10 after
4069 // function. Otherwise post-call esp adjustments get dead-code 4069 // function. Otherwise post-call esp adjustments get dead-code
4070 // eliminated. TODO: Are there more places where the fake use 4070 // eliminated. TODO: Are there more places where the fake use
4071 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not 4071 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not
4072 // have a ret instruction. 4072 // have a ret instruction.
4073 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); 4073 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
4074 Context.insert(InstFakeUse::create(Func, esp)); 4074 Context.insert(InstFakeUse::create(Func, esp));
4075 } 4075 }
4076 4076
4077 void TargetX8632::lowerSelect(const InstSelect *Inst) { 4077 void TargetX8632::lowerSelect(const InstSelect *Inst) {
4078 Variable *Dest = Inst->getDest(); 4078 Variable *Dest = Inst->getDest();
4079 Type DestTy = Dest->getType();
4079 Operand *SrcT = Inst->getTrueOperand(); 4080 Operand *SrcT = Inst->getTrueOperand();
4080 Operand *SrcF = Inst->getFalseOperand(); 4081 Operand *SrcF = Inst->getFalseOperand();
4081 Operand *Condition = Inst->getCondition(); 4082 Operand *Condition = Inst->getCondition();
4082 4083
4083 if (isVectorType(Dest->getType())) { 4084 if (isVectorType(DestTy)) {
4084 Type SrcTy = SrcT->getType(); 4085 Type SrcTy = SrcT->getType();
4085 Variable *T = makeReg(SrcTy); 4086 Variable *T = makeReg(SrcTy);
4086 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); 4087 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
4087 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); 4088 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
4088 if (InstructionSet >= SSE4_1) { 4089 if (InstructionSet >= SSE4_1) {
4089 // TODO(wala): If the condition operand is a constant, use blendps 4090 // TODO(wala): If the condition operand is a constant, use blendps
4090 // or pblendw. 4091 // or pblendw.
4091 // 4092 //
4092 // Use blendvps or pblendvb to implement select. 4093 // Use blendvps or pblendvb to implement select.
4093 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || 4094 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
4131 } 4132 }
4132 _movp(T2, T); 4133 _movp(T2, T);
4133 _pand(T, SrcTRM); 4134 _pand(T, SrcTRM);
4134 _pandn(T2, SrcFRM); 4135 _pandn(T2, SrcFRM);
4135 _por(T, T2); 4136 _por(T, T2);
4136 _movp(Dest, T); 4137 _movp(Dest, T);
4137 4138
4138 return; 4139 return;
4139 } 4140 }
4140 4141
4142 CondX86::BrCond Cond = CondX86::Br_ne;
4143 Operand *CmpOpnd0 = nullptr;
4144 Operand *CmpOpnd1 = nullptr;
4141 // Handle folding opportunities. 4145 // Handle folding opportunities.
4142 if (const class Inst *Producer = FoldingInfo.getProducerFor(Condition)) { 4146 if (const class Inst *Producer = FoldingInfo.getProducerFor(Condition)) {
4143 assert(Producer->isDeleted()); 4147 assert(Producer->isDeleted());
4144 switch (BoolFolding::getProducerKind(Producer)) { 4148 switch (BoolFolding::getProducerKind(Producer)) {
4145 default: 4149 default:
4146 break; 4150 break;
4147 case BoolFolding::PK_Icmp32: { 4151 case BoolFolding::PK_Icmp32: {
4148 // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1:
4149 auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer); 4152 auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer);
4150 InstX8632Label *Label = InstX8632Label::create(Func, this); 4153 Cond = getIcmp32Mapping(Cmp->getCondition());
4151 Operand *Src0 = Producer->getSrc(0); 4154 CmpOpnd1 = legalize(Producer->getSrc(1));
4152 Operand *Src1 = legalize(Producer->getSrc(1)); 4155 CmpOpnd0 = legalizeSrc0ForCmp(Producer->getSrc(0), CmpOpnd1);
4153 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1); 4156 } break;
4154 _cmp(Src0RM, Src1);
4155 // This is the same code as below (for both i64 and non-i64),
4156 // except without the _cmp instruction and with a different
4157 // branch condition. TODO(stichnot): refactor.
4158 if (Dest->getType() == IceType_i64) {
4159 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
4160 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
4161 Operand *SrcLoRI = legalize(loOperand(SrcT), Legal_Reg | Legal_Imm);
4162 Operand *SrcHiRI = legalize(hiOperand(SrcT), Legal_Reg | Legal_Imm);
4163 _mov(DestLo, SrcLoRI);
4164 _mov(DestHi, SrcHiRI);
4165 _br(getIcmp32Mapping(Cmp->getCondition()), Label);
4166 Operand *SrcFLo = loOperand(SrcF);
4167 Operand *SrcFHi = hiOperand(SrcF);
4168 SrcLoRI = legalize(SrcFLo, Legal_Reg | Legal_Imm);
4169 SrcHiRI = legalize(SrcFHi, Legal_Reg | Legal_Imm);
4170 _mov_nonkillable(DestLo, SrcLoRI);
4171 _mov_nonkillable(DestHi, SrcHiRI);
4172 } else {
4173 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm);
4174 _mov(Dest, SrcT);
4175 _br(getIcmp32Mapping(Cmp->getCondition()), Label);
4176 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm);
4177 _mov_nonkillable(Dest, SrcF);
4178 }
4179 Context.insert(Label);
4180 return;
4181 }
4182 } 4157 }
4183 } 4158 }
4159 if (CmpOpnd0 == nullptr) {
4160 CmpOpnd0 = legalize(Condition, Legal_Reg | Legal_Mem);
4161 CmpOpnd1 = Ctx->getConstantZero(IceType_i32);
4162 }
4163 assert(CmpOpnd0);
4164 assert(CmpOpnd1);
4184 4165
4185 // a=d?b:c ==> cmp d,0; a=b; jne L1; a=c; L1: 4166 _cmp(CmpOpnd0, CmpOpnd1);
4186 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); 4167 if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) {
4187 Constant *Zero = Ctx->getConstantZero(IceType_i32); 4168 // The cmov instruction doesn't allow 8-bit or FP operands, so
4188 InstX8632Label *Label = InstX8632Label::create(Func, this); 4169 // we need explicit control flow.
4189 4170 // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1:
4190 if (Dest->getType() == IceType_i64) { 4171 InstX8632Label *Label = InstX8632Label::create(Func, this);
4191 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
4192 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
4193 Operand *SrcLoRI = legalize(loOperand(SrcT), Legal_Reg | Legal_Imm);
4194 Operand *SrcHiRI = legalize(hiOperand(SrcT), Legal_Reg | Legal_Imm);
4195 _cmp(ConditionRM, Zero);
4196 _mov(DestLo, SrcLoRI);
4197 _mov(DestHi, SrcHiRI);
4198 _br(CondX86::Br_ne, Label);
4199 Operand *SrcFLo = loOperand(SrcF);
4200 Operand *SrcFHi = hiOperand(SrcF);
4201 SrcLoRI = legalize(SrcFLo, Legal_Reg | Legal_Imm);
4202 SrcHiRI = legalize(SrcFHi, Legal_Reg | Legal_Imm);
4203 _mov_nonkillable(DestLo, SrcLoRI);
4204 _mov_nonkillable(DestHi, SrcHiRI);
4205 } else {
4206 _cmp(ConditionRM, Zero);
4207 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm); 4172 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm);
4208 _mov(Dest, SrcT); 4173 _mov(Dest, SrcT);
4209 _br(CondX86::Br_ne, Label); 4174 _br(Cond, Label);
4210 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm); 4175 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm);
4211 _mov_nonkillable(Dest, SrcF); 4176 _mov_nonkillable(Dest, SrcF);
4177 Context.insert(Label);
4178 return;
4179 }
4180 // mov t, SrcF; cmov_cond t, SrcT; mov dest, t
4181 // But if SrcT is immediate, we might be able to do better, as
4182 // the cmov instruction doesn't allow an immediate operand:
4183 // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t
4184 if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) {
4185 std::swap(SrcT, SrcF);
4186 Cond = InstX8632::getOppositeCondition(Cond);
4187 }
4188 if (DestTy == IceType_i64) {
4189 // Set the low portion.
4190 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
4191 Variable *TLo = nullptr;
4192 Operand *SrcFLo = legalize(loOperand(SrcF));
4193 _mov(TLo, SrcFLo);
4194 Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Mem);
4195 _cmov(TLo, SrcTLo, Cond);
4196 _mov(DestLo, TLo);
4197 // Set the high portion.
4198 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
4199 Variable *THi = nullptr;
4200 Operand *SrcFHi = legalize(hiOperand(SrcF));
4201 _mov(THi, SrcFHi);
4202 Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Mem);
4203 _cmov(THi, SrcTHi, Cond);
4204 _mov(DestHi, THi);
4205 return;
4212 } 4206 }
4213 4207
4214 Context.insert(Label); 4208 assert(DestTy == IceType_i16 || DestTy == IceType_i32);
4209 Variable *T = nullptr;
4210 SrcF = legalize(SrcF);
4211 _mov(T, SrcF);
4212 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem);
4213 _cmov(T, SrcT, Cond);
4214 _mov(Dest, T);
4215 } 4215 }
4216 4216
4217 void TargetX8632::lowerStore(const InstStore *Inst) { 4217 void TargetX8632::lowerStore(const InstStore *Inst) {
4218 Operand *Value = Inst->getData(); 4218 Operand *Value = Inst->getData();
4219 Operand *Addr = Inst->getAddr(); 4219 Operand *Addr = Inst->getAddr();
4220 OperandX8632Mem *NewAddr = FormMemoryOperand(Addr, Value->getType()); 4220 OperandX8632Mem *NewAddr = FormMemoryOperand(Addr, Value->getType());
4221 Type Ty = NewAddr->getType(); 4221 Type Ty = NewAddr->getType();
4222 4222
4223 if (Ty == IceType_i64) { 4223 if (Ty == IceType_i64) {
4224 Value = legalize(Value); 4224 Value = legalize(Value);
(...skipping 810 matching lines...) Expand 10 before | Expand all | Expand 10 after
5035 case FT_Asm: 5035 case FT_Asm:
5036 case FT_Iasm: { 5036 case FT_Iasm: {
5037 OstreamLocker L(Ctx); 5037 OstreamLocker L(Ctx);
5038 emitConstantPool<PoolTypeConverter<float>>(Ctx); 5038 emitConstantPool<PoolTypeConverter<float>>(Ctx);
5039 emitConstantPool<PoolTypeConverter<double>>(Ctx); 5039 emitConstantPool<PoolTypeConverter<double>>(Ctx);
5040 } break; 5040 } break;
5041 } 5041 }
5042 } 5042 }
5043 5043
5044 } // end of namespace Ice 5044 } // end of namespace Ice
OLDNEW
« no previous file with comments | « src/IceInstX8632.cpp ('k') | src/assembler_ia32.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698