src/IceTargetLoweringARM32.cpp - Issue 1738443002: Subzero. Performance tweaks.

Side by Side Diff: src/IceTargetLoweringARM32.cpp

Issue 1738443002: Subzero. Performance tweaks. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master

Patch Set: Addresses comments -- all of them Created 4 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//	1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//

2 //	2 //

3 // The Subzero Code Generator	3 // The Subzero Code Generator

4 //	4 //

5 // This file is distributed under the University of Illinois Open Source	5 // This file is distributed under the University of Illinois Open Source

6 // License. See LICENSE.TXT for details.	6 // License. See LICENSE.TXT for details.

7 //	7 //

8 //===----------------------------------------------------------------------===//	8 //===----------------------------------------------------------------------===//

9 ///	9 ///

10 /// \file	10 /// \file

(...skipping 283 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
294	294

295 } // end of anonymous namespace	295 } // end of anonymous namespace

296	296

297 TargetARM32::TargetARM32(Cfg *Func)	297 TargetARM32::TargetARM32(Cfg *Func)

298 : TargetLowering(Func), NeedSandboxing(SandboxingType == ST_NaCl),	298 : TargetLowering(Func), NeedSandboxing(SandboxingType == ST_NaCl),

299 CPUFeatures(Func->getContext()->getFlags()) {}	299 CPUFeatures(Func->getContext()->getFlags()) {}

300	300

301 void TargetARM32::staticInit(GlobalContext *Ctx) {	301 void TargetARM32::staticInit(GlobalContext *Ctx) {

302 RegNumT::setLimit(RegARM32::Reg_NUM);	302 RegNumT::setLimit(RegARM32::Reg_NUM);

303 // Limit this size (or do all bitsets need to be the same width)???	303 // Limit this size (or do all bitsets need to be the same width)???

304 llvm::SmallBitVector IntegerRegisters(RegARM32::Reg_NUM);	304 SmallBitVector IntegerRegisters(RegARM32::Reg_NUM);

305 llvm::SmallBitVector I64PairRegisters(RegARM32::Reg_NUM);	305 SmallBitVector I64PairRegisters(RegARM32::Reg_NUM);

306 llvm::SmallBitVector Float32Registers(RegARM32::Reg_NUM);	306 SmallBitVector Float32Registers(RegARM32::Reg_NUM);

307 llvm::SmallBitVector Float64Registers(RegARM32::Reg_NUM);	307 SmallBitVector Float64Registers(RegARM32::Reg_NUM);

308 llvm::SmallBitVector VectorRegisters(RegARM32::Reg_NUM);	308 SmallBitVector VectorRegisters(RegARM32::Reg_NUM);

309 llvm::SmallBitVector QtoSRegisters(RegARM32::Reg_NUM);	309 SmallBitVector QtoSRegisters(RegARM32::Reg_NUM);

310 llvm::SmallBitVector InvalidRegisters(RegARM32::Reg_NUM);	310 SmallBitVector InvalidRegisters(RegARM32::Reg_NUM);

311 const unsigned EncodedReg_q8 = RegARM32::RegTable[RegARM32::Reg_q8].Encoding;	311 const unsigned EncodedReg_q8 = RegARM32::RegTable[RegARM32::Reg_q8].Encoding;

312 for (int i = 0; i < RegARM32::Reg_NUM; ++i) {	312 for (int i = 0; i < RegARM32::Reg_NUM; ++i) {

313 const auto &Entry = RegARM32::RegTable[i];	313 const auto &Entry = RegARM32::RegTable[i];

314 IntegerRegisters[i] = Entry.IsInt;	314 IntegerRegisters[i] = Entry.IsInt;

315 I64PairRegisters[i] = Entry.IsI64Pair;	315 I64PairRegisters[i] = Entry.IsI64Pair;

316 Float32Registers[i] = Entry.IsFP32;	316 Float32Registers[i] = Entry.IsFP32;

317 Float64Registers[i] = Entry.IsFP64;	317 Float64Registers[i] = Entry.IsFP64;

318 VectorRegisters[i] = Entry.IsVec128;	318 VectorRegisters[i] = Entry.IsVec128;

319 RegisterAliases[i].resize(RegARM32::Reg_NUM);	319 RegisterAliases[i].resize(RegARM32::Reg_NUM);

320 // TODO(eholk): It would be better to store a QtoS flag in the	320 // TODO(eholk): It would be better to store a QtoS flag in the

(...skipping 596 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
917 // We makeReg() here instead of getPhysicalRegister() because the latter ends	917 // We makeReg() here instead of getPhysicalRegister() because the latter ends

918 // up creating multi-blocks temporaries that liveness fails to validate.	918 // up creating multi-blocks temporaries that liveness fails to validate.

919 auto *PC = makeReg(IceType_i32, RegARM32::Reg_pc);	919 auto *PC = makeReg(IceType_i32, RegARM32::Reg_pc);

920	920

921 auto *AddPcReloc = RelocOffset::create(Ctx);	921 auto *AddPcReloc = RelocOffset::create(Ctx);

922 AddPcReloc->setSubtract(true);	922 AddPcReloc->setSubtract(true);

923 auto *AddPcLabel = InstARM32Label::create(Func, this);	923 auto *AddPcLabel = InstARM32Label::create(Func, this);

924 AddPcLabel->setRelocOffset(AddPcReloc);	924 AddPcLabel->setRelocOffset(AddPcReloc);

925	925

926 const IceString EmitText = Name;	926 const IceString EmitText = Name;

927 // We need a -8 in the relocation expression to account for the pc's value

928 // read by the first instruction emitted in Finish(PC).

929 auto *Imm8 = RelocOffset::create(Ctx, -8);

930	927

931 auto *MovwReloc = RelocOffset::create(Ctx);	928 auto *MovwReloc = RelocOffset::create(Ctx);

932 auto *MovwLabel = InstARM32Label::create(Func, this);	929 auto *MovwLabel = InstARM32Label::create(Func, this);

933 MovwLabel->setRelocOffset(MovwReloc);	930 MovwLabel->setRelocOffset(MovwReloc);

934	931

935 auto *MovtReloc = RelocOffset::create(Ctx);	932 auto *MovtReloc = RelocOffset::create(Ctx);

936 auto *MovtLabel = InstARM32Label::create(Func, this);	933 auto *MovtLabel = InstARM32Label::create(Func, this);

937 MovtLabel->setRelocOffset(MovtReloc);	934 MovtLabel->setRelocOffset(MovtReloc);

938	935

939 // The EmitString for these constant relocatables have hardcoded offsets	936 // The EmitString for these constant relocatables have hardcoded offsets

940 // attached to them. This could be dangerous if, e.g., we ever implemented	937 // attached to them. This could be dangerous if, e.g., we ever implemented

941 // instruction scheduling but llvm-mc currently does not support	938 // instruction scheduling but llvm-mc currently does not support

942 //	939 //

943 // movw reg, #:lower16:(Symbol - Label - Number)	940 // movw reg, #:lower16:(Symbol - Label - Number)

944 // movt reg, #:upper16:(Symbol - Label - Number)	941 // movt reg, #:upper16:(Symbol - Label - Number)

945 //	942 //

946 // relocations.	943 // relocations.

947 auto *CRLower = Ctx->getConstantSym({MovwReloc, AddPcReloc, Imm8}, Name,	944 static constexpr RelocOffsetT PcOffset = -8;

	945 auto *CRLower = Ctx->getConstantSym(PcOffset, {MovwReloc, AddPcReloc}, Name,

948 EmitText + " -16", SuppressMangling);	946 EmitText + " -16", SuppressMangling);

949 auto *CRUpper = Ctx->getConstantSym({MovtReloc, AddPcReloc, Imm8}, Name,	947 auto *CRUpper = Ctx->getConstantSym(PcOffset, {MovtReloc, AddPcReloc}, Name,

950 EmitText + " -12", SuppressMangling);	948 EmitText + " -12", SuppressMangling);

951	949

952 Context.insert(MovwLabel);	950 Context.insert(MovwLabel);

953 _movw(Register, CRLower);	951 _movw(Register, CRLower);

954 Context.insert(MovtLabel);	952 Context.insert(MovtLabel);

955 _movt(Register, CRUpper);	953 _movt(Register, CRUpper);

956 // PC = fake-def to keep liveness consistent.	954 // PC = fake-def to keep liveness consistent.

957 Context.insert<InstFakeDef>(PC);	955 Context.insert<InstFakeDef>(PC);

958 Context.insert(AddPcLabel);	956 Context.insert(AddPcLabel);

959 Finish(PC);	957 Finish(PC);

(...skipping 473 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1433 // registers (as a side effect, this gives variables a second chance at	1431 // registers (as a side effect, this gives variables a second chance at

1434 // physical register assignment).	1432 // physical register assignment).

1435 //	1433 //

1436 // A middle ground approach is to leverage sparsity and allocate one block of	1434 // A middle ground approach is to leverage sparsity and allocate one block of

1437 // space on the frame for globals (variables with multi-block lifetime), and	1435 // space on the frame for globals (variables with multi-block lifetime), and

1438 // one block to share for locals (single-block lifetime).	1436 // one block to share for locals (single-block lifetime).

1439	1437

1440 Context.init(Node);	1438 Context.init(Node);

1441 Context.setInsertPoint(Context.getCur());	1439 Context.setInsertPoint(Context.getCur());

1442	1440

1443 llvm::SmallBitVector CalleeSaves =	1441 SmallBitVector CalleeSaves = getRegisterSet(RegSet_CalleeSave, RegSet_None);

1444 getRegisterSet(RegSet_CalleeSave, RegSet_None);	1442 RegsUsed = SmallBitVector(CalleeSaves.size());

1445 RegsUsed = llvm::SmallBitVector(CalleeSaves.size());

1446 VarList SortedSpilledVariables;	1443 VarList SortedSpilledVariables;

1447 size_t GlobalsSize = 0;	1444 size_t GlobalsSize = 0;

1448 // If there is a separate locals area, this represents that area. Otherwise	1445 // If there is a separate locals area, this represents that area. Otherwise

1449 // it counts any variable not counted by GlobalsSize.	1446 // it counts any variable not counted by GlobalsSize.

1450 SpillAreaSizeBytes = 0;	1447 SpillAreaSizeBytes = 0;

1451 // If there is a separate locals area, this specifies the alignment for it.	1448 // If there is a separate locals area, this specifies the alignment for it.

1452 uint32_t LocalsSlotsAlignmentBytes = 0;	1449 uint32_t LocalsSlotsAlignmentBytes = 0;

1453 // The entire spill locations area gets aligned to largest natural alignment	1450 // The entire spill locations area gets aligned to largest natural alignment

1454 // of the variables that have a spill slot.	1451 // of the variables that have a spill slot.

1455 uint32_t SpillAreaAlignmentBytes = 0;	1452 uint32_t SpillAreaAlignmentBytes = 0;

(...skipping 35 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1491 }	1488 }

1492 if (!MaybeLeafFunc) {	1489 if (!MaybeLeafFunc) {

1493 CalleeSaves[RegARM32::Reg_lr] = true;	1490 CalleeSaves[RegARM32::Reg_lr] = true;

1494 RegsUsed[RegARM32::Reg_lr] = true;	1491 RegsUsed[RegARM32::Reg_lr] = true;

1495 }	1492 }

1496	1493

1497 // Make two passes over the used registers. The first pass records all the	1494 // Make two passes over the used registers. The first pass records all the

1498 // used registers -- and their aliases. Then, we figure out which GPRs and	1495 // used registers -- and their aliases. Then, we figure out which GPRs and

1499 // VFP S registers should be saved. We don't bother saving D/Q registers	1496 // VFP S registers should be saved. We don't bother saving D/Q registers

1500 // because their uses are recorded as S regs uses.	1497 // because their uses are recorded as S regs uses.

1501 llvm::SmallBitVector ToPreserve(RegARM32::Reg_NUM);	1498 SmallBitVector ToPreserve(RegARM32::Reg_NUM);

1502 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {	1499 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {

1503 if (NeedSandboxing && i == RegARM32::Reg_r9) {	1500 if (NeedSandboxing && i == RegARM32::Reg_r9) {

1504 // r9 is never updated in sandboxed code.	1501 // r9 is never updated in sandboxed code.

1505 continue;	1502 continue;

1506 }	1503 }

1507 if (CalleeSaves[i] && RegsUsed[i]) {	1504 if (CalleeSaves[i] && RegsUsed[i]) {

1508 ToPreserve |= RegisterAliases[i];	1505 ToPreserve |= RegisterAliases[i];

1509 }	1506 }

1510 }	1507 }

1511	1508

(...skipping 624 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2136 }	2133 }

2137 Variable *BaseR = legalizeToReg(Base);	2134 Variable *BaseR = legalizeToReg(Base);

2138 return OperandARM32Mem::create(Func, SplitType, BaseR, Offset,	2135 return OperandARM32Mem::create(Func, SplitType, BaseR, Offset,

2139 Mem->getAddrMode());	2136 Mem->getAddrMode());

2140 }	2137 }

2141 }	2138 }

2142 llvm::report_fatal_error("Unsupported operand type");	2139 llvm::report_fatal_error("Unsupported operand type");

2143 return nullptr;	2140 return nullptr;

2144 }	2141 }

2145	2142

2146 llvm::SmallBitVector TargetARM32::getRegisterSet(RegSetMask Include,	2143 SmallBitVector TargetARM32::getRegisterSet(RegSetMask Include,

2147 RegSetMask Exclude) const {	2144 RegSetMask Exclude) const {

2148 llvm::SmallBitVector Registers(RegARM32::Reg_NUM);	2145 SmallBitVector Registers(RegARM32::Reg_NUM);

2149	2146

2150 for (uint32_t i = 0; i < RegARM32::Reg_NUM; ++i) {	2147 for (uint32_t i = 0; i < RegARM32::Reg_NUM; ++i) {

2151 const auto &Entry = RegARM32::RegTable[i];	2148 const auto &Entry = RegARM32::RegTable[i];

2152 if (Entry.Scratch && (Include & RegSet_CallerSave))	2149 if (Entry.Scratch && (Include & RegSet_CallerSave))

2153 Registers[i] = true;	2150 Registers[i] = true;

2154 if (Entry.Preserved && (Include & RegSet_CalleeSave))	2151 if (Entry.Preserved && (Include & RegSet_CalleeSave))

2155 Registers[i] = true;	2152 Registers[i] = true;

2156 if (Entry.StackPtr && (Include & RegSet_StackPointer))	2153 if (Entry.StackPtr && (Include & RegSet_StackPointer))

2157 Registers[i] = true;	2154 Registers[i] = true;

2158 if (Entry.FramePtr && (Include & RegSet_FramePointer))	2155 if (Entry.FramePtr && (Include & RegSet_FramePointer))

(...skipping 3887 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
6046	6043

6047 void TargetARM32::postLower() {	6044 void TargetARM32::postLower() {

6048 if (Ctx->getFlags().getOptLevel() == Opt_m1)	6045 if (Ctx->getFlags().getOptLevel() == Opt_m1)

6049 return;	6046 return;

6050 markRedefinitions();	6047 markRedefinitions();

6051 Context.availabilityUpdate();	6048 Context.availabilityUpdate();

6052 }	6049 }

6053	6050

6054 void TargetARM32::makeRandomRegisterPermutation(	6051 void TargetARM32::makeRandomRegisterPermutation(

6055 llvm::SmallVectorImpl<RegNumT> &Permutation,	6052 llvm::SmallVectorImpl<RegNumT> &Permutation,

6056 const llvm::SmallBitVector &ExcludeRegisters, uint64_t Salt) const {	6053 const SmallBitVector &ExcludeRegisters, uint64_t Salt) const {

6057 (void)Permutation;	6054 (void)Permutation;

6058 (void)ExcludeRegisters;	6055 (void)ExcludeRegisters;

6059 (void)Salt;	6056 (void)Salt;

6060 UnimplementedError(Func->getContext()->getFlags());	6057 UnimplementedError(Func->getContext()->getFlags());

6061 }	6058 }

6062	6059

6063 void TargetARM32::emit(const ConstantInteger32 *C) const {	6060 void TargetARM32::emit(const ConstantInteger32 *C) const {

6064 if (!BuildDefs::dump())	6061 if (!BuildDefs::dump())

6065 return;	6062 return;

6066 Ostream &Str = Ctx->getStrEmit();	6063 Ostream &Str = Ctx->getStrEmit();

(...skipping 726 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
6793 << ".eabi_attribute 42, 1 @ Tag_MPextension_use\n"	6790 << ".eabi_attribute 42, 1 @ Tag_MPextension_use\n"

6794 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n";	6791 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n";

6795 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {	6792 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {

6796 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n";	6793 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n";

6797 }	6794 }

6798 // Technically R9 is used for TLS with Sandboxing, and we reserve it.	6795 // Technically R9 is used for TLS with Sandboxing, and we reserve it.

6799 // However, for compatibility with current NaCl LLVM, don't claim that.	6796 // However, for compatibility with current NaCl LLVM, don't claim that.

6800 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";	6797 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";

6801 }	6798 }

6802	6799

6803 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[RegARM32::RCARM32_NUM];	6800 SmallBitVector TargetARM32::TypeToRegisterSet[RegARM32::RCARM32_NUM];

6804 llvm::SmallBitVector	6801 SmallBitVector TargetARM32::TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM];

6805 TargetARM32::TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM];	6802 SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM];

6806 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM];

6807	6803

6808 } // end of namespace ARM32	6804 } // end of namespace ARM32

6809 } // end of namespace Ice	6805 } // end of namespace Ice

OLD	NEW

« no previous file with comments | « src/IceTargetLoweringARM32.h ('k') | src/IceTargetLoweringMIPS32.h » ('j') | no next file with comments »