Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(304)

Side by Side Diff: src/IceTargetLoweringARM32.cpp

Issue 1738443002: Subzero. Performance tweaks. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Addresses comments -- all of them Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceTargetLoweringARM32.h ('k') | src/IceTargetLoweringMIPS32.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 283 matching lines...) Expand 10 before | Expand all | Expand 10 after
294 294
295 } // end of anonymous namespace 295 } // end of anonymous namespace
296 296
297 TargetARM32::TargetARM32(Cfg *Func) 297 TargetARM32::TargetARM32(Cfg *Func)
298 : TargetLowering(Func), NeedSandboxing(SandboxingType == ST_NaCl), 298 : TargetLowering(Func), NeedSandboxing(SandboxingType == ST_NaCl),
299 CPUFeatures(Func->getContext()->getFlags()) {} 299 CPUFeatures(Func->getContext()->getFlags()) {}
300 300
301 void TargetARM32::staticInit(GlobalContext *Ctx) { 301 void TargetARM32::staticInit(GlobalContext *Ctx) {
302 RegNumT::setLimit(RegARM32::Reg_NUM); 302 RegNumT::setLimit(RegARM32::Reg_NUM);
303 // Limit this size (or do all bitsets need to be the same width)??? 303 // Limit this size (or do all bitsets need to be the same width)???
304 llvm::SmallBitVector IntegerRegisters(RegARM32::Reg_NUM); 304 SmallBitVector IntegerRegisters(RegARM32::Reg_NUM);
305 llvm::SmallBitVector I64PairRegisters(RegARM32::Reg_NUM); 305 SmallBitVector I64PairRegisters(RegARM32::Reg_NUM);
306 llvm::SmallBitVector Float32Registers(RegARM32::Reg_NUM); 306 SmallBitVector Float32Registers(RegARM32::Reg_NUM);
307 llvm::SmallBitVector Float64Registers(RegARM32::Reg_NUM); 307 SmallBitVector Float64Registers(RegARM32::Reg_NUM);
308 llvm::SmallBitVector VectorRegisters(RegARM32::Reg_NUM); 308 SmallBitVector VectorRegisters(RegARM32::Reg_NUM);
309 llvm::SmallBitVector QtoSRegisters(RegARM32::Reg_NUM); 309 SmallBitVector QtoSRegisters(RegARM32::Reg_NUM);
310 llvm::SmallBitVector InvalidRegisters(RegARM32::Reg_NUM); 310 SmallBitVector InvalidRegisters(RegARM32::Reg_NUM);
311 const unsigned EncodedReg_q8 = RegARM32::RegTable[RegARM32::Reg_q8].Encoding; 311 const unsigned EncodedReg_q8 = RegARM32::RegTable[RegARM32::Reg_q8].Encoding;
312 for (int i = 0; i < RegARM32::Reg_NUM; ++i) { 312 for (int i = 0; i < RegARM32::Reg_NUM; ++i) {
313 const auto &Entry = RegARM32::RegTable[i]; 313 const auto &Entry = RegARM32::RegTable[i];
314 IntegerRegisters[i] = Entry.IsInt; 314 IntegerRegisters[i] = Entry.IsInt;
315 I64PairRegisters[i] = Entry.IsI64Pair; 315 I64PairRegisters[i] = Entry.IsI64Pair;
316 Float32Registers[i] = Entry.IsFP32; 316 Float32Registers[i] = Entry.IsFP32;
317 Float64Registers[i] = Entry.IsFP64; 317 Float64Registers[i] = Entry.IsFP64;
318 VectorRegisters[i] = Entry.IsVec128; 318 VectorRegisters[i] = Entry.IsVec128;
319 RegisterAliases[i].resize(RegARM32::Reg_NUM); 319 RegisterAliases[i].resize(RegARM32::Reg_NUM);
320 // TODO(eholk): It would be better to store a QtoS flag in the 320 // TODO(eholk): It would be better to store a QtoS flag in the
(...skipping 596 matching lines...) Expand 10 before | Expand all | Expand 10 after
917 // We makeReg() here instead of getPhysicalRegister() because the latter ends 917 // We makeReg() here instead of getPhysicalRegister() because the latter ends
918 // up creating multi-blocks temporaries that liveness fails to validate. 918 // up creating multi-blocks temporaries that liveness fails to validate.
919 auto *PC = makeReg(IceType_i32, RegARM32::Reg_pc); 919 auto *PC = makeReg(IceType_i32, RegARM32::Reg_pc);
920 920
921 auto *AddPcReloc = RelocOffset::create(Ctx); 921 auto *AddPcReloc = RelocOffset::create(Ctx);
922 AddPcReloc->setSubtract(true); 922 AddPcReloc->setSubtract(true);
923 auto *AddPcLabel = InstARM32Label::create(Func, this); 923 auto *AddPcLabel = InstARM32Label::create(Func, this);
924 AddPcLabel->setRelocOffset(AddPcReloc); 924 AddPcLabel->setRelocOffset(AddPcReloc);
925 925
926 const IceString EmitText = Name; 926 const IceString EmitText = Name;
927 // We need a -8 in the relocation expression to account for the pc's value
928 // read by the first instruction emitted in Finish(PC).
929 auto *Imm8 = RelocOffset::create(Ctx, -8);
930 927
931 auto *MovwReloc = RelocOffset::create(Ctx); 928 auto *MovwReloc = RelocOffset::create(Ctx);
932 auto *MovwLabel = InstARM32Label::create(Func, this); 929 auto *MovwLabel = InstARM32Label::create(Func, this);
933 MovwLabel->setRelocOffset(MovwReloc); 930 MovwLabel->setRelocOffset(MovwReloc);
934 931
935 auto *MovtReloc = RelocOffset::create(Ctx); 932 auto *MovtReloc = RelocOffset::create(Ctx);
936 auto *MovtLabel = InstARM32Label::create(Func, this); 933 auto *MovtLabel = InstARM32Label::create(Func, this);
937 MovtLabel->setRelocOffset(MovtReloc); 934 MovtLabel->setRelocOffset(MovtReloc);
938 935
939 // The EmitString for these constant relocatables have hardcoded offsets 936 // The EmitString for these constant relocatables have hardcoded offsets
940 // attached to them. This could be dangerous if, e.g., we ever implemented 937 // attached to them. This could be dangerous if, e.g., we ever implemented
941 // instruction scheduling but llvm-mc currently does not support 938 // instruction scheduling but llvm-mc currently does not support
942 // 939 //
943 // movw reg, #:lower16:(Symbol - Label - Number) 940 // movw reg, #:lower16:(Symbol - Label - Number)
944 // movt reg, #:upper16:(Symbol - Label - Number) 941 // movt reg, #:upper16:(Symbol - Label - Number)
945 // 942 //
946 // relocations. 943 // relocations.
947 auto *CRLower = Ctx->getConstantSym({MovwReloc, AddPcReloc, Imm8}, Name, 944 static constexpr RelocOffsetT PcOffset = -8;
945 auto *CRLower = Ctx->getConstantSym(PcOffset, {MovwReloc, AddPcReloc}, Name,
948 EmitText + " -16", SuppressMangling); 946 EmitText + " -16", SuppressMangling);
949 auto *CRUpper = Ctx->getConstantSym({MovtReloc, AddPcReloc, Imm8}, Name, 947 auto *CRUpper = Ctx->getConstantSym(PcOffset, {MovtReloc, AddPcReloc}, Name,
950 EmitText + " -12", SuppressMangling); 948 EmitText + " -12", SuppressMangling);
951 949
952 Context.insert(MovwLabel); 950 Context.insert(MovwLabel);
953 _movw(Register, CRLower); 951 _movw(Register, CRLower);
954 Context.insert(MovtLabel); 952 Context.insert(MovtLabel);
955 _movt(Register, CRUpper); 953 _movt(Register, CRUpper);
956 // PC = fake-def to keep liveness consistent. 954 // PC = fake-def to keep liveness consistent.
957 Context.insert<InstFakeDef>(PC); 955 Context.insert<InstFakeDef>(PC);
958 Context.insert(AddPcLabel); 956 Context.insert(AddPcLabel);
959 Finish(PC); 957 Finish(PC);
(...skipping 473 matching lines...) Expand 10 before | Expand all | Expand 10 after
1433 // registers (as a side effect, this gives variables a second chance at 1431 // registers (as a side effect, this gives variables a second chance at
1434 // physical register assignment). 1432 // physical register assignment).
1435 // 1433 //
1436 // A middle ground approach is to leverage sparsity and allocate one block of 1434 // A middle ground approach is to leverage sparsity and allocate one block of
1437 // space on the frame for globals (variables with multi-block lifetime), and 1435 // space on the frame for globals (variables with multi-block lifetime), and
1438 // one block to share for locals (single-block lifetime). 1436 // one block to share for locals (single-block lifetime).
1439 1437
1440 Context.init(Node); 1438 Context.init(Node);
1441 Context.setInsertPoint(Context.getCur()); 1439 Context.setInsertPoint(Context.getCur());
1442 1440
1443 llvm::SmallBitVector CalleeSaves = 1441 SmallBitVector CalleeSaves = getRegisterSet(RegSet_CalleeSave, RegSet_None);
1444 getRegisterSet(RegSet_CalleeSave, RegSet_None); 1442 RegsUsed = SmallBitVector(CalleeSaves.size());
1445 RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
1446 VarList SortedSpilledVariables; 1443 VarList SortedSpilledVariables;
1447 size_t GlobalsSize = 0; 1444 size_t GlobalsSize = 0;
1448 // If there is a separate locals area, this represents that area. Otherwise 1445 // If there is a separate locals area, this represents that area. Otherwise
1449 // it counts any variable not counted by GlobalsSize. 1446 // it counts any variable not counted by GlobalsSize.
1450 SpillAreaSizeBytes = 0; 1447 SpillAreaSizeBytes = 0;
1451 // If there is a separate locals area, this specifies the alignment for it. 1448 // If there is a separate locals area, this specifies the alignment for it.
1452 uint32_t LocalsSlotsAlignmentBytes = 0; 1449 uint32_t LocalsSlotsAlignmentBytes = 0;
1453 // The entire spill locations area gets aligned to largest natural alignment 1450 // The entire spill locations area gets aligned to largest natural alignment
1454 // of the variables that have a spill slot. 1451 // of the variables that have a spill slot.
1455 uint32_t SpillAreaAlignmentBytes = 0; 1452 uint32_t SpillAreaAlignmentBytes = 0;
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
1491 } 1488 }
1492 if (!MaybeLeafFunc) { 1489 if (!MaybeLeafFunc) {
1493 CalleeSaves[RegARM32::Reg_lr] = true; 1490 CalleeSaves[RegARM32::Reg_lr] = true;
1494 RegsUsed[RegARM32::Reg_lr] = true; 1491 RegsUsed[RegARM32::Reg_lr] = true;
1495 } 1492 }
1496 1493
1497 // Make two passes over the used registers. The first pass records all the 1494 // Make two passes over the used registers. The first pass records all the
1498 // used registers -- and their aliases. Then, we figure out which GPRs and 1495 // used registers -- and their aliases. Then, we figure out which GPRs and
1499 // VFP S registers should be saved. We don't bother saving D/Q registers 1496 // VFP S registers should be saved. We don't bother saving D/Q registers
1500 // because their uses are recorded as S regs uses. 1497 // because their uses are recorded as S regs uses.
1501 llvm::SmallBitVector ToPreserve(RegARM32::Reg_NUM); 1498 SmallBitVector ToPreserve(RegARM32::Reg_NUM);
1502 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { 1499 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
1503 if (NeedSandboxing && i == RegARM32::Reg_r9) { 1500 if (NeedSandboxing && i == RegARM32::Reg_r9) {
1504 // r9 is never updated in sandboxed code. 1501 // r9 is never updated in sandboxed code.
1505 continue; 1502 continue;
1506 } 1503 }
1507 if (CalleeSaves[i] && RegsUsed[i]) { 1504 if (CalleeSaves[i] && RegsUsed[i]) {
1508 ToPreserve |= RegisterAliases[i]; 1505 ToPreserve |= RegisterAliases[i];
1509 } 1506 }
1510 } 1507 }
1511 1508
(...skipping 624 matching lines...) Expand 10 before | Expand all | Expand 10 after
2136 } 2133 }
2137 Variable *BaseR = legalizeToReg(Base); 2134 Variable *BaseR = legalizeToReg(Base);
2138 return OperandARM32Mem::create(Func, SplitType, BaseR, Offset, 2135 return OperandARM32Mem::create(Func, SplitType, BaseR, Offset,
2139 Mem->getAddrMode()); 2136 Mem->getAddrMode());
2140 } 2137 }
2141 } 2138 }
2142 llvm::report_fatal_error("Unsupported operand type"); 2139 llvm::report_fatal_error("Unsupported operand type");
2143 return nullptr; 2140 return nullptr;
2144 } 2141 }
2145 2142
2146 llvm::SmallBitVector TargetARM32::getRegisterSet(RegSetMask Include, 2143 SmallBitVector TargetARM32::getRegisterSet(RegSetMask Include,
2147 RegSetMask Exclude) const { 2144 RegSetMask Exclude) const {
2148 llvm::SmallBitVector Registers(RegARM32::Reg_NUM); 2145 SmallBitVector Registers(RegARM32::Reg_NUM);
2149 2146
2150 for (uint32_t i = 0; i < RegARM32::Reg_NUM; ++i) { 2147 for (uint32_t i = 0; i < RegARM32::Reg_NUM; ++i) {
2151 const auto &Entry = RegARM32::RegTable[i]; 2148 const auto &Entry = RegARM32::RegTable[i];
2152 if (Entry.Scratch && (Include & RegSet_CallerSave)) 2149 if (Entry.Scratch && (Include & RegSet_CallerSave))
2153 Registers[i] = true; 2150 Registers[i] = true;
2154 if (Entry.Preserved && (Include & RegSet_CalleeSave)) 2151 if (Entry.Preserved && (Include & RegSet_CalleeSave))
2155 Registers[i] = true; 2152 Registers[i] = true;
2156 if (Entry.StackPtr && (Include & RegSet_StackPointer)) 2153 if (Entry.StackPtr && (Include & RegSet_StackPointer))
2157 Registers[i] = true; 2154 Registers[i] = true;
2158 if (Entry.FramePtr && (Include & RegSet_FramePointer)) 2155 if (Entry.FramePtr && (Include & RegSet_FramePointer))
(...skipping 3887 matching lines...) Expand 10 before | Expand all | Expand 10 after
6046 6043
6047 void TargetARM32::postLower() { 6044 void TargetARM32::postLower() {
6048 if (Ctx->getFlags().getOptLevel() == Opt_m1) 6045 if (Ctx->getFlags().getOptLevel() == Opt_m1)
6049 return; 6046 return;
6050 markRedefinitions(); 6047 markRedefinitions();
6051 Context.availabilityUpdate(); 6048 Context.availabilityUpdate();
6052 } 6049 }
6053 6050
6054 void TargetARM32::makeRandomRegisterPermutation( 6051 void TargetARM32::makeRandomRegisterPermutation(
6055 llvm::SmallVectorImpl<RegNumT> &Permutation, 6052 llvm::SmallVectorImpl<RegNumT> &Permutation,
6056 const llvm::SmallBitVector &ExcludeRegisters, uint64_t Salt) const { 6053 const SmallBitVector &ExcludeRegisters, uint64_t Salt) const {
6057 (void)Permutation; 6054 (void)Permutation;
6058 (void)ExcludeRegisters; 6055 (void)ExcludeRegisters;
6059 (void)Salt; 6056 (void)Salt;
6060 UnimplementedError(Func->getContext()->getFlags()); 6057 UnimplementedError(Func->getContext()->getFlags());
6061 } 6058 }
6062 6059
6063 void TargetARM32::emit(const ConstantInteger32 *C) const { 6060 void TargetARM32::emit(const ConstantInteger32 *C) const {
6064 if (!BuildDefs::dump()) 6061 if (!BuildDefs::dump())
6065 return; 6062 return;
6066 Ostream &Str = Ctx->getStrEmit(); 6063 Ostream &Str = Ctx->getStrEmit();
(...skipping 726 matching lines...) Expand 10 before | Expand all | Expand 10 after
6793 << ".eabi_attribute 42, 1 @ Tag_MPextension_use\n" 6790 << ".eabi_attribute 42, 1 @ Tag_MPextension_use\n"
6794 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n"; 6791 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n";
6795 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { 6792 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
6796 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n"; 6793 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n";
6797 } 6794 }
6798 // Technically R9 is used for TLS with Sandboxing, and we reserve it. 6795 // Technically R9 is used for TLS with Sandboxing, and we reserve it.
6799 // However, for compatibility with current NaCl LLVM, don't claim that. 6796 // However, for compatibility with current NaCl LLVM, don't claim that.
6800 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; 6797 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";
6801 } 6798 }
6802 6799
6803 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[RegARM32::RCARM32_NUM]; 6800 SmallBitVector TargetARM32::TypeToRegisterSet[RegARM32::RCARM32_NUM];
6804 llvm::SmallBitVector 6801 SmallBitVector TargetARM32::TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM];
6805 TargetARM32::TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM]; 6802 SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM];
6806 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM];
6807 6803
6808 } // end of namespace ARM32 6804 } // end of namespace ARM32
6809 } // end of namespace Ice 6805 } // end of namespace Ice
OLDNEW
« no previous file with comments | « src/IceTargetLoweringARM32.h ('k') | src/IceTargetLoweringMIPS32.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698