Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(5)

Unified Diff: src/arm/assembler-arm.cc

Issue 595023: ARM optimize loading of immediates. (Closed)
Patch Set: Created 10 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/arm/assembler-arm.h ('k') | src/arm/simulator-arm.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/arm/assembler-arm.cc
diff --git a/src/arm/assembler-arm.cc b/src/arm/assembler-arm.cc
old mode 100644
new mode 100755
index c79aac656978de87f3ca66429b57d0f3a98415d7..0d55911ba65689574abd7989ce71b37a6f1fe812
--- a/src/arm/assembler-arm.cc
+++ b/src/arm/assembler-arm.cc
@@ -584,25 +584,111 @@ void Assembler::next(Label* L) {
}
}
+// Low-level code emission routines depending on the addressing mode.
+
+
+// Find the index of a single bit in a word with one bit set.
+// I.e., calculate the log-base-2 of a power of 2.
+static inline int BitIndex(uint32_t bit) {
+ ASSERT_NE(0, bit);
+ ASSERT(IsPowerOf2(bit));
+ int res = 0;
+ if ((bit >> 16) != 0) {
+ bit = bit >> 16;
+ res += 16;
+ }
+ if ((bit & 0xff) == 0) {
+ bit = bit >> 8;
+ res += 8;
+ }
+ if ((bit & 0xf) == 0) {
+ bit = bit >> 4;
+ res += 4;
+ }
+ if ((bit & 0x03) == 0) {
+ bit = bit >> 2;
+ res += 2;
+ }
+ if (bit == 2) {
+ res += 1;
+ }
+ return res;
+}
+
-// Low-level code emission routines depending on the addressing mode.
static bool fits_shifter(uint32_t imm32,
uint32_t* rotate_imm,
- uint32_t* immed_8,
- Instr* instr) {
- // imm32 must be unsigned.
- for (int rot = 0; rot < 16; rot++) {
- uint32_t imm8 = (imm32 << 2*rot) | (imm32 >> (32 - 2*rot));
- if ((imm8 <= 0xff)) {
- *rotate_imm = rot;
- *immed_8 = imm8;
- return true;
+ uint32_t* immed_8) {
+ if (imm32 <= 255) {
+ // Respond quickly to all small numbers (includes zero).
+ *immed_8 = imm32;
+ *rotate_imm = 0;
+ return true;
+ }
+ // Find the first non-zero (aligned) bit.
+ uint32_t firstbit = imm32 - (imm32 & (imm32 - 1));
+ // Check whether an 8-bit immediate starting at that bit index can represent
+ // imm32.
+ if (firstbit > (imm32 >> 8)) {
+ // Fits in 8 bits plus shift.
+ int bit_index = BitIndex(firstbit);
+ // Prefer even positions if possible.
+ if ((bit_index & 1) != 0 && firstbit > (imm32 >> 7)) {
+ }
+ *immed_8 = imm32 >> bit_index;
+ *rotate_imm = (32 - bit_index);
+ return true;
+ }
+ // Check for an 8-bit range that wraps.
+ uint32_t rotated = (imm32 << 16) | (imm32 >> 16); // Rotate 16.
+ firstbit = rotated - (rotated & (rotated - 1));
+ if (firstbit > (rotated >> 8)) {
+ // Fits in shifter.
+ int bit_index = BitIndex(firstbit);
+ if ((bit_index & 1) != 0 && firstbit > (rotated >> 7)) {
+ bit_index -= 1;
}
+ *immed_8 = rotated >> bit_index;
+ *rotate_imm = (48 - bit_index) & 0x1F;
+ return true;
}
- // If the opcode is mov or mvn and if ~imm32 fits, change the opcode.
- if (instr != NULL && (*instr & 0xd*B21) == 0xd*B21) {
- if (fits_shifter(~imm32, rotate_imm, immed_8, NULL)) {
- *instr ^= 0x2*B21;
+ return false;
+}
+
+
+// Check if an immediate can be represented as two rotated 8-bit constants.
+static bool fits_shifter(uint32_t imm32,
+ uint32_t* rotate_imm,
+ uint32_t* immed_8,
+ uint32_t* rotate_imm_2,
+ uint32_t* immed_8_2) {
+ const uint32_t kEvenBitsMask = 0x55555555;
+ // Smear bits to even positions to ensure that rotations are even numbered.
+ uint32_t aligned = imm32 | ((imm32 >> 1) & kEvenBitsMask);
+ // Try rotating the first byte around to the end to see if a range can be
+ // found using bits from both ends.
+ for (int i = 0; i < 8; i += 2) {
+ uint32_t rotated = (aligned >> i) | (i > 0 ? (aligned << (32 - i)) : 0);
+ uint32_t lowbit1 = rotated ^ (rotated & (rotated - 1));
+ rotated &= ~((lowbit1 << 8) - 1);
+ ASSERT_NE(0, rotated); // Otherwise we would have fit in one shifter op!
+ uint32_t lowbit2 = rotated ^ (rotated & (rotated - 1));
+ rotated &= ~((lowbit2 << 8) - 1);
+ if (rotated == 0) {
+ // Found two 8-bit sequences at even offsets that contain all the bits
+ // of imm32.
+ int bi1 = BitIndex(lowbit1) + i;
+ int bi2 = BitIndex(lowbit2) + i;
+ ASSERT(bi2 < 32); // Otherwise we would have matched at i==0.
+ ASSERT(bi1 <= bi2 - 8); // Ranges non-overlapping.
+ *immed_8 = (imm32 >> bi1) & 0xFF;
+ uint32_t imm2 = (imm32 >> bi2);
+ if (bi2 > 24) {
+ imm2 |= imm32 << (32 - bi2);
+ }
+ *immed_8_2 = imm2 & 0xFF;
+ *rotate_imm = (32 - bi1) & 0x1f;
+ *rotate_imm_2 = (32 - bi2) & 0x1f;
return true;
}
}
@@ -610,11 +696,11 @@ static bool fits_shifter(uint32_t imm32,
}
-// We have to use the temporary register for things that can be relocated even
+// We have to use the constant pool for things that can be relocated even
// if they can be encoded in the ARM's 12 bits of immediate-offset instruction
// space. There is no guarantee that the relocated location can be similarly
// encoded.
-static bool MustUseIp(RelocInfo::Mode rmode) {
+static bool MustUseConstantPool(RelocInfo::Mode rmode) {
if (rmode == RelocInfo::EXTERNAL_REFERENCE) {
#ifdef DEBUG
if (!Serializer::enabled()) {
@@ -629,26 +715,155 @@ static bool MustUseIp(RelocInfo::Mode rmode) {
}
+// Try to fit immediate value into one or two immediate operations.
+// Updates the instr and emits up to two instructions before it
+// (with the same condition as instr, and leaving the flags unchanged).
+// If the immediate value can directly be represented as an 8-bit constant
+// rotated an even number of bits, it can be inlined in the instruction.
+// If it is an 8-bit constant rotated an odd number of bits, or if
+// the negation is a rotated 8-bit constant, load it using a mov/mvn
+// and rotate the resulting register in the instruction.
+// If the immediate value, or its negation, can be created by combining two
+// (evenly) rotated 8-bit values, then load it using a mov/orr or mvn/bic
+// sequence (if the instruction is a move, use its destination directly
+// as the destination of the orr or bic instruction).
+// If none of this works, return false.
+// If addrmode1 is false, the instruction may only use an 8-ROR-4 immediate or
+// a simple register, not a shifted register. Also, don't try to match the
+// instruction opcode against the addrmode1 instructions for optimizations.
+bool Assembler::fit_to_shifter(Instr* instr_address,
+ uint32_t imm32,
+ bool addrmode1) {
+ // Modify instruction locally until we are sure we have a fit.
+ Instr instr = *instr_address;
+ ASSERT_EQ(0, instr & ~(CondMask | OpCodeMask | S));
+
+ // Normalize moves to only be mov, not mvn, for simplicity.
+ if (addrmode1 && (instr & OpCodeMask) == MVN) {
+ imm32 = ~imm32;
+ instr ^= MOV ^ MVN;
+ }
+
+ uint32_t ror_8;
+ uint32_t immed_8;
+
+ if (fits_shifter(imm32, &ror_8, &immed_8)) {
+ if ((ror_8 & 1) == 0) {
+ // Even rotation count can be represented directly.
+ *instr_address = instr | I | ror_8 * B7 | immed_8;
+ return true;
+ } else if (addrmode1) {
+ // Odd rotation can't be represented directly, so load into ip and
+ // rotate the last bit in a register shift operand.
+ // mov ip, imm32 ROL 1
+ Instr cond = instr & CondMask;
+ emit(cond | I | MOV | ip.code() * B12 | (ror_8 >> 1) * B8 | immed_8);
+ // Use (ip ROR 1) as operand.
+ *instr_address = instr | ROR | 1 * B7 | ip.code();
+ return true;
+ }
+ }
+ // Try negating the immediate to use mvn to load it.
+ if (fits_shifter(~imm32, &ror_8, &immed_8)) {
+ Instr opcode = instr & OpCodeMask;
+ if ((ror_8 & 1) == 0 && addrmode1) {
+ if (opcode == MOV) {
+ *instr_address = (instr ^ (MVN ^ MOV)) | I | ror_8 * B7 | immed_8;
+ return true;
+ }
+ if (opcode == AND || opcode == BIC) {
+ *instr_address = (instr ^ (AND ^ BIC)) | I | ror_8 * B7 | immed_8;
+ return true;
+ }
+ }
+ // Emit mvn ip, ~imm32 to get the value of imm32 into a register.
+ if ((ror_8 & 1) == 0 || addrmode1) {
+ Instr cond = instr & CondMask;
+ emit(cond | I | MVN | ip.code() * B12 | (ror_8 >> 1) * B8 | immed_8);
+ instr |= ip.code();
+ if ((ror_8 & 1) != 0) {
+ instr |= ROR | 1 * B7;
+ }
+ *instr_address = instr;
+ return true;
+ }
+ }
+
+ // TODO(lrn): If supported, use MOVT, MOVW to always load top and low bits in
+ // two operations, or a single 16-bit rotated constant in one mov and using
+ // a rotated register as operand.
+
+ // Try combining two shifter operands into imm32 using orr.
+ uint32_t ror_8_2;
+ uint32_t immed_8_2;
+ if (fits_shifter(imm32, &ror_8, &immed_8, &ror_8_2, &immed_8_2)) {
+ ASSERT_NE(0, immed_8_2);
+ ASSERT_EQ(0, ror_8 & 1);
+ ASSERT_EQ(0, ror_8_2 & 1);
+ Instr imm1 = (ror_8 >> 1) * B8 | immed_8;
+ Instr imm2 = (ror_8_2 >> 1) * B8 | immed_8_2;
+
+ // Create constant using mov+orr of two rotated 8-bit immediates.
+ Instr cond = instr & CondMask;
+ emit(cond | I | MOV | ip.code() * B12 | imm1);
+ if (addrmode1 && (instr & OpCodeMask) == MOV) {
+ // Convert mov to orr-instruction.
+ *instr_address = (instr ^ (MOV ^ ORR)) | ip.code() * B16 | I | imm2;
+ return true;
+ }
+ // Create new orr instruction and use ip as operand.
+ emit(cond | ORR | ip.code() * (B16 + B12) | I | imm2);
+ *instr_address = instr | ip.code();
+ return true;
+ }
+ // Try again negating imm32, using mvn and bic to load the inverted result
+ // instead of mov and orr.
+ if (fits_shifter(~imm32, &ror_8, &immed_8, &ror_8_2, &immed_8_2)) {
+ // Create constant using mvn+bic of two rotated 8-bit immediates.
+ ASSERT_NE(0, immed_8_2);
+ ASSERT_EQ(0, ror_8 & 1);
+ ASSERT_EQ(0, ror_8_2 & 1);
+ Instr imm1 = (ror_8 >> 1) * B8 | immed_8;
+ Instr imm2 = (ror_8_2 >> 1) * B8 | immed_8_2;
+
+ Instr cond = instr & CondMask;
+ emit(cond | I | MVN | ip.code() * B12 | imm1);
+ if (addrmode1 && (instr & OpCodeMask) == MOV) {
+ // Convert mov to bic-instruction.
+ *instr_address = (instr ^ (MOV ^ BIC)) | ip.code() * B16 | I | imm2;
+ return true;
+ }
+ // Create new bic instruction and use ip as operand.
+ emit(cond | BIC | ip.code() * (B16 + B12) | I | imm2);
+ *instr_address = instr | ip.code();
+ return true;
+ }
+ return false;
+}
+
+
void Assembler::addrmod1(Instr instr,
Register rn,
Register rd,
const Operand& x) {
+ // Constants.
CheckBuffer();
- ASSERT((instr & ~(CondMask | OpCodeMask | S)) == 0);
+ ASSERT_EQ(0, (instr & ~(CondMask | OpCodeMask | S)));
+ ASSERT(((instr & OpCodeMask) != MOV && (instr & OpCodeMask) != MVN) ||
+ rn.is(r0));
if (!x.rm_.is_valid()) {
- // Immediate.
- uint32_t rotate_imm;
- uint32_t immed_8;
- if (MustUseIp(x.rmode_) ||
- !fits_shifter(x.imm32_, &rotate_imm, &immed_8, &instr)) {
- // The immediate operand cannot be encoded as a shifter operand, so load
- // it first to register ip and change the original instruction to use ip.
+ // immediate
+ bool must_use_pool = MustUseConstantPool(x.rmode_);
+ if (must_use_pool || !fit_to_shifter(&instr, x.imm32_)) {
+ // The immediate operand cannot be encoded as a shifter operand, or as
+ // a simple combination of shifter operands, so load it first to register
+ // ip and change the original instruction to use ip.
// However, if the original instruction is a 'mov rd, x' (not setting the
// condition code), then replace it with a 'ldr rd, [pc]'.
RecordRelocInfo(x.rmode_, x.imm32_);
CHECK(!rn.is(ip)); // rn should never be ip, or will be trashed
Condition cond = static_cast<Condition>(instr & CondMask);
- if ((instr & ~CondMask) == 13*B21) { // mov, S not set
+ if ((instr & ~CondMask) == MOV) { // mov, S not set
ldr(rd, MemOperand(pc, 0), cond);
} else {
ldr(ip, MemOperand(pc, 0), cond);
@@ -656,7 +871,6 @@ void Assembler::addrmod1(Instr instr,
}
return;
}
- instr |= I | rotate_imm*B8 | immed_8;
} else if (!x.rs_.is_valid()) {
// Immediate shift.
instr |= x.shift_imm_*B7 | x.shift_op_ | x.rm_.code();
@@ -1075,18 +1289,15 @@ void Assembler::msr(SRegisterFieldMask fields, const Operand& src,
ASSERT(fields >= B16 && fields < B20); // at least one field set
Instr instr;
if (!src.rm_.is_valid()) {
- // Immediate.
- uint32_t rotate_imm;
- uint32_t immed_8;
- if (MustUseIp(src.rmode_) ||
- !fits_shifter(src.imm32_, &rotate_imm, &immed_8, NULL)) {
- // Immediate operand cannot be encoded, load it first to register ip.
+ // immediate
+ if (MustUseConstantPool(src.rmode_) ||
+ !fit_to_shifter(&instr, src.imm32_, false)) {
+ // immediate operand cannot be encoded, load it first to register ip
RecordRelocInfo(src.rmode_, src.imm32_);
ldr(ip, MemOperand(pc, 0), cond);
msr(fields, Operand(ip), cond);
return;
}
- instr = I | rotate_imm*B8 | immed_8;
} else {
ASSERT(!src.rs_.is_valid() && src.shift_imm_ == 0); // only rm allowed
instr = src.rm_.code();
@@ -1627,9 +1838,9 @@ void Assembler::lea(Register dst,
bool Assembler::ImmediateFitsAddrMode1Instruction(int32_t imm32) {
- uint32_t dummy1;
- uint32_t dummy2;
- return fits_shifter(imm32, &dummy1, &dummy2, NULL);
+ uint32_t rotate;
+ uint32_t immediate;
+ return fits_shifter(imm32, &rotate, &immediate) && (rotate & 1) == 0;
}
« no previous file with comments | « src/arm/assembler-arm.h ('k') | src/arm/simulator-arm.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698