src/arm/assembler-arm.cc - Issue 595023: ARM optimize loading of immediates.

Unified Diff: src/arm/assembler-arm.cc

Issue 595023: ARM optimize loading of immediates. (Closed)

Patch Set: Created 10 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: src/arm/assembler-arm.cc

diff --git a/src/arm/assembler-arm.cc b/src/arm/assembler-arm.cc

old mode 100644

new mode 100755

index c79aac656978de87f3ca66429b57d0f3a98415d7..0d55911ba65689574abd7989ce71b37a6f1fe812

--- a/src/arm/assembler-arm.cc

+++ b/src/arm/assembler-arm.cc

@@ -584,25 +584,111 @@ void Assembler::next(Label* L) {

}

+// Low-level code emission routines depending on the addressing mode.

+// Find the index of a single bit in a word with one bit set.

+// I.e., calculate the log-base-2 of a power of 2.

+static inline int BitIndex(uint32_t bit) {

+ ASSERT_NE(0, bit);

+ ASSERT(IsPowerOf2(bit));

+ int res = 0;

+ if ((bit >> 16) != 0) {

+ bit = bit >> 16;

+ res += 16;

+ }

+ if ((bit & 0xff) == 0) {

+ bit = bit >> 8;

+ res += 8;

+ }

+ if ((bit & 0xf) == 0) {

+ bit = bit >> 4;

+ res += 4;

+ }

+ if ((bit & 0x03) == 0) {

+ bit = bit >> 2;

+ res += 2;

+ }

+ if (bit == 2) {

+ res += 1;

+ }

+ return res;

-// Low-level code emission routines depending on the addressing mode.

static bool fits_shifter(uint32_t imm32,

uint32_t* rotate_imm,

- uint32_t* immed_8,

- Instr* instr) {

- // imm32 must be unsigned.

- for (int rot = 0; rot < 16; rot++) {

- uint32_t imm8 = (imm32 << 2*rot) | (imm32 >> (32 - 2*rot));

- if ((imm8 <= 0xff)) {

- *rotate_imm = rot;

- *immed_8 = imm8;

- return true;

+ uint32_t* immed_8) {

+ if (imm32 <= 255) {

+ // Respond quickly to all small numbers (includes zero).

+ *immed_8 = imm32;

+ *rotate_imm = 0;

+ return true;

+ }

+ // Find the first non-zero (aligned) bit.

+ uint32_t firstbit = imm32 - (imm32 & (imm32 - 1));

+ // Check whether an 8-bit immediate starting at that bit index can represent

+ // imm32.

+ if (firstbit > (imm32 >> 8)) {

+ // Fits in 8 bits plus shift.

+ int bit_index = BitIndex(firstbit);

+ // Prefer even positions if possible.

+ if ((bit_index & 1) != 0 && firstbit > (imm32 >> 7)) {

+ }

+ *immed_8 = imm32 >> bit_index;

+ *rotate_imm = (32 - bit_index);

+ return true;

+ }

+ // Check for an 8-bit range that wraps.

+ uint32_t rotated = (imm32 << 16) | (imm32 >> 16); // Rotate 16.

+ firstbit = rotated - (rotated & (rotated - 1));

+ if (firstbit > (rotated >> 8)) {

+ // Fits in shifter.

+ int bit_index = BitIndex(firstbit);

+ if ((bit_index & 1) != 0 && firstbit > (rotated >> 7)) {

+ bit_index -= 1;

}

+ *immed_8 = rotated >> bit_index;

+ *rotate_imm = (48 - bit_index) & 0x1F;

+ return true;

}

- // If the opcode is mov or mvn and if ~imm32 fits, change the opcode.

- if (instr != NULL && (*instr & 0xd*B21) == 0xd*B21) {

- if (fits_shifter(~imm32, rotate_imm, immed_8, NULL)) {

- *instr ^= 0x2*B21;

+ return false;

+// Check if an immediate can be represented as two rotated 8-bit constants.

+static bool fits_shifter(uint32_t imm32,

+ uint32_t* rotate_imm,

+ uint32_t* immed_8,

+ uint32_t* rotate_imm_2,

+ uint32_t* immed_8_2) {

+ const uint32_t kEvenBitsMask = 0x55555555;

+ // Smear bits to even positions to ensure that rotations are even numbered.

+ uint32_t aligned = imm32 | ((imm32 >> 1) & kEvenBitsMask);

+ // Try rotating the first byte around to the end to see if a range can be

+ // found using bits from both ends.

+ for (int i = 0; i < 8; i += 2) {

+ uint32_t rotated = (aligned >> i) | (i > 0 ? (aligned << (32 - i)) : 0);

+ uint32_t lowbit1 = rotated ^ (rotated & (rotated - 1));

+ rotated &= ~((lowbit1 << 8) - 1);

+ ASSERT_NE(0, rotated); // Otherwise we would have fit in one shifter op!

+ uint32_t lowbit2 = rotated ^ (rotated & (rotated - 1));

+ rotated &= ~((lowbit2 << 8) - 1);

+ if (rotated == 0) {

+ // Found two 8-bit sequences at even offsets that contain all the bits

+ // of imm32.

+ int bi1 = BitIndex(lowbit1) + i;

+ int bi2 = BitIndex(lowbit2) + i;

+ ASSERT(bi2 < 32); // Otherwise we would have matched at i==0.

+ ASSERT(bi1 <= bi2 - 8); // Ranges non-overlapping.

+ *immed_8 = (imm32 >> bi1) & 0xFF;

+ uint32_t imm2 = (imm32 >> bi2);

+ if (bi2 > 24) {

+ imm2 |= imm32 << (32 - bi2);

+ }

+ *immed_8_2 = imm2 & 0xFF;

+ *rotate_imm = (32 - bi1) & 0x1f;

+ *rotate_imm_2 = (32 - bi2) & 0x1f;

return true;

}

@@ -610,11 +696,11 @@ static bool fits_shifter(uint32_t imm32,

}

-// We have to use the temporary register for things that can be relocated even

+// We have to use the constant pool for things that can be relocated even

// if they can be encoded in the ARM's 12 bits of immediate-offset instruction

// space. There is no guarantee that the relocated location can be similarly

// encoded.

-static bool MustUseIp(RelocInfo::Mode rmode) {

+static bool MustUseConstantPool(RelocInfo::Mode rmode) {

if (rmode == RelocInfo::EXTERNAL_REFERENCE) {

#ifdef DEBUG

if (!Serializer::enabled()) {

@@ -629,26 +715,155 @@ static bool MustUseIp(RelocInfo::Mode rmode) {

}

+// Try to fit immediate value into one or two immediate operations.

+// Updates the instr and emits up to two instructions previous to it

+// (with the same condition as instr, and leaving the flags unchanged).

+// If the immediate value can directly be represented as an 8-bit constant

+// rotated an even number of bits, it can be inlined in the instruction.

+// If it is an 8-bit constant rotated an odd number of bits, or if
+
+// its bitwise complement is a rotated 8-bit constant, load it using a mov/mvn
+
+// and rotate the resulting register in the instruction.
+
+// If the immediate value, or its bitwise complement, can be created by
+
+// combining two (evenly) rotated 8-bit values, then load it using a mov/orr
+
+// or mvn/bic sequence (if the instruction is a move, use its destination
+
+// directly as the destination of the orr or bic instruction).

+// If none of this works, return false.

+// If addrmode1 is false, the instruction may only use an 8-ROR-4 immediate or

+// a simple register, not a shifted register. Also, don't try to match the

+// instruction opcode against the addrmode1 instructions for optimizations.

+bool Assembler::fit_to_shifter(Instr* instr_address,

+ uint32_t imm32,

+ bool addrmode1) {

+ // Modify instruction locally until we are sure we have a fit.

+ Instr instr = *instr_address;

+ ASSERT_EQ(0, instr & ~(CondMask | OpCodeMask | S));

+ // Normalize moves to only be mov, not mvn, for simplicity.

+ if (addrmode1 && (instr & OpCodeMask) == MVN) {

+ imm32 = ~imm32;

+ instr ^= MOV ^ MVN;

+ }

+ uint32_t ror_8;

+ uint32_t immed_8;

+ if (fits_shifter(imm32, &ror_8, &immed_8)) {

+ if ((ror_8 & 1) == 0) {

+ // Even rotation count can be represented directly.

+ *instr_address = instr | I | ror_8 * B7 | immed_8;

+ return true;

+ } else if (addrmode1) {

+ // Odd rotation can't be represented directly, so load into ip and

+ // rotate the last bit in a register shift operand.

+ // mov ip, imm32 ROL 1

+ Instr cond = instr & CondMask;

+ // Use (ip ROR 1) as operand.

+ *instr_address = instr | ROR | 1 * B7 | ip.code();

+ return true;

+ }

+ // Try negating the immediate to use mvn to load it.

+ if (fits_shifter(~imm32, &ror_8, &immed_8)) {

+ Instr opcode = instr & OpCodeMask;

+ if ((ror_8 & 1) == 0 && addrmode1) {

+ if (opcode == MOV) {

+ *instr_address = (instr ^ (MVN ^ MOV)) | I | ror_8 * B7 | immed_8;

+ return true;

+ }

+ if (opcode == AND || opcode == BIC) {

+ *instr_address = (instr ^ (AND ^ BIC)) | I | ror_8 * B7 | immed_8;

+ return true;

+ }

+ // Emit mov ip,~imm32 to get value into register.

+ if ((ror_8 & 1) == 0 || addrmode1) {

+ Instr cond = instr & CondMask;

+ instr |= ip.code();

+ if ((ror_8 & 1) != 0) {

+ instr |= ROR | 1 * B7;

+ }

+ *instr_address = instr;

+ return true;

+ }

+ // TODO(lrn): If supported, use MOVT, MOVW to always load top and low bits in

+ // two operations, or a single 16-bit rotated constant in one mov and using

+ // a rotated register as operand.

+ // Try combining two shifter operands into imm32 using orr.

+ uint32_t ror_8_2;

+ uint32_t immed_8_2;

+ if (fits_shifter(imm32, &ror_8, &immed_8, &ror_8_2, &immed_8_2)) {

+ ASSERT_NE(0, immed_8_2);

+ ASSERT_EQ(0, ror_8 & 1);

+ ASSERT_EQ(0, ror_8_2 & 1);

+ Instr imm1 = (ror_8 >> 1) * B8 | immed_8;

+ Instr imm2 = (ror_8_2 >> 1) * B8 | immed_8_2;

+ // Create constant using mov+orr of two rotated 8-bit immediates.

+ Instr cond = instr & CondMask;

+ emit(cond | I | MOV | ip.code() * B12 | imm1);

+ if (addrmode1 && (instr & OpCodeMask) == MOV) {

+ // Convert mov to orr-instruction.

+ *instr_address = (instr ^ (MOV ^ ORR)) | ip.code() * B16 | I | imm2;

+ return true;

+ }

+ // Create new orr instruction and use ip as operand.

+ emit(cond | ORR | ip.code() * (B16 + B12) | I | imm2);

+ *instr_address = instr | ip.code();

+ return true;

+ }

+ // Try again negating imm32, using mvn and bic to load the inverted result

+ // instead of mov and orr.

+ if (fits_shifter(~imm32, &ror_8, &immed_8, &ror_8_2, &immed_8_2)) {

+ // Create constant using mvn+bic of two rotated 8-bit immediates.

+ ASSERT_NE(0, immed_8_2);

+ ASSERT_EQ(0, ror_8 & 1);

+ ASSERT_EQ(0, ror_8_2 & 1);

+ Instr imm1 = (ror_8 >> 1) * B8 | immed_8;

+ Instr imm2 = (ror_8_2 >> 1) * B8 | immed_8_2;

+ Instr cond = instr & CondMask;

+ emit(cond | I | MVN | ip.code() * B12 | imm1);

+ if (addrmode1 && (instr & OpCodeMask) == MOV) {

+ // Convert mov to bic-instruction.

+ *instr_address = (instr ^ (MOV ^ BIC)) | ip.code() * B16 | I | imm2;

+ return true;

+ }

+ // Create new bic instruction and use ip as operand.

+ emit(cond | BIC | ip.code() * (B16 + B12) | I | imm2);

+ *instr_address = instr | ip.code();

+ return true;

+ }

+ return false;

void Assembler::addrmod1(Instr instr,

const Operand& x) {

+ // Constants.

CheckBuffer();

- ASSERT((instr & ~(CondMask | OpCodeMask | S)) == 0);

+ ASSERT_EQ(0, (instr & ~(CondMask | OpCodeMask | S)));

+ ASSERT(((instr & OpCodeMask) != MOV && (instr & OpCodeMask) != MVN) ||

+ rn.is(r0));

if (!x.rm_.is_valid()) {

- // Immediate.

- uint32_t rotate_imm;

- uint32_t immed_8;

- if (MustUseIp(x.rmode_) ||

- !fits_shifter(x.imm32_, &rotate_imm, &immed_8, &instr)) {

- // The immediate operand cannot be encoded as a shifter operand, so load

- // it first to register ip and change the original instruction to use ip.

+ // immediate

+ bool must_use_pool = MustUseConstantPool(x.rmode_);

+ if (must_use_pool || !fit_to_shifter(&instr, x.imm32_)) {

+ // The immediate operand cannot be encoded as a shifter operand, or as

+ // a simple combination of shifter operands, so load it first to register

+ // ip and change the original instruction to use ip.

// However, if the original instruction is a 'mov rd, x' (not setting the

// condition code), then replace it with a 'ldr rd, [pc]'.

RecordRelocInfo(x.rmode_, x.imm32_);

CHECK(!rn.is(ip)); // rn should never be ip, or will be trashed

Condition cond = static_cast<Condition>(instr & CondMask);

- if ((instr & ~CondMask) == 13*B21) { // mov, S not set

+ if ((instr & ~CondMask) == MOV) { // mov, S not set

ldr(rd, MemOperand(pc, 0), cond);

} else {

ldr(ip, MemOperand(pc, 0), cond);

@@ -656,7 +871,6 @@ void Assembler::addrmod1(Instr instr,

}

return;

}

- instr |= I | rotate_imm*B8 | immed_8;

} else if (!x.rs_.is_valid()) {

// Immediate shift.

instr |= x.shift_imm_*B7 | x.shift_op_ | x.rm_.code();

@@ -1075,18 +1289,15 @@ void Assembler::msr(SRegisterFieldMask fields, const Operand& src,

ASSERT(fields >= B16 && fields < B20); // at least one field set

Instr instr;

if (!src.rm_.is_valid()) {

- // Immediate.

- uint32_t rotate_imm;

- uint32_t immed_8;

- if (MustUseIp(src.rmode_) ||

- !fits_shifter(src.imm32_, &rotate_imm, &immed_8, NULL)) {

- // Immediate operand cannot be encoded, load it first to register ip.

+ // immediate

+ if (MustUseConstantPool(src.rmode_) ||

+ !fit_to_shifter(&instr, src.imm32_, false)) {

+ // immediate operand cannot be encoded, load it first to register ip

RecordRelocInfo(src.rmode_, src.imm32_);

ldr(ip, MemOperand(pc, 0), cond);

msr(fields, Operand(ip), cond);

return;

}

- instr = I | rotate_imm*B8 | immed_8;

} else {

ASSERT(!src.rs_.is_valid() && src.shift_imm_ == 0); // only rm allowed

instr = src.rm_.code();

@@ -1627,9 +1838,9 @@ void Assembler::lea(Register dst,

bool Assembler::ImmediateFitsAddrMode1Instruction(int32_t imm32) {

- uint32_t dummy1;

- uint32_t dummy2;

- return fits_shifter(imm32, &dummy1, &dummy2, NULL);

+ uint32_t rotate;

+ uint32_t immediate;

+ return fits_shifter(imm32, &rotate, &immediate) && (rotate & 1) == 0;

}

« no previous file with comments | « src/arm/assembler-arm.h ('k') | src/arm/simulator-arm.cc » ('j') | no next file with comments »