| Index: src/mips/assembler-mips.cc
|
| diff --git a/src/mips/assembler-mips.cc b/src/mips/assembler-mips.cc
|
| index f30f38bb7581877fe97d053ead59e6b517094f3a..f2bc6567da6e06af60738238160e118d086f8437 100644
|
| --- a/src/mips/assembler-mips.cc
|
| +++ b/src/mips/assembler-mips.cc
|
| @@ -172,7 +172,8 @@ Register ToRegister(int num) {
|
| // -----------------------------------------------------------------------------
|
| // Implementation of RelocInfo.
|
|
|
| -const int RelocInfo::kApplyMask = 1 << RelocInfo::INTERNAL_REFERENCE;
|
| +const int RelocInfo::kApplyMask = RelocInfo::kCodeTargetMask |
|
| + 1 << RelocInfo::INTERNAL_REFERENCE;  // '<<' binds tighter than '|': mask | (1 << mode).
|
|
|
|
|
| bool RelocInfo::IsCodedSpecially() {
|
| @@ -546,6 +547,19 @@ bool Assembler::IsJ(Instr instr) {
|
| }
|
|
|
|
|
| +bool Assembler::IsJal(Instr instr) {  // True iff instr's opcode field is JAL.
|
| + return GetOpcodeField(instr) == JAL;
|
| +}
|
| +
|
| +bool Assembler::IsJr(Instr instr) {  // True iff opcode is SPECIAL and function is JR.
|
| + return GetOpcodeField(instr) == SPECIAL && GetFunctionField(instr) == JR;
|
| +}
|
| +
|
| +bool Assembler::IsJalr(Instr instr) {  // True iff opcode is SPECIAL and function is JALR.
|
| + return GetOpcodeField(instr) == SPECIAL && GetFunctionField(instr) == JALR;
|
| +}
|
| +
|
| +
|
| bool Assembler::IsLui(Instr instr) {
|
| uint32_t opcode = GetOpcodeField(instr);
|
| // Checks if the instruction is a load upper immediate.
|
| @@ -939,7 +953,7 @@ void Assembler::GenInstrImmediate(Opcode opcode,
|
|
|
|
|
| void Assembler::GenInstrJump(Opcode opcode,
|
| - uint32_t address) {
|
| + uint32_t address) {
|
| BlockTrampolinePoolScope block_trampoline_pool(this);
|
| ASSERT(is_uint26(address));
|
| Instr instr = opcode | address;
|
| @@ -1112,7 +1126,12 @@ void Assembler::bne(Register rs, Register rt, int16_t offset) {
|
|
|
|
|
| void Assembler::j(int32_t target) {
|
| - ASSERT(is_uint28(target) && ((target & 3) == 0));
|
| +#ifdef DEBUG
|
| + // Debug check: target must share the delay-slot pc's bits above the jump field.
|
| + uint32_t ipc = reinterpret_cast<uint32_t>(pc_ + 1 * kInstrSize);
|
| + bool in_range = ((uint32_t)(ipc^target) >> (kImm26Bits+kImmFieldShift)) == 0;
|
| + ASSERT(in_range && ((target & 3) == 0));  // Low two bits of target must be zero.
|
| +#endif
|
| GenInstrJump(J, target >> 2);
|
| }
|
|
|
| @@ -1128,8 +1147,13 @@ void Assembler::jr(Register rs) {
|
|
|
|
|
| void Assembler::jal(int32_t target) {
|
| +#ifdef DEBUG
|
| + // Debug check: target must share the delay-slot pc's bits above the jump field.
|
| + uint32_t ipc = reinterpret_cast<uint32_t>(pc_ + 1 * kInstrSize);
|
| + bool in_range = ((uint32_t)(ipc^target) >> (kImm26Bits+kImmFieldShift)) == 0;
|
| + ASSERT(in_range && ((target & 3) == 0));  // Low two bits of target must be zero.
|
| +#endif
|
| positions_recorder()->WriteRecordedPositions();
|
| - ASSERT(is_uint28(target) && ((target & 3) == 0));
|
| GenInstrJump(JAL, target >> 2);
|
| }
|
|
|
| @@ -1142,6 +1166,32 @@ void Assembler::jalr(Register rs, Register rd) {
|
| }
|
|
|
|
|
| +void Assembler::j_or_jr(int32_t target, Register rs) {
|
| + // Emits j(target) if target shares the delay-slot pc's upper bits, else a jr.
|
| + uint32_t ipc = reinterpret_cast<uint32_t>(pc_ + 1 * kInstrSize);
|
| + bool in_range = ((uint32_t)(ipc^target) >> (kImm26Bits+kImmFieldShift)) == 0;
|
| +
|
| + if (in_range) {
|
| + j(target);
|
| + } else {
|
| + jr(t9);  // NOTE(review): 'rs' is never used; t9 is hard-coded - confirm intent.
|
| + }
|
| +}
|
| +
|
| +
|
| +void Assembler::jal_or_jalr(int32_t target, Register rs) {
|
| + // Emits jal(target) if target shares the delay-slot pc's upper bits, else a jalr.
|
| + uint32_t ipc = reinterpret_cast<uint32_t>(pc_ + 1 * kInstrSize);
|
| + bool in_range = ((uint32_t)(ipc^target) >> (kImm26Bits+kImmFieldShift)) == 0;
|
| +
|
| + if (in_range) {
|
| + jal(target);
|
| + } else {
|
| + jalr(t9);  // NOTE(review): 'rs' is never used; t9 is hard-coded - confirm intent.
|
| + }
|
| +}
|
| +
|
| +
|
| //-------Data-processing-instructions---------
|
|
|
| // Arithmetic.
|
| @@ -1614,6 +1664,13 @@ void Assembler::cfc1(Register rt, FPUControlRegister fs) {
|
| GenInstrRegister(COP1, CFC1, rt, fs);
|
| }
|
|
|
| +void Assembler::DoubleAsTwoUInt32(double d, uint32_t* lo, uint32_t* hi) {
|
| + uint64_t i;  // Receives the raw 64-bit pattern of d.
|
| + memcpy(&i, &d, 8);  // Byte copy of d's 8-byte representation.
|
| +
|
| + *lo = i & 0xffffffff;  // Low 32 bits.
|
| + *hi = i >> 32;  // High 32 bits.
|
| +}
|
|
|
| // Arithmetic.
|
|
|
| @@ -1972,10 +2029,15 @@ void Assembler::RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data) {
|
| }
|
| if (rinfo.rmode() != RelocInfo::NONE) {
|
| // Don't record external references unless the heap will be serialized.
|
| - if (rmode == RelocInfo::EXTERNAL_REFERENCE &&
|
| - !Serializer::enabled() &&
|
| - !FLAG_debug_code) {
|
| - return;
|
| + if (rmode == RelocInfo::EXTERNAL_REFERENCE) {
|
| +#ifdef DEBUG
|
| + if (!Serializer::enabled()) {
|
| + Serializer::TooLateToEnableNow();
|
| + }
|
| +#endif
|
| + if (!Serializer::enabled() && !emit_debug_code()) {
|
| + return;
|
| + }
|
| }
|
| ASSERT(buffer_space() >= kMaxRelocSize); // Too late to grow buffer here.
|
| if (rmode == RelocInfo::CODE_TARGET_WITH_ID) {
|
| @@ -2070,30 +2132,142 @@ Address Assembler::target_address_at(Address pc) {
|
| }
|
|
|
|
|
| +// On Mips, a target address is stored in a lui/ori instruction pair, each
|
| +// of which loads 16 bits of the 32-bit address into a register.
|
| +// Patching the address must replace both instructions, and flush the i-cache.
|
| +//
|
| +// There is an optimization below, which emits a nop when the address
|
| +// fits in just 16 bits. This is unlikely to help, and should be benchmarked,
|
| +// and possibly removed. NOTE(review): no nop path is visible in this body - confirm.
|
| void Assembler::set_target_address_at(Address pc, Address target) {
|
| - // On MIPS we patch the address into lui/ori instruction pair.
|
| -
|
| - // First check we have an li (lui/ori pair).
|
| Instr instr2 = instr_at(pc + kInstrSize);
|
| + uint32_t rt_code = GetRtField(instr2);
|
| + uint32_t* p = reinterpret_cast<uint32_t*>(pc);
|
| + uint32_t itarget = reinterpret_cast<uint32_t>(target);
|
| +
|
| #ifdef DEBUG
|
| + // Check we have the result from a li macro-instruction, using an instr pair.
|
| Instr instr1 = instr_at(pc);
|
| -
|
| - // Check we have indeed the result from a li with MustUseReg true.
|
| CHECK((GetOpcodeField(instr1) == LUI && GetOpcodeField(instr2) == ORI));
|
| #endif
|
|
|
| - uint32_t rt_code = GetRtField(instr2);
|
| - uint32_t* p = reinterpret_cast<uint32_t*>(pc);
|
| - uint32_t itarget = reinterpret_cast<uint32_t>(target);
|
| -
|
| - // lui rt, high-16.
|
| - // ori rt rt, low-16.
|
| + // Must use 2 instructions to ensure patchable code => just use lui and ori.
|
| + // lui rt, upper-16.
|
| + // ori rt, rt, lower-16.
|
| *p = LUI | rt_code | ((itarget & kHiMask) >> kLuiShift);
|
| *(p+1) = ORI | rt_code | (rt_code << 5) | (itarget & kImm16Mask);
|
|
|
| - CPU::FlushICache(pc, 2 * sizeof(int32_t));
|
| + // The following code is an optimization for the common case of Call()
|
| + // or Jump() which is load to register, and jump through register:
|
| + // li(t9, address); jalr(t9) (or jr(t9)).
|
| + // If the destination address is in the same 256 MB page as the call, it
|
| + // is faster to do a direct jal, or j, rather than jump thru register, since
|
| + // that lets the cpu pipeline prefetch the target address. However each
|
| + // time the address above is patched, we have to patch the direct jal/j
|
| + // instruction, as well as possibly revert to jalr/jr if we now cross a
|
| + // 256 MB page. Note that with the jal/j instructions, we do not need to
|
| + // load the register, but that code is left, since it makes it easy to
|
| + // revert this process. A further optimization could try replacing the
|
| + // li sequence with nops.
|
| + // This optimization can only be applied if the rt-code from instr2 is the
|
| + // register used for the jalr/jr. Finally, we have to skip 'jr ra', which is
|
| + // mips return. Occasionally this lands after an li().
|
| +
|
| + Instr instr3 = instr_at(pc + 2 * kInstrSize);
|
| + uint32_t ipc = reinterpret_cast<uint32_t>(pc + 3 * kInstrSize);  // Address just past instr3.
|
| + bool in_range =
|
| + ((uint32_t)(ipc ^ itarget) >> (kImm26Bits + kImmFieldShift)) == 0;
|
| + uint32_t target_field = (uint32_t)(itarget & kJumpAddrMask) >> kImmFieldShift;
|
| + bool patched_jump = false;
|
| +
|
| +#ifndef ALLOW_JAL_IN_BOUNDARY_REGION
|
| + // This is a workaround to the 24k core E156 bug (affects some 34k cores also).
|
| + // Since the excluded space is only 64KB out of 256MB (0.02 %), we will just
|
| + // apply this workaround for all cores so we don't have to identify the core.
|
| + if (in_range) {
|
| + // The 24k core E156 bug has some very specific requirements, we only check
|
| + // the most simple one: if the address of the delay slot instruction is in
|
| + // the first or last 32 KB of the 256 MB segment.
|
| + uint32_t segment_mask = ((256 * MB) - 1) ^ ((32 * KB) - 1);  // 256 MB offset bits above 32 KB.
|
| + uint32_t ipc_segment_addr = ipc & segment_mask;
|
| + if (ipc_segment_addr == 0 || ipc_segment_addr == segment_mask)
|
| + in_range = false;
|
| + }
|
| +#endif
|
| +
|
| + if (IsJalr(instr3)) {
|
| + // Try to convert JALR to JAL.
|
| + if (in_range && GetRt(instr2) == GetRs(instr3)) {
|
| + *(p+2) = JAL | target_field;
|
| + patched_jump = true;
|
| + }
|
| + } else if (IsJr(instr3)) {
|
| + // Try to convert JR to J, skip returns (jr ra).
|
| + bool is_ret = static_cast<int>(GetRs(instr3)) == ra.code();
|
| + if (in_range && !is_ret && GetRt(instr2) == GetRs(instr3)) {
|
| + *(p+2) = J | target_field;
|
| + patched_jump = true;
|
| + }
|
| + } else if (IsJal(instr3)) {
|
| + if (in_range) {
|
| + // We are patching an already converted JAL.
|
| + *(p+2) = JAL | target_field;
|
| + } else {
|
| + // Patch JAL, but out of range, revert to JALR.
|
| + // JALR rs reg is the rt reg specified in the ORI instruction.
|
| + uint32_t rs_field = GetRt(instr2) << kRsShift;
|
| + uint32_t rd_field = ra.code() << kRdShift; // Return-address (ra) reg.
|
| + *(p+2) = SPECIAL | rs_field | rd_field | JALR;
|
| + }
|
| + patched_jump = true;
|
| + } else if (IsJ(instr3)) {
|
| + if (in_range) {
|
| + // We are patching an already converted J (jump).
|
| + *(p+2) = J | target_field;
|
| + } else {
|
| + // Trying to patch J, but out of range, just go back to JR.
|
| + // JR 'rs' reg is the 'rt' reg specified in the ORI instruction (instr2).
|
| + uint32_t rs_field = GetRt(instr2) << kRsShift;
|
| + *(p+2) = SPECIAL | rs_field | JR;
|
| + }
|
| + patched_jump = true;
|
| + }
|
| +
|
| + CPU::FlushICache(pc, (patched_jump ? 3 : 2) * sizeof(int32_t));  // Include the jump word if rewritten.
|
| }
|
|
|
| +void Assembler::JumpLabelToJumpRegister(Address pc) {
|
| + // pc points to a lui/ori pair; a J or JAL may follow at pc + 2 * kInstrSize.
|
| + // If so, rewrite it as JR/JALR through the register the ori writes (its rt).
|
| + uint32_t* p = reinterpret_cast<uint32_t*>(pc);
|
| +#ifdef DEBUG
|
| + Instr instr1 = instr_at(pc);
|
| +#endif
|
| + Instr instr2 = instr_at(pc + 1 * kInstrSize);
|
| + Instr instr3 = instr_at(pc + 2 * kInstrSize);
|
| + bool patched = false;
|
| +
|
| + if (IsJal(instr3)) {
|
| + ASSERT(GetOpcodeField(instr1) == LUI);
|
| + ASSERT(GetOpcodeField(instr2) == ORI);
|
| +
|
| + uint32_t rs_field = GetRt(instr2) << kRsShift;
|
| + uint32_t rd_field = ra.code() << kRdShift; // Return-address (ra) reg.
|
| + *(p+2) = SPECIAL | rs_field | rd_field | JALR;
|
| + patched = true;
|
| + } else if (IsJ(instr3)) {
|
| + ASSERT(GetOpcodeField(instr1) == LUI);
|
| + ASSERT(GetOpcodeField(instr2) == ORI);
|
| +
|
| + uint32_t rs_field = GetRt(instr2) << kRsShift;
|
| + *(p+2) = SPECIAL | rs_field | JR;
|
| + patched = true;
|
| + }
|
| +
|
| + if (patched) {
|
| + CPU::FlushICache(pc + 2 * kInstrSize, kInstrSize);  // Flush the rewritten word; pc is byte-addressed.
|
| + }
|
| +}
|
|
|
| } } // namespace v8::internal
|
|
|
|
|