Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(117)

Side by Side Diff: src/arm/assembler-arm.cc

Issue 595023: ARM optimize loading of immediates. (Closed)
Patch Set: Created 10 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/arm/assembler-arm.h ('k') | src/arm/simulator-arm.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 1994-2006 Sun Microsystems Inc. 1 // Copyright (c) 1994-2006 Sun Microsystems Inc.
2 // All Rights Reserved. 2 // All Rights Reserved.
3 // 3 //
4 // Redistribution and use in source and binary forms, with or without 4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions 5 // modification, are permitted provided that the following conditions
6 // are met: 6 // are met:
7 // 7 //
8 // - Redistributions of source code must retain the above copyright notice, 8 // - Redistributions of source code must retain the above copyright notice,
9 // this list of conditions and the following disclaimer. 9 // this list of conditions and the following disclaimer.
10 // 10 //
(...skipping 566 matching lines...) Expand 10 before | Expand all | Expand 10 after
577 ASSERT(L->is_linked()); 577 ASSERT(L->is_linked());
578 int link = target_at(L->pos()); 578 int link = target_at(L->pos());
579 if (link > 0) { 579 if (link > 0) {
580 L->link_to(link); 580 L->link_to(link);
581 } else { 581 } else {
582 ASSERT(link == kEndOfChain); 582 ASSERT(link == kEndOfChain);
583 L->Unuse(); 583 L->Unuse();
584 } 584 }
585 } 585 }
586 586
587 // Low-level code emission routines depending on the addressing mode.
587 588
588 // Low-level code emission routines depending on the addressing mode. 589
590 // Find the index of a single bit in a word with one bit set.
591 // I.e., calculate the log-base-2 of a power of 2.
592 static inline int BitIndex(uint32_t bit) {
593 ASSERT_NE(0, bit);
594 ASSERT(IsPowerOf2(bit));
595 int res = 0;
596 if ((bit >> 16) != 0) {
597 bit = bit >> 16;
598 res += 16;
599 }
600 if ((bit & 0xff) == 0) {
601 bit = bit >> 8;
602 res += 8;
603 }
604 if ((bit & 0xf) == 0) {
605 bit = bit >> 4;
606 res += 4;
607 }
608 if ((bit & 0x03) == 0) {
609 bit = bit >> 2;
610 res += 2;
611 }
612 if (bit == 2) {
613 res += 1;
614 }
615 return res;
616 }
617
618
619 static bool fits_shifter(uint32_t imm32,
620 uint32_t* rotate_imm,
621 uint32_t* immed_8) {
622 if (imm32 <= 255) {
623 // Respond quickly to all small numbers (includes zero).
624 *immed_8 = imm32;
625 *rotate_imm = 0;
626 return true;
627 }
628 // Find the first non-zero (aligned) bit.
629 uint32_t firstbit = imm32 - (imm32 & (imm32 - 1));
630 // Check whether an 8-bit immediate starting at that bit index can represent
631 // imm32.
632 if (firstbit > (imm32 >> 8)) {
633 // Fits in 8 bits plus shift.
634 int bit_index = BitIndex(firstbit);
635 // Prefer even positions if possible.
636 if ((bit_index & 1) != 0 && firstbit > (imm32 >> 7)) {
637 }
638 *immed_8 = imm32 >> bit_index;
639 *rotate_imm = (32 - bit_index);
640 return true;
641 }
642 // Check for an 8-bit range that wraps.
643 uint32_t rotated = (imm32 << 16) | (imm32 >> 16); // Rotate 16.
644 firstbit = rotated - (rotated & (rotated - 1));
645 if (firstbit > (rotated >> 8)) {
646 // Fits in shifter.
647 int bit_index = BitIndex(firstbit);
648 if ((bit_index & 1) != 0 && firstbit > (rotated >> 7)) {
649 bit_index -= 1;
650 }
651 *immed_8 = rotated >> bit_index;
652 *rotate_imm = (48 - bit_index) & 0x1F;
653 return true;
654 }
655 return false;
656 }
657
658
659 // Check if an immediate can be represented as two rotated 8-bit constants.
589 static bool fits_shifter(uint32_t imm32, 660 static bool fits_shifter(uint32_t imm32,
590 uint32_t* rotate_imm, 661 uint32_t* rotate_imm,
591 uint32_t* immed_8, 662 uint32_t* immed_8,
592 Instr* instr) { 663 uint32_t* rotate_imm_2,
593 // imm32 must be unsigned. 664 uint32_t* immed_8_2) {
594 for (int rot = 0; rot < 16; rot++) { 665 const uint32_t kEvenBitsMask = 0x55555555;
595 uint32_t imm8 = (imm32 << 2*rot) | (imm32 >> (32 - 2*rot)); 666 // Smear bits to even positions to ensure that rotations are even numbered.
596 if ((imm8 <= 0xff)) { 667 uint32_t aligned = imm32 | ((imm32 >> 1) & kEvenBitsMask);
597 *rotate_imm = rot; 668 // Try rotating the first byte around to the end to see if a range can be
598 *immed_8 = imm8; 669 // found using bits from both ends.
670 for (int i = 0; i < 8; i += 2) {
671 uint32_t rotated = (aligned >> i) | (i > 0 ? (aligned << (32 - i)) : 0);
672 uint32_t lowbit1 = rotated ^ (rotated & (rotated - 1));
673 rotated &= ~((lowbit1 << 8) - 1);
674 ASSERT_NE(0, rotated); // Otherwise we would have fit in one shifter op!
675 uint32_t lowbit2 = rotated ^ (rotated & (rotated - 1));
676 rotated &= ~((lowbit2 << 8) - 1);
677 if (rotated == 0) {
678 // Found two 8-bit sequences at even offsets that contain all the bits
679 // of imm32.
680 int bi1 = BitIndex(lowbit1) + i;
681 int bi2 = BitIndex(lowbit2) + i;
682 ASSERT(bi2 < 32); // Otherwise we would have matched at i==0.
683 ASSERT(bi1 <= bi2 - 8); // Ranges non-overlapping.
684 *immed_8 = (imm32 >> bi1) & 0xFF;
685 uint32_t imm2 = (imm32 >> bi2);
686 if (bi2 > 24) {
687 imm2 |= imm32 << (32 - bi2);
688 }
689 *immed_8_2 = imm2 & 0xFF;
690 *rotate_imm = (32 - bi1) & 0x1f;
691 *rotate_imm_2 = (32 - bi2) & 0x1f;
599 return true; 692 return true;
600 } 693 }
601 } 694 }
602 // If the opcode is mov or mvn and if ~imm32 fits, change the opcode.
603 if (instr != NULL && (*instr & 0xd*B21) == 0xd*B21) {
604 if (fits_shifter(~imm32, rotate_imm, immed_8, NULL)) {
605 *instr ^= 0x2*B21;
606 return true;
607 }
608 }
609 return false; 695 return false;
610 } 696 }
611 697
612 698
613 // We have to use the temporary register for things that can be relocated even 699 // We have to use the constant pool for things that can be relocated even
614 // if they can be encoded in the ARM's 12 bits of immediate-offset instruction 700 // if they can be encoded in the ARM's 12 bits of immediate-offset instruction
615 // space. There is no guarantee that the relocated location can be similarly 701 // space. There is no guarantee that the relocated location can be similarly
616 // encoded. 702 // encoded.
617 static bool MustUseIp(RelocInfo::Mode rmode) { 703 static bool MustUseConstantPool(RelocInfo::Mode rmode) {
618 if (rmode == RelocInfo::EXTERNAL_REFERENCE) { 704 if (rmode == RelocInfo::EXTERNAL_REFERENCE) {
619 #ifdef DEBUG 705 #ifdef DEBUG
620 if (!Serializer::enabled()) { 706 if (!Serializer::enabled()) {
621 Serializer::TooLateToEnableNow(); 707 Serializer::TooLateToEnableNow();
622 } 708 }
623 #endif 709 #endif
624 return Serializer::enabled(); 710 return Serializer::enabled();
625 } else if (rmode == RelocInfo::NONE) { 711 } else if (rmode == RelocInfo::NONE) {
626 return false; 712 return false;
627 } 713 }
628 return true; 714 return true;
629 } 715 }
630 716
631 717
718 // Try to fit immediate value into one or two immediate operations.
719 // Updates the instr and emits up to two instructions previous to it
720 // (with the same condition as instr, and leaving the flags unchanged).
721 // If the immediate value can directly be represented as an 8-bit constant
722 // rotated an even number of bits, it can be inlined in the instruction.
723 // If it is an 8-bit constant rotated an odd number of bits, or if
724 // the negation is a rotated 8-bit constant, load it using a mov/mvn
725 // and rotate the resulting register in the instruction.
726 // If the immediate value, or its negation, can be created by combining two
727 // (evenly) rotated 8-bit values, then load it using a mov/orr or mvn/bic
728 // sequence (if the instruction is a move, use its destination directly
729 // as the destination of the orr or bic instruction).
730 // If none of this works, return false.
731 // If addrmode1 is false, the instruction may only use an 8-ROR-4 immediate or
732 // a simple register, not a shifted register. Also, don't try to match the
733 // instruction opcode against the addrmode1 instructions for optimizations.
734 bool Assembler::fit_to_shifter(Instr* instr_address,
735 uint32_t imm32,
736 bool addrmode1) {
737 // Modify instruction locally until we are sure we have a fit.
738 Instr instr = *instr_address;
739 ASSERT_EQ(0, instr & ~(CondMask | OpCodeMask | S));
740
741 // Normalize moves to only be mov, not mvn, for simplicity.
742 if (addrmode1 && (instr & OpCodeMask) == MVN) {
743 imm32 = ~imm32;
744 instr ^= MOV ^ MVN;
745 }
746
747 uint32_t ror_8;
748 uint32_t immed_8;
749
750 if (fits_shifter(imm32, &ror_8, &immed_8)) {
751 if ((ror_8 & 1) == 0) {
752 // Even rotation count can be represented directly.
753 *instr_address = instr | I | ror_8 * B7 | immed_8;
754 return true;
755 } else if (addrmode1) {
756 // Odd rotation can't be represented directly, so load into ip and
757 // rotate the last bit in a register shift operand.
758 // mov ip, imm32 ROL 1
759 Instr cond = instr & CondMask;
760 emit(cond | I | MOV | ip.code() * B12 | (ror_8 >> 1) * B8 | immed_8);
761 // Use (ip ROR 1) as operand.
762 *instr_address = instr | ROR | 1 * B7 | ip.code();
763 return true;
764 }
765 }
766 // Try negating the immediate to use mvn to load it.
767 if (fits_shifter(~imm32, &ror_8, &immed_8)) {
768 Instr opcode = instr & OpCodeMask;
769 if ((ror_8 & 1) == 0 && addrmode1) {
770 if (opcode == MOV) {
771 *instr_address = (instr ^ (MVN ^ MOV)) | I | ror_8 * B7 | immed_8;
772 return true;
773 }
774 if (opcode == AND || opcode == BIC) {
775 *instr_address = (instr ^ (AND ^ BIC)) | I | ror_8 * B7 | immed_8;
776 return true;
777 }
778 }
779 // Emit mov ip,~imm32 to get value into register.
780 if ((ror_8 & 1) == 0 || addrmode1) {
781 Instr cond = instr & CondMask;
782 emit(cond | I | MVN | ip.code() * B12 | (ror_8 >> 1) * B8 | immed_8);
783 instr |= ip.code();
784 if ((ror_8 & 1) != 0) {
785 instr |= ROR | 1 * B7;
786 }
787 *instr_address = instr;
788 return true;
789 }
790 }
791
792 // TODO(lrn): If supported, use MOVT, MOVW to always load top and low bits in
793 // two operations, or a single 16-bit rotated constant in one mov and using
794 // a rotated register as operand.
795
796 // Try combining two shifter operands into imm32 using orr.
797 uint32_t ror_8_2;
798 uint32_t immed_8_2;
799 if (fits_shifter(imm32, &ror_8, &immed_8, &ror_8_2, &immed_8_2)) {
800 ASSERT_NE(0, immed_8_2);
801 ASSERT_EQ(0, ror_8 & 1);
802 ASSERT_EQ(0, ror_8_2 & 1);
803 Instr imm1 = (ror_8 >> 1) * B8 | immed_8;
804 Instr imm2 = (ror_8_2 >> 1) * B8 | immed_8_2;
805
806 // Create constant using mov+orr of two rotated 8-bit immediates.
807 Instr cond = instr & CondMask;
808 emit(cond | I | MOV | ip.code() * B12 | imm1);
809 if (addrmode1 && (instr & OpCodeMask) == MOV) {
810 // Convert mov to orr-instruction.
811 *instr_address = (instr ^ (MOV ^ ORR)) | ip.code() * B16 | I | imm2;
812 return true;
813 }
814 // Create new orr isntruction
815 emit(cond | ORR | ip.code() * (B16 + B12) | I | imm2);
816 *instr_address = instr | ip.code();
817 return true;
818 }
819 // Try again negating imm32, using mvn and bic to load the inverted result
820 // instead of mov and orr.
821 if (fits_shifter(~imm32, &ror_8, &immed_8, &ror_8_2, &immed_8_2)) {
822 // Create constant using mvn+bic of two rotated 8-bit immediates.
823 ASSERT_NE(0, immed_8_2);
824 ASSERT_EQ(0, ror_8 & 1);
825 ASSERT_EQ(0, ror_8_2 & 1);
826 Instr imm1 = (ror_8 >> 1) * B8 | immed_8;
827 Instr imm2 = (ror_8_2 >> 1) * B8 | immed_8_2;
828
829 Instr cond = instr & CondMask;
830 emit(cond | I | MVN | ip.code() * B12 | imm1);
831 if (addrmode1 && (instr & OpCodeMask) == MOV) {
832 // Convert mov to bic-instruction.
833 *instr_address = (instr ^ (MOV ^ BIC)) | ip.code() * B16 | I | imm2;
834 return true;
835 }
836 // Create new orr instruction and use ip as operand.
837 emit(cond | BIC | ip.code() * (B16 + B12) | I | imm2);
838 *instr_address = instr | ip.code();
839 return true;
840 }
841 return false;
842 }
843
844
632 void Assembler::addrmod1(Instr instr, 845 void Assembler::addrmod1(Instr instr,
633 Register rn, 846 Register rn,
634 Register rd, 847 Register rd,
635 const Operand& x) { 848 const Operand& x) {
849 // Constants.
636 CheckBuffer(); 850 CheckBuffer();
637 ASSERT((instr & ~(CondMask | OpCodeMask | S)) == 0); 851 ASSERT_EQ(0, (instr & ~(CondMask | OpCodeMask | S)));
852 ASSERT(((instr & OpCodeMask) != MOV && (instr & OpCodeMask) != MVN) ||
853 rn.is(r0));
638 if (!x.rm_.is_valid()) { 854 if (!x.rm_.is_valid()) {
639 // Immediate. 855 // immediate
640 uint32_t rotate_imm; 856 bool must_use_pool = MustUseConstantPool(x.rmode_);
641 uint32_t immed_8; 857 if (must_use_pool || !fit_to_shifter(&instr, x.imm32_)) {
642 if (MustUseIp(x.rmode_) || 858 // The immediate operand cannot be encoded as a shifter operand, or as
643 !fits_shifter(x.imm32_, &rotate_imm, &immed_8, &instr)) { 859 // a simple combination of shifter operands, so load it first to register
644 // The immediate operand cannot be encoded as a shifter operand, so load 860 // ip and change the original instruction to use ip.
645 // it first to register ip and change the original instruction to use ip.
646 // However, if the original instruction is a 'mov rd, x' (not setting the 861 // However, if the original instruction is a 'mov rd, x' (not setting the
647 // condition code), then replace it with a 'ldr rd, [pc]'. 862 // condition code), then replace it with a 'ldr rd, [pc]'.
648 RecordRelocInfo(x.rmode_, x.imm32_); 863 RecordRelocInfo(x.rmode_, x.imm32_);
649 CHECK(!rn.is(ip)); // rn should never be ip, or will be trashed 864 CHECK(!rn.is(ip)); // rn should never be ip, or will be trashed
650 Condition cond = static_cast<Condition>(instr & CondMask); 865 Condition cond = static_cast<Condition>(instr & CondMask);
651 if ((instr & ~CondMask) == 13*B21) { // mov, S not set 866 if ((instr & ~CondMask) == MOV) { // mov, S not set
652 ldr(rd, MemOperand(pc, 0), cond); 867 ldr(rd, MemOperand(pc, 0), cond);
653 } else { 868 } else {
654 ldr(ip, MemOperand(pc, 0), cond); 869 ldr(ip, MemOperand(pc, 0), cond);
655 addrmod1(instr, rn, rd, Operand(ip)); 870 addrmod1(instr, rn, rd, Operand(ip));
656 } 871 }
657 return; 872 return;
658 } 873 }
659 instr |= I | rotate_imm*B8 | immed_8;
660 } else if (!x.rs_.is_valid()) { 874 } else if (!x.rs_.is_valid()) {
661 // Immediate shift. 875 // Immediate shift.
662 instr |= x.shift_imm_*B7 | x.shift_op_ | x.rm_.code(); 876 instr |= x.shift_imm_*B7 | x.shift_op_ | x.rm_.code();
663 } else { 877 } else {
664 // Register shift. 878 // Register shift.
665 ASSERT(!rn.is(pc) && !rd.is(pc) && !x.rm_.is(pc) && !x.rs_.is(pc)); 879 ASSERT(!rn.is(pc) && !rd.is(pc) && !x.rm_.is(pc) && !x.rs_.is(pc));
666 instr |= x.rs_.code()*B8 | x.shift_op_ | B4 | x.rm_.code(); 880 instr |= x.rs_.code()*B8 | x.shift_op_ | B4 | x.rm_.code();
667 } 881 }
668 emit(instr | rn.code()*B16 | rd.code()*B12); 882 emit(instr | rn.code()*B16 | rd.code()*B12);
669 if (rn.is(pc) || x.rm_.is(pc)) 883 if (rn.is(pc) || x.rm_.is(pc))
(...skipping 398 matching lines...) Expand 10 before | Expand all | Expand 10 after
1068 ASSERT(!dst.is(pc)); 1282 ASSERT(!dst.is(pc));
1069 emit(cond | B24 | s | 15*B16 | dst.code()*B12); 1283 emit(cond | B24 | s | 15*B16 | dst.code()*B12);
1070 } 1284 }
1071 1285
1072 1286
1073 void Assembler::msr(SRegisterFieldMask fields, const Operand& src, 1287 void Assembler::msr(SRegisterFieldMask fields, const Operand& src,
1074 Condition cond) { 1288 Condition cond) {
1075 ASSERT(fields >= B16 && fields < B20); // at least one field set 1289 ASSERT(fields >= B16 && fields < B20); // at least one field set
1076 Instr instr; 1290 Instr instr;
1077 if (!src.rm_.is_valid()) { 1291 if (!src.rm_.is_valid()) {
1078 // Immediate. 1292 // immediate
1079 uint32_t rotate_imm; 1293 if (MustUseConstantPool(src.rmode_) ||
1080 uint32_t immed_8; 1294 !fit_to_shifter(&instr, src.imm32_, false)) {
1081 if (MustUseIp(src.rmode_) || 1295 // immediate operand cannot be encoded, load it first to register ip
1082 !fits_shifter(src.imm32_, &rotate_imm, &immed_8, NULL)) {
1083 // Immediate operand cannot be encoded, load it first to register ip.
1084 RecordRelocInfo(src.rmode_, src.imm32_); 1296 RecordRelocInfo(src.rmode_, src.imm32_);
1085 ldr(ip, MemOperand(pc, 0), cond); 1297 ldr(ip, MemOperand(pc, 0), cond);
1086 msr(fields, Operand(ip), cond); 1298 msr(fields, Operand(ip), cond);
1087 return; 1299 return;
1088 } 1300 }
1089 instr = I | rotate_imm*B8 | immed_8;
1090 } else { 1301 } else {
1091 ASSERT(!src.rs_.is_valid() && src.shift_imm_ == 0); // only rm allowed 1302 ASSERT(!src.rs_.is_valid() && src.shift_imm_ == 0); // only rm allowed
1092 instr = src.rm_.code(); 1303 instr = src.rm_.code();
1093 } 1304 }
1094 emit(cond | instr | B24 | B21 | fields | 15*B12); 1305 emit(cond | instr | B24 | B21 | fields | 15*B12);
1095 } 1306 }
1096 1307
1097 1308
1098 // Load/Store instructions. 1309 // Load/Store instructions.
1099 void Assembler::ldr(Register dst, const MemOperand& src, Condition cond) { 1310 void Assembler::ldr(Register dst, const MemOperand& src, Condition cond) {
(...skipping 520 matching lines...) Expand 10 before | Expand all | Expand 10 after
1620 mov(dst, Operand(x.rn_), s, cond); 1831 mov(dst, Operand(x.rn_), s, cond);
1621 else if ((am & U) == 0) // negative indexing 1832 else if ((am & U) == 0) // negative indexing
1622 sub(dst, x.rn_, Operand(x.rm_, x.shift_op_, x.shift_imm_), s, cond); 1833 sub(dst, x.rn_, Operand(x.rm_, x.shift_op_, x.shift_imm_), s, cond);
1623 else 1834 else
1624 add(dst, x.rn_, Operand(x.rm_, x.shift_op_, x.shift_imm_), s, cond); 1835 add(dst, x.rn_, Operand(x.rm_, x.shift_op_, x.shift_imm_), s, cond);
1625 } 1836 }
1626 } 1837 }
1627 1838
1628 1839
1629 bool Assembler::ImmediateFitsAddrMode1Instruction(int32_t imm32) { 1840 bool Assembler::ImmediateFitsAddrMode1Instruction(int32_t imm32) {
1630 uint32_t dummy1; 1841 uint32_t rotate;
1631 uint32_t dummy2; 1842 uint32_t immediate;
1632 return fits_shifter(imm32, &dummy1, &dummy2, NULL); 1843 return fits_shifter(imm32, &rotate, &immediate) && (rotate & 1) == 0;
1633 } 1844 }
1634 1845
1635 1846
1636 void Assembler::BlockConstPoolFor(int instructions) { 1847 void Assembler::BlockConstPoolFor(int instructions) {
1637 BlockConstPoolBefore(pc_offset() + instructions * kInstrSize); 1848 BlockConstPoolBefore(pc_offset() + instructions * kInstrSize);
1638 } 1849 }
1639 1850
1640 1851
1641 // Debugging. 1852 // Debugging.
1642 void Assembler::RecordJSReturn() { 1853 void Assembler::RecordJSReturn() {
(...skipping 226 matching lines...) Expand 10 before | Expand all | Expand 10 after
1869 bind(&after_pool); 2080 bind(&after_pool);
1870 } 2081 }
1871 2082
1872 // Since a constant pool was just emitted, move the check offset forward by 2083 // Since a constant pool was just emitted, move the check offset forward by
1873 // the standard interval. 2084 // the standard interval.
1874 next_buffer_check_ = pc_offset() + kCheckConstInterval; 2085 next_buffer_check_ = pc_offset() + kCheckConstInterval;
1875 } 2086 }
1876 2087
1877 2088
1878 } } // namespace v8::internal 2089 } } // namespace v8::internal
OLDNEW
« no previous file with comments | « src/arm/assembler-arm.h ('k') | src/arm/simulator-arm.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698