# Chromium Code Reviews

Issue 2797923006: [ARM] Implement D-register versions of vzip, vuzp, and vtrn. (Closed)

  | OLD | NEW | 
|---|---|
| 1 // Copyright (c) 1994-2006 Sun Microsystems Inc. | 1 // Copyright (c) 1994-2006 Sun Microsystems Inc. | 
| 2 // All Rights Reserved. | 2 // All Rights Reserved. | 
| 3 // | 3 // | 
| 4 // Redistribution and use in source and binary forms, with or without | 4 // Redistribution and use in source and binary forms, with or without | 
| 5 // modification, are permitted provided that the following conditions | 5 // modification, are permitted provided that the following conditions | 
| 6 // are met: | 6 // are met: | 
| 7 // | 7 // | 
| 8 // - Redistributions of source code must retain the above copyright notice, | 8 // - Redistributions of source code must retain the above copyright notice, | 
| 9 // this list of conditions and the following disclaimer. | 9 // this list of conditions and the following disclaimer. | 
| 10 // | 10 // | 
| (...skipping 4692 matching lines...) | (...skipping 4692 matching lines...) |
| 4703 src1.split_code(&vn, &n); | 4703 src1.split_code(&vn, &n); | 
| 4704 int vm, m; | 4704 int vm, m; | 
| 4705 src2.split_code(&vm, &m); | 4705 src2.split_code(&vm, &m); | 
| 4706 DCHECK_GT(16, bytes); | 4706 DCHECK_GT(16, bytes); | 
| 4707 emit(0x1E5U * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 | bytes * B8 | | 4707 emit(0x1E5U * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 | bytes * B8 | | 
| 4708 n * B7 | B6 | m * B5 | vm); | 4708 n * B7 | B6 | m * B5 | vm); | 
| 4709 } | 4709 } | 
| 4710 | 4710 | 
| 4711 enum NeonSizedOp { VZIP, VUZP, VREV16, VREV32, VREV64, VTRN }; | 4711 enum NeonSizedOp { VZIP, VUZP, VREV16, VREV32, VREV64, VTRN }; | 
| 4712 | 4712 | 
| 4713 static Instr EncodeNeonSizedOp(NeonSizedOp op, NeonSize size, | 4713 static Instr EncodeNeonSizedOp(NeonSizedOp op, NeonRegType reg_type, | 
| 4714 QwNeonRegister dst, QwNeonRegister src) { | 4714 NeonSize size, int dst_code, int src_code) { | 
| 4715 int op_encoding = 0; | 4715 int op_encoding = 0; | 
| 4716 switch (op) { | 4716 switch (op) { | 
| 4717 case VZIP: | 4717 case VZIP: | 
| 4718 op_encoding = 0x2 * B16 | 0x3 * B7; | 4718 op_encoding = 0x2 * B16 | 0x3 * B7; | 
| 4719 break; | 4719 break; | 
| 4720 case VUZP: | 4720 case VUZP: | 
| 4721 op_encoding = 0x2 * B16 | 0x2 * B7; | 4721 op_encoding = 0x2 * B16 | 0x2 * B7; | 
| 4722 break; | 4722 break; | 
| 4723 case VREV16: | 4723 case VREV16: | 
| 4724 op_encoding = 0x2 * B7; | 4724 op_encoding = 0x2 * B7; | 
| 4725 break; | 4725 break; | 
| 4726 case VREV32: | 4726 case VREV32: | 
| 4727 op_encoding = 0x1 * B7; | 4727 op_encoding = 0x1 * B7; | 
| 4728 break; | 4728 break; | 
| 4729 case VREV64: | 4729 case VREV64: | 
| 4730 // op_encoding is 0; | 4730 // op_encoding is 0; | 
| 4731 break; | 4731 break; | 
| 4732 case VTRN: | 4732 case VTRN: | 
| 4733 op_encoding = 0x2 * B16 | B7; | 4733 op_encoding = 0x2 * B16 | B7; | 
| 4734 break; | 4734 break; | 
| 4735 default: | 4735 default: | 
| 4736 UNREACHABLE(); | 4736 UNREACHABLE(); | 
| 4737 break; | 4737 break; | 
| 4738 } | 4738 } | 
| 4739 int vd, d; | 4739 int vd, d, vm, m; | 
| 4740 dst.split_code(&vd, &d); | 4740 if (reg_type == NEON_Q) { |
| | *Review comment* — **martyn.capewell** (2017/04/06 13:09:49): This idiom may become common if more D/Q operation… <br> **bbudge** (2017/04/06 17:49:43): Done. |
| 4741 int vm, m; | 4741 op_encoding |= B6; | 
| 4742 src.split_code(&vm, &m); | 4742 QwNeonRegister::split_code(dst_code, &vd, &d); | 
| 4743 QwNeonRegister::split_code(src_code, &vm, &m); | |
| 4744 } else { | |
| 4745 DCHECK_EQ(reg_type, NEON_D); | |
| 4746 DwVfpRegister::split_code(dst_code, &vd, &d); | |
| 4747 DwVfpRegister::split_code(src_code, &vm, &m); | |
| 4748 } | |
| 4743 int sz = static_cast<int>(size); | 4749 int sz = static_cast<int>(size); | 
| 4744 return 0x1E7U * B23 | d * B22 | 0x3 * B20 | sz * B18 | vd * B12 | B6 | | 4750 return 0x1E7U * B23 | d * B22 | 0x3 * B20 | sz * B18 | vd * B12 | m * B5 | | 
| 4745 m * B5 | vm | op_encoding; | 4751 vm | op_encoding; | 
| 4752 } | |
| 4753 | |
| 4754 void Assembler::vzip(NeonSize size, DwVfpRegister src1, DwVfpRegister src2) { | |
| 4755 DCHECK(IsEnabled(NEON)); | |
| 4756 // vzip.<size>(Dn, Dm) SIMD zip (interleave). | |
| 4757 // Instruction details available in ARM DDI 0406C.b, A8-1102. | |
| 4758 emit(EncodeNeonSizedOp(VZIP, NEON_D, size, src1.code(), src2.code())); | |
| 4746 } | 4759 } | 
| 4747 | 4760 | 
| 4748 void Assembler::vzip(NeonSize size, QwNeonRegister src1, QwNeonRegister src2) { | 4761 void Assembler::vzip(NeonSize size, QwNeonRegister src1, QwNeonRegister src2) { | 
| 4749 DCHECK(IsEnabled(NEON)); | 4762 DCHECK(IsEnabled(NEON)); | 
| 4750 // Qd = vzip.<size>(Qn, Qm) SIMD zip (interleave). | 4763 // vzip.<size>(Qn, Qm) SIMD zip (interleave). | 
| 4751 // Instruction details available in ARM DDI 0406C.b, A8-1102. | 4764 // Instruction details available in ARM DDI 0406C.b, A8-1102. | 
| 4752 emit(EncodeNeonSizedOp(VZIP, size, src1, src2)); | 4765 emit(EncodeNeonSizedOp(VZIP, NEON_Q, size, src1.code(), src2.code())); | 
| 4766 } | |
| 4767 | |
| 4768 void Assembler::vuzp(NeonSize size, DwVfpRegister src1, DwVfpRegister src2) { | |
| 4769 DCHECK(IsEnabled(NEON)); | |
| 4770 // vuzp.<size>(Dn, Dm) SIMD un-zip (de-interleave). | |
| 4771 // Instruction details available in ARM DDI 0406C.b, A8-1100. | |
| 4772 emit(EncodeNeonSizedOp(VUZP, NEON_D, size, src1.code(), src2.code())); | |
| 4753 } | 4773 } | 
| 4754 | 4774 | 
| 4755 void Assembler::vuzp(NeonSize size, QwNeonRegister src1, QwNeonRegister src2) { | 4775 void Assembler::vuzp(NeonSize size, QwNeonRegister src1, QwNeonRegister src2) { | 
| 4756 DCHECK(IsEnabled(NEON)); | 4776 DCHECK(IsEnabled(NEON)); | 
| 4757 // Qd = vuzp.<size>(Qn, Qm) SIMD un-zip (de-interleave). | 4777 // vuzp.<size>(Qn, Qm) SIMD un-zip (de-interleave). | 
| 4758 // Instruction details available in ARM DDI 0406C.b, A8-1100. | 4778 // Instruction details available in ARM DDI 0406C.b, A8-1100. | 
| 4759 emit(EncodeNeonSizedOp(VUZP, size, src1, src2)); | 4779 emit(EncodeNeonSizedOp(VUZP, NEON_Q, size, src1.code(), src2.code())); | 
| 4760 } | 4780 } | 
| 4761 | 4781 | 
| 4762 void Assembler::vrev16(NeonSize size, QwNeonRegister dst, QwNeonRegister src) { | 4782 void Assembler::vrev16(NeonSize size, QwNeonRegister dst, QwNeonRegister src) { | 
| 4763 DCHECK(IsEnabled(NEON)); | 4783 DCHECK(IsEnabled(NEON)); | 
| 4764 // Qd = vrev<op_size>.<size>(Qn, Qm) SIMD scalar reverse. | 4784 // Qd = vrev16.<size>(Qm) SIMD element reverse. | 
| 4765 // Instruction details available in ARM DDI 0406C.b, A8-1028. | 4785 // Instruction details available in ARM DDI 0406C.b, A8-1028. | 
| 4766 emit(EncodeNeonSizedOp(VREV16, size, dst, src)); | 4786 emit(EncodeNeonSizedOp(VREV16, NEON_Q, size, dst.code(), src.code())); | 
| 4767 } | 4787 } | 
| 4768 | 4788 | 
| 4769 void Assembler::vrev32(NeonSize size, QwNeonRegister dst, QwNeonRegister src) { | 4789 void Assembler::vrev32(NeonSize size, QwNeonRegister dst, QwNeonRegister src) { | 
| 4770 DCHECK(IsEnabled(NEON)); | 4790 DCHECK(IsEnabled(NEON)); | 
| 4771 // Qd = vrev<op_size>.<size>(Qn, Qm) SIMD scalar reverse. | 4791 // Qd = vrev32.<size>(Qm) SIMD element reverse. | 
| 4772 // Instruction details available in ARM DDI 0406C.b, A8-1028. | 4792 // Instruction details available in ARM DDI 0406C.b, A8-1028. | 
| 4773 emit(EncodeNeonSizedOp(VREV32, size, dst, src)); | 4793 emit(EncodeNeonSizedOp(VREV32, NEON_Q, size, dst.code(), src.code())); | 
| 4774 } | 4794 } | 
| 4775 | 4795 | 
| 4776 void Assembler::vrev64(NeonSize size, QwNeonRegister dst, QwNeonRegister src) { | 4796 void Assembler::vrev64(NeonSize size, QwNeonRegister dst, QwNeonRegister src) { | 
| 4777 DCHECK(IsEnabled(NEON)); | 4797 DCHECK(IsEnabled(NEON)); | 
| 4778 // Qd = vrev<op_size>.<size>(Qn, Qm) SIMD scalar reverse. | 4798 // Qd = vrev64.<size>(Qm) SIMD element reverse. | 
| 4779 // Instruction details available in ARM DDI 0406C.b, A8-1028. | 4799 // Instruction details available in ARM DDI 0406C.b, A8-1028. | 
| 4780 emit(EncodeNeonSizedOp(VREV64, size, dst, src)); | 4800 emit(EncodeNeonSizedOp(VREV64, NEON_Q, size, dst.code(), src.code())); | 
| 4801 } | |
| 4802 | |
| 4803 void Assembler::vtrn(NeonSize size, DwVfpRegister src1, DwVfpRegister src2) { | |
| 4804 DCHECK(IsEnabled(NEON)); | |
| 4805 // vtrn.<size>(Dn, Dm) SIMD element transpose. | |
| 4806 // Instruction details available in ARM DDI 0406C.b, A8-1096. | |
| 4807 emit(EncodeNeonSizedOp(VTRN, NEON_D, size, src1.code(), src2.code())); | |
| 4781 } | 4808 } | 
| 4782 | 4809 | 
| 4783 void Assembler::vtrn(NeonSize size, QwNeonRegister src1, QwNeonRegister src2) { | 4810 void Assembler::vtrn(NeonSize size, QwNeonRegister src1, QwNeonRegister src2) { | 
| 4784 DCHECK(IsEnabled(NEON)); | 4811 DCHECK(IsEnabled(NEON)); | 
| 4785 // Qd = vrev<op_size>.<size>(Qn, Qm) SIMD scalar reverse. | 4812 // vtrn.<size>(Qn, Qm) SIMD element transpose. | 
| 4786 // Instruction details available in ARM DDI 0406C.b, A8-1096. | 4813 // Instruction details available in ARM DDI 0406C.b, A8-1096. | 
| 4787 emit(EncodeNeonSizedOp(VTRN, size, src1, src2)); | 4814 emit(EncodeNeonSizedOp(VTRN, NEON_Q, size, src1.code(), src2.code())); | 
| 4788 } | 4815 } | 
| 4789 | 4816 | 
| 4790 // Encode NEON vtbl / vtbx instruction. | 4817 // Encode NEON vtbl / vtbx instruction. | 
| 4791 static Instr EncodeNeonVTB(DwVfpRegister dst, const NeonListOperand& list, | 4818 static Instr EncodeNeonVTB(DwVfpRegister dst, const NeonListOperand& list, | 
| 4792 DwVfpRegister index, bool vtbx) { | 4819 DwVfpRegister index, bool vtbx) { | 
| 4793 // Dd = vtbl(table, Dm) SIMD vector permute, zero at out of range indices. | 4820 // Dd = vtbl(table, Dm) SIMD vector permute, zero at out of range indices. | 
| 4794 // Instruction details available in ARM DDI 0406C.b, A8-1094. | 4821 // Instruction details available in ARM DDI 0406C.b, A8-1094. | 
| 4795 // Dd = vtbx(table, Dm) SIMD vector permute, skip out of range indices. | 4822 // Dd = vtbx(table, Dm) SIMD vector permute, skip out of range indices. | 
| 4796 // Instruction details available in ARM DDI 0406C.b, A8-1094. | 4823 // Instruction details available in ARM DDI 0406C.b, A8-1094. | 
| 4797 int vd, d; | 4824 int vd, d; | 
| (...skipping 589 matching lines...) | (...skipping 589 matching lines...) |
| 5387 } | 5414 } | 
| 5388 | 5415 | 
| 5389 void PatchingAssembler::FlushICache(Isolate* isolate) { | 5416 void PatchingAssembler::FlushICache(Isolate* isolate) { | 
| 5390 Assembler::FlushICache(isolate, buffer_, buffer_size_ - kGap); | 5417 Assembler::FlushICache(isolate, buffer_, buffer_size_ - kGap); | 
| 5391 } | 5418 } | 
| 5392 | 5419 | 
| 5393 } // namespace internal | 5420 } // namespace internal | 
| 5394 } // namespace v8 | 5421 } // namespace v8 | 
| 5395 | 5422 | 
| 5396 #endif // V8_TARGET_ARCH_ARM | 5423 #endif // V8_TARGET_ARCH_ARM | 
| OLD | NEW |