Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(6)

Side by Side Diff: src/ia32/assembler-ia32.cc

Issue 2931333002: [ia32] Add pextrb/pextrw, pinsrb, pshufb/pshuflw and AVX version (Closed)
Patch Set: Created 3 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/ia32/assembler-ia32.h ('k') | src/ia32/disasm-ia32.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 1994-2006 Sun Microsystems Inc. 1 // Copyright (c) 1994-2006 Sun Microsystems Inc.
2 // All Rights Reserved. 2 // All Rights Reserved.
3 // 3 //
4 // Redistribution and use in source and binary forms, with or without 4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions 5 // modification, are permitted provided that the following conditions
6 // are met: 6 // are met:
7 // 7 //
8 // - Redistributions of source code must retain the above copyright notice, 8 // - Redistributions of source code must retain the above copyright notice,
9 // this list of conditions and the following disclaimer. 9 // this list of conditions and the following disclaimer.
10 // 10 //
(...skipping 104 matching lines...) Expand 10 before | Expand all | Expand 10 after
115 115
116 void CpuFeatures::ProbeImpl(bool cross_compile) { 116 void CpuFeatures::ProbeImpl(bool cross_compile) {
117 base::CPU cpu; 117 base::CPU cpu;
118 CHECK(cpu.has_sse2()); // SSE2 support is mandatory. 118 CHECK(cpu.has_sse2()); // SSE2 support is mandatory.
119 CHECK(cpu.has_cmov()); // CMOV support is mandatory. 119 CHECK(cpu.has_cmov()); // CMOV support is mandatory.
120 120
121 // Only use statically determined features for cross compile (snapshot). 121 // Only use statically determined features for cross compile (snapshot).
122 if (cross_compile) return; 122 if (cross_compile) return;
123 123
124 if (cpu.has_sse41() && FLAG_enable_sse4_1) supported_ |= 1u << SSE4_1; 124 if (cpu.has_sse41() && FLAG_enable_sse4_1) supported_ |= 1u << SSE4_1;
125 if (cpu.has_ssse3() && FLAG_enable_ssse3) supported_ |= 1u << SSSE3;
125 if (cpu.has_sse3() && FLAG_enable_sse3) supported_ |= 1u << SSE3; 126 if (cpu.has_sse3() && FLAG_enable_sse3) supported_ |= 1u << SSE3;
126 if (cpu.has_avx() && FLAG_enable_avx && cpu.has_osxsave() && 127 if (cpu.has_avx() && FLAG_enable_avx && cpu.has_osxsave() &&
127 OSHasAVXSupport()) { 128 OSHasAVXSupport()) {
128 supported_ |= 1u << AVX; 129 supported_ |= 1u << AVX;
129 } 130 }
130 if (cpu.has_fma3() && FLAG_enable_fma3 && cpu.has_osxsave() && 131 if (cpu.has_fma3() && FLAG_enable_fma3 && cpu.has_osxsave() &&
131 OSHasAVXSupport()) { 132 OSHasAVXSupport()) {
132 supported_ |= 1u << FMA3; 133 supported_ |= 1u << FMA3;
133 } 134 }
134 if (cpu.has_bmi1() && FLAG_enable_bmi1) supported_ |= 1u << BMI1; 135 if (cpu.has_bmi1() && FLAG_enable_bmi1) supported_ |= 1u << BMI1;
135 if (cpu.has_bmi2() && FLAG_enable_bmi2) supported_ |= 1u << BMI2; 136 if (cpu.has_bmi2() && FLAG_enable_bmi2) supported_ |= 1u << BMI2;
136 if (cpu.has_lzcnt() && FLAG_enable_lzcnt) supported_ |= 1u << LZCNT; 137 if (cpu.has_lzcnt() && FLAG_enable_lzcnt) supported_ |= 1u << LZCNT;
137 if (cpu.has_popcnt() && FLAG_enable_popcnt) supported_ |= 1u << POPCNT; 138 if (cpu.has_popcnt() && FLAG_enable_popcnt) supported_ |= 1u << POPCNT;
138 if (strcmp(FLAG_mcpu, "auto") == 0) { 139 if (strcmp(FLAG_mcpu, "auto") == 0) {
139 if (cpu.is_atom()) supported_ |= 1u << ATOM; 140 if (cpu.is_atom()) supported_ |= 1u << ATOM;
140 } else if (strcmp(FLAG_mcpu, "atom") == 0) { 141 } else if (strcmp(FLAG_mcpu, "atom") == 0) {
141 supported_ |= 1u << ATOM; 142 supported_ |= 1u << ATOM;
142 } 143 }
143 } 144 }
144 145
145 146
146 void CpuFeatures::PrintTarget() { } 147 void CpuFeatures::PrintTarget() { }
147 void CpuFeatures::PrintFeatures() { 148 void CpuFeatures::PrintFeatures() {
148 printf( 149 printf(
149 "SSE3=%d SSE4_1=%d AVX=%d FMA3=%d BMI1=%d BMI2=%d LZCNT=%d POPCNT=%d " 150 "SSE3=%d SSSE3=%d SSE4_1=%d AVX=%d FMA3=%d BMI1=%d BMI2=%d LZCNT=%d "
150 "ATOM=%d\n", 151 "POPCNT=%d ATOM=%d\n",
151 CpuFeatures::IsSupported(SSE3), CpuFeatures::IsSupported(SSE4_1), 152 CpuFeatures::IsSupported(SSE3), CpuFeatures::IsSupported(SSSE3),
152 CpuFeatures::IsSupported(AVX), CpuFeatures::IsSupported(FMA3), 153 CpuFeatures::IsSupported(SSE4_1), CpuFeatures::IsSupported(AVX),
153 CpuFeatures::IsSupported(BMI1), CpuFeatures::IsSupported(BMI2), 154 CpuFeatures::IsSupported(FMA3), CpuFeatures::IsSupported(BMI1),
154 CpuFeatures::IsSupported(LZCNT), CpuFeatures::IsSupported(POPCNT), 155 CpuFeatures::IsSupported(BMI2), CpuFeatures::IsSupported(LZCNT),
155 CpuFeatures::IsSupported(ATOM)); 156 CpuFeatures::IsSupported(POPCNT), CpuFeatures::IsSupported(ATOM));
156 } 157 }
157 158
158 159
159 // ----------------------------------------------------------------------------- 160 // -----------------------------------------------------------------------------
160 // Implementation of Displacement 161 // Implementation of Displacement
161 162
162 void Displacement::init(Label* L, Type type) { 163 void Displacement::init(Label* L, Type type) {
163 DCHECK(!L->is_bound()); 164 DCHECK(!L->is_bound());
164 int next = 0; 165 int next = 0;
165 if (L->is_linked()) { 166 if (L->is_linked()) {
(...skipping 2510 matching lines...) Expand 10 before | Expand all | Expand 10 after
2676 2677
2677 2678
2678 void Assembler::psrlq(XMMRegister dst, XMMRegister src) { 2679 void Assembler::psrlq(XMMRegister dst, XMMRegister src) {
2679 EnsureSpace ensure_space(this); 2680 EnsureSpace ensure_space(this);
2680 EMIT(0x66); 2681 EMIT(0x66);
2681 EMIT(0x0F); 2682 EMIT(0x0F);
2682 EMIT(0xD3); 2683 EMIT(0xD3);
2683 emit_sse_operand(dst, src); 2684 emit_sse_operand(dst, src);
2684 } 2685 }
2685 2686
2687 void Assembler::pshufb(XMMRegister dst, const Operand& src) {
2688 DCHECK(IsEnabled(SSSE3));
2689 EnsureSpace ensure_space(this);
2690 EMIT(0x66);
2691 EMIT(0x0F);
2692 EMIT(0x38);
2693 EMIT(0x00);
2694 emit_sse_operand(dst, src);
2695 }
2696
2697 void Assembler::pshuflw(XMMRegister dst, const Operand& src, uint8_t shuffle) {
2698 EnsureSpace ensure_space(this);
2699 EMIT(0xF2);
2700 EMIT(0x0F);
2701 EMIT(0x70);
2702 emit_sse_operand(dst, src);
2703 EMIT(shuffle);
2704 }
2705
2686 void Assembler::pshufd(XMMRegister dst, const Operand& src, uint8_t shuffle) { 2706 void Assembler::pshufd(XMMRegister dst, const Operand& src, uint8_t shuffle) {
2687 EnsureSpace ensure_space(this); 2707 EnsureSpace ensure_space(this);
2688 EMIT(0x66); 2708 EMIT(0x66);
2689 EMIT(0x0F); 2709 EMIT(0x0F);
2690 EMIT(0x70); 2710 EMIT(0x70);
2691 emit_sse_operand(dst, src); 2711 emit_sse_operand(dst, src);
2692 EMIT(shuffle); 2712 EMIT(shuffle);
2693 } 2713 }
2694 2714
2715 void Assembler::pextrb(const Operand& dst, XMMRegister src, int8_t offset) {
2716 DCHECK(IsEnabled(SSE4_1));
2717 EnsureSpace ensure_space(this);
2718 EMIT(0x66);
2719 EMIT(0x0F);
2720 EMIT(0x3A);
2721 EMIT(0x14);
2722 emit_sse_operand(src, dst);
2723 EMIT(offset);
2724 }
2725
2726 void Assembler::pextrw(const Operand& dst, XMMRegister src, int8_t offset) {
2727 DCHECK(IsEnabled(SSE4_1));
2728 EnsureSpace ensure_space(this);
2729 EMIT(0x66);
2730 EMIT(0x0F);
2731 EMIT(0x3A);
2732 EMIT(0x15);
2733 emit_sse_operand(src, dst);
2734 EMIT(offset);
2735 }
2695 2736
2696 void Assembler::pextrd(const Operand& dst, XMMRegister src, int8_t offset) { 2737 void Assembler::pextrd(const Operand& dst, XMMRegister src, int8_t offset) {
2697 DCHECK(IsEnabled(SSE4_1)); 2738 DCHECK(IsEnabled(SSE4_1));
2698 EnsureSpace ensure_space(this); 2739 EnsureSpace ensure_space(this);
2699 EMIT(0x66); 2740 EMIT(0x66);
2700 EMIT(0x0F); 2741 EMIT(0x0F);
2701 EMIT(0x3A); 2742 EMIT(0x3A);
2702 EMIT(0x16); 2743 EMIT(0x16);
2703 emit_sse_operand(src, dst); 2744 emit_sse_operand(src, dst);
2704 EMIT(offset); 2745 EMIT(offset);
2705 } 2746 }
2706 2747
2748 void Assembler::pinsrb(XMMRegister dst, const Operand& src, int8_t offset) {
2749 DCHECK(IsEnabled(SSE4_1));
2750 EnsureSpace ensure_space(this);
2751 EMIT(0x66);
2752 EMIT(0x0F);
2753 EMIT(0x3A);
2754 EMIT(0x20);
2755 emit_sse_operand(dst, src);
2756 EMIT(offset);
2757 }
2758
2707 void Assembler::pinsrw(XMMRegister dst, const Operand& src, int8_t offset) { 2759 void Assembler::pinsrw(XMMRegister dst, const Operand& src, int8_t offset) {
2708 DCHECK(is_uint8(offset)); 2760 DCHECK(is_uint8(offset));
2709 EnsureSpace ensure_space(this); 2761 EnsureSpace ensure_space(this);
2710 EMIT(0x66); 2762 EMIT(0x66);
2711 EMIT(0x0F); 2763 EMIT(0x0F);
2712 EMIT(0xC4); 2764 EMIT(0xC4);
2713 emit_sse_operand(dst, src); 2765 emit_sse_operand(dst, src);
2714 EMIT(offset); 2766 EMIT(offset);
2715 } 2767 }
2716 2768
(...skipping 159 matching lines...) Expand 10 before | Expand all | Expand 10 after
2876 vinstr(0x71, iop, dst, Operand(src), k66, k0F, kWIG); 2928 vinstr(0x71, iop, dst, Operand(src), k66, k0F, kWIG);
2877 EMIT(imm8); 2929 EMIT(imm8);
2878 } 2930 }
2879 2931
2880 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int8_t imm8) { 2932 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int8_t imm8) {
2881 XMMRegister iop = {4}; 2933 XMMRegister iop = {4};
2882 vinstr(0x72, iop, dst, Operand(src), k66, k0F, kWIG); 2934 vinstr(0x72, iop, dst, Operand(src), k66, k0F, kWIG);
2883 EMIT(imm8); 2935 EMIT(imm8);
2884 } 2936 }
2885 2937
2938 void Assembler::vpshuflw(XMMRegister dst, const Operand& src, uint8_t shuffle) {
2939 vinstr(0x70, dst, xmm0, src, kF2, k0F, kWIG);
2940 EMIT(shuffle);
2941 }
2942
2886 void Assembler::vpshufd(XMMRegister dst, const Operand& src, uint8_t shuffle) { 2943 void Assembler::vpshufd(XMMRegister dst, const Operand& src, uint8_t shuffle) {
2887 vinstr(0x70, dst, xmm0, src, k66, k0F, kWIG); 2944 vinstr(0x70, dst, xmm0, src, k66, k0F, kWIG);
2888 EMIT(shuffle); 2945 EMIT(shuffle);
2889 } 2946 }
2890 2947
2948 void Assembler::vpextrb(const Operand& dst, XMMRegister src, int8_t offset) {
2949 vinstr(0x14, src, xmm0, dst, k66, k0F3A, kWIG);
2950 EMIT(offset);
2951 }
2952
2953 void Assembler::vpextrw(const Operand& dst, XMMRegister src, int8_t offset) {
2954 vinstr(0x15, src, xmm0, dst, k66, k0F3A, kWIG);
2955 EMIT(offset);
2956 }
2957
2891 void Assembler::vpextrd(const Operand& dst, XMMRegister src, int8_t offset) { 2958 void Assembler::vpextrd(const Operand& dst, XMMRegister src, int8_t offset) {
2892 vinstr(0x16, src, xmm0, dst, k66, k0F3A, kWIG); 2959 vinstr(0x16, src, xmm0, dst, k66, k0F3A, kWIG);
2893 EMIT(offset); 2960 EMIT(offset);
2894 } 2961 }
2895 2962
2963 void Assembler::vpinsrb(XMMRegister dst, XMMRegister src1, const Operand& src2,
2964 int8_t offset) {
2965 vinstr(0x20, dst, src1, src2, k66, k0F3A, kWIG);
2966 EMIT(offset);
2967 }
2968
2969 void Assembler::vpinsrw(XMMRegister dst, XMMRegister src1, const Operand& src2,
2970 int8_t offset) {
2971 vinstr(0xC4, dst, src1, src2, k66, k0F, kWIG);
2972 EMIT(offset);
2973 }
2974
2896 void Assembler::vpinsrd(XMMRegister dst, XMMRegister src1, const Operand& src2, 2975 void Assembler::vpinsrd(XMMRegister dst, XMMRegister src1, const Operand& src2,
2897 int8_t offset) { 2976 int8_t offset) {
2898 vinstr(0x22, dst, src1, src2, k66, k0F3A, kWIG); 2977 vinstr(0x22, dst, src1, src2, k66, k0F3A, kWIG);
2899 EMIT(offset); 2978 EMIT(offset);
2900 } 2979 }
2901 2980
2902 void Assembler::bmi1(byte op, Register reg, Register vreg, const Operand& rm) { 2981 void Assembler::bmi1(byte op, Register reg, Register vreg, const Operand& rm) {
2903 DCHECK(IsEnabled(BMI1)); 2982 DCHECK(IsEnabled(BMI1));
2904 EnsureSpace ensure_space(this); 2983 EnsureSpace ensure_space(this);
2905 emit_vex_prefix(vreg, kLZ, kNone, k0F38, kW0); 2984 emit_vex_prefix(vreg, kLZ, kNone, k0F38, kW0);
(...skipping 282 matching lines...) Expand 10 before | Expand all | Expand 10 after
3188 } 3267 }
3189 RelocInfo rinfo(pc_, rmode, data, NULL); 3268 RelocInfo rinfo(pc_, rmode, data, NULL);
3190 reloc_info_writer.Write(&rinfo); 3269 reloc_info_writer.Write(&rinfo);
3191 } 3270 }
3192 3271
3193 3272
3194 } // namespace internal 3273 } // namespace internal
3195 } // namespace v8 3274 } // namespace v8
3196 3275
3197 #endif // V8_TARGET_ARCH_IA32 3276 #endif // V8_TARGET_ARCH_IA32
OLDNEW
« no previous file with comments | « src/ia32/assembler-ia32.h ('k') | src/ia32/disasm-ia32.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698