Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(339)

Side by Side Diff: include/libyuv/macros_msa.h

Issue 2553403002: Add MSA optimized TransposeWx8_MSA and TransposeUVWx8_MSA functions (Closed)
Patch Set: Changes as per review comments Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « CMakeLists.txt ('k') | include/libyuv/rotate_row.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2016 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 #ifndef INCLUDE_LIBYUV_MACROS_MSA_H_ 11 #ifndef INCLUDE_LIBYUV_MACROS_MSA_H_
12 #define INCLUDE_LIBYUV_MACROS_MSA_H_ 12 #define INCLUDE_LIBYUV_MACROS_MSA_H_
13 13
14 #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) 14 #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
15 #include <msa.h> 15 #include <msa.h>
16 #include <stdint.h> 16 #include <stdint.h>
17 17
18 #if (__mips_isa_rev >= 6) 18 #if (__mips_isa_rev >= 6)
19 #define LW(psrc) \ 19 #define LW(psrc) \
20 ({ \ 20 ({ \
21 uint8* psrc_lw_m = (uint8*)(psrc); /* NOLINT */ \ 21 uint8* psrc_lw_m = (uint8*)(psrc); /* NOLINT */ \
22 uint32 val_m; \ 22 uint32 val_m; \
23 asm volatile("lw %[val_m], %[psrc_lw_m] \n\t" \ 23 asm volatile("lw %[val_m], %[psrc_lw_m] \n" \
24 : [val_m] "=r"(val_m) \ 24 : [val_m] "=r"(val_m) \
25 : [psrc_lw_m] "m"(*psrc_lw_m)); \ 25 : [psrc_lw_m] "m"(*psrc_lw_m)); \
26 val_m; \ 26 val_m; \
27 }) 27 })
28 28
#if (__mips == 64)
// LD(psrc): load one 64-bit doubleword using the r6 "ld" instruction.
// Evaluates to the loaded uint64.
#define LD(psrc)                                    \
  ({                                                \
    uint8* psrc_ld_m = (uint8*)(psrc); /* NOLINT */ \
    uint64 val_m = 0;                               \
    asm volatile("ld %[val_m], %[psrc_ld_m] \n"     \
                 : [val_m] "=r"(val_m)              \
                 : [psrc_ld_m] "m"(*psrc_ld_m));    \
    val_m;                                          \
  })
#else  // !(__mips == 64)
// LD(psrc): 32-bit build — compose a 64-bit value from two LW word loads.
// Word order assumes little-endian layout (low word first) — TODO confirm.
// Note: shifting a uint64 left by 32 already clears the low word, so the
// former "& 0xFFFFFFFF00000000" mask was redundant and has been dropped.
#define LD(psrc)                                           \
  ({                                                       \
    uint8* psrc_ld_m = (uint8*)(psrc); /* NOLINT */        \
    uint32 val0_m, val1_m;                                 \
    uint64 val_m = 0;                                      \
    val0_m = LW(psrc_ld_m);                                \
    val1_m = LW(psrc_ld_m + 4);                            \
    val_m = ((uint64)val1_m << 32) | (uint64)val0_m; /* NOLINT */ \
    val_m;                                                 \
  })
#endif  // (__mips == 64)

// SW(val, pdst): store one 32-bit word using the r6 "sw" instruction.
#define SW(val, pdst)                            \
  ({                                             \
    uint8_t* pdst_sw_m = (uint8_t*)(pdst);       \
    uint32_t val_m = (val);                      \
    asm volatile("sw %[val_m], %[pdst_sw_m] \n"  \
                 : [pdst_sw_m] "=m"(*pdst_sw_m)  \
                 : [val_m] "r"(val_m));          \
  })

#if (__mips == 64)
// SD(val, pdst): store one 64-bit doubleword using the r6 "sd" instruction.
#define SD(val, pdst)                            \
  ({                                             \
    uint8_t* pdst_sd_m = (uint8_t*)(pdst);       \
    uint64_t val_m = (val);                      \
    asm volatile("sd %[val_m], %[pdst_sd_m] \n"  \
                 : [pdst_sd_m] "=m"(*pdst_sd_m)  \
                 : [val_m] "r"(val_m));          \
  })
#else  // !(__mips == 64)
// SD(val, pdst): 32-bit build — split the 64-bit value into two SW word
// stores.  Low word is stored first (little-endian order) — TODO confirm.
#define SD(val, pdst)                                        \
  ({                                                         \
    uint8_t* pdst_sd_m = (uint8_t*)(pdst);                   \
    uint32_t val0_m, val1_m;                                 \
    val0_m = (uint32_t)((val)&0x00000000FFFFFFFF);           \
    val1_m = (uint32_t)(((val) >> 32) & 0x00000000FFFFFFFF); \
    SW(val0_m, pdst_sd_m);                                   \
    SW(val1_m, pdst_sd_m + 4);                               \
  })
#endif  // !(__mips == 64)
85 #else // !(__mips_isa_rev >= 6)
86 #define LW(psrc) \
87 ({ \
88 uint8* psrc_lw_m = (uint8*)(psrc); /* NOLINT */ \
89 uint32 val_m; \
90 asm volatile("ulw %[val_m], %[psrc_lw_m] \n" \
91 : [val_m] "=r"(val_m) \
92 : [psrc_lw_m] "m"(*psrc_lw_m)); \
93 val_m; \
94 })
95
#if (__mips == 64)
// LD(psrc): pre-r6 — "uld" performs an unaligned 64-bit load.
#define LD(psrc)                                    \
  ({                                                \
    uint8* psrc_ld_m = (uint8*)(psrc); /* NOLINT */ \
    uint64 val_m = 0;                               \
    asm volatile("uld %[val_m], %[psrc_ld_m] \n"    \
                 : [val_m] "=r"(val_m)              \
                 : [psrc_ld_m] "m"(*psrc_ld_m));    \
    val_m;                                          \
  })
#else  // !(__mips == 64)
// LD(psrc): 32-bit build — compose a 64-bit value from two (unaligned)
// LW word loads.  Low word first (little-endian order) — TODO confirm.
// Note: shifting a uint64 left by 32 already clears the low word, so the
// former "& 0xFFFFFFFF00000000" mask was redundant and has been dropped.
#define LD(psrc)                                           \
  ({                                                       \
    uint8* psrc_ld_m = (uint8*)(psrc); /* NOLINT */        \
    uint32 val0_m, val1_m;                                 \
    uint64 val_m = 0;                                      \
    val0_m = LW(psrc_ld_m);                                \
    val1_m = LW(psrc_ld_m + 4);                            \
    val_m = ((uint64)val1_m << 32) | (uint64)val0_m; /* NOLINT */ \
    val_m;                                                 \
  })
#endif  // (__mips == 64)
120
121 #define SW(val, pdst) \
122 ({ \
123 uint8_t* pdst_sw_m = (uint8_t*)(pdst); \
124 uint32_t val_m = (val); \
125 asm volatile("usw %[val_m], %[pdst_sw_m] \n" \
126 : [pdst_sw_m] "=m"(*pdst_sw_m) \
127 : [val_m] "r"(val_m)); \
128 })
129
130 #define SD(val, pdst) \
131 ({ \
132 uint8_t* pdst_sd_m = (uint8_t*)(pdst); \
133 uint32_t val0_m, val1_m; \
134 val0_m = (uint32_t)((val)&0x00000000FFFFFFFF); \
135 val1_m = (uint32_t)(((val) >> 32) & 0x00000000FFFFFFFF); \
136 SW(val0_m, pdst_sd_m); \
137 SW(val1_m, pdst_sd_m + 4); \
138 })
88 #endif // (__mips_isa_rev >= 6) 139 #endif // (__mips_isa_rev >= 6)
89 140
90 // TODO(fbarchard): Consider removing __VAR_ARGS versions. 141 // TODO(fbarchard): Consider removing __VAR_ARGS versions.
// Type-generic vector load: reinterpret psrc as a pointer to RTYPE and
// dereference, copying one full vector register's worth of bytes.
#define LD_B(RTYPE, psrc) *((RTYPE*)(psrc)) /* NOLINT */
#define LD_UB(...) LD_B(v16u8, __VA_ARGS__)

// Type-generic vector store: write the vector value "in" through pdst.
#define ST_B(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in) /* NOLINT */
#define ST_UB(...) ST_B(v16u8, __VA_ARGS__)
97 /* Description : Load two vectors with 16 'byte' sized elements 148 /* Description : Load two vectors with 16 'byte' sized elements
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after
// ILVRL_B2: interleave the byte elements of in0/in1 — out0 gets the
// interleave of the right (low) halves via __msa_ilvr_b, out1 the
// interleave of the left (high) halves via __msa_ilvl_b.
#define ILVRL_B2(RTYPE, in0, in1, out0, out1)           \
  {                                                     \
    out0 = (RTYPE)__msa_ilvr_b((v16i8)in0, (v16i8)in1); \
    out1 = (RTYPE)__msa_ilvl_b((v16i8)in0, (v16i8)in1); \
  }
#define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__)
166 217
167 #endif /* !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) */ 218 #endif /* !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) */
168 219
169 #endif // INCLUDE_LIBYUV_MACROS_MSA_H_ 220 #endif // INCLUDE_LIBYUV_MACROS_MSA_H_
OLDNEW
« no previous file with comments | « CMakeLists.txt ('k') | include/libyuv/rotate_row.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698