OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2012 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2012 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 22 matching lines...) Expand all Loading... |
33 if (n > 0) { \ | 33 if (n > 0) { \ |
34 ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, n); \ | 34 ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, n); \ |
35 } \ | 35 } \ |
36 memcpy(temp, y_buf + n, r); \ | 36 memcpy(temp, y_buf + n, r); \ |
37 memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ | 37 memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ |
38 memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ | 38 memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ |
39 ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, MASK + 1); \ | 39 ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, MASK + 1); \ |
40 memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192, \ | 40 memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192, \ |
41 SS(r, DUVSHIFT) * BPP); \ | 41 SS(r, DUVSHIFT) * BPP); \ |
42 } | 42 } |
43 | 43 #ifdef HAS_I422TOYUY2ROW_SSE2 |
44 #ifdef HAS_I422TOARGBROW_SSSE3 | |
45 ANY31(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7) | |
46 ANY31(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_SSSE3, 1, 0, 4, 7) | |
47 ANY31(J422ToARGBRow_Any_SSSE3, J422ToARGBRow_SSSE3, 1, 0, 4, 7) | |
48 ANY31(J422ToABGRRow_Any_SSSE3, J422ToABGRRow_SSSE3, 1, 0, 4, 7) | |
49 ANY31(H422ToARGBRow_Any_SSSE3, H422ToARGBRow_SSSE3, 1, 0, 4, 7) | |
50 ANY31(H422ToABGRRow_Any_SSSE3, H422ToABGRRow_SSSE3, 1, 0, 4, 7) | |
51 #endif | |
52 #ifdef HAS_I444TOARGBROW_SSSE3 | |
53 ANY31(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7) | |
54 ANY31(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_SSSE3, 2, 0, 4, 7) | |
55 ANY31(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_SSSE3, 1, 0, 4, 7) | |
56 ANY31(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, 1, 0, 4, 7) | |
57 ANY31(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, 1, 0, 2, 7) | |
58 ANY31(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, 1, 0, 2, 7) | |
59 ANY31(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, 1, 0, 2, 7) | |
60 ANY31(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, 1, 0, 3, 7) | |
61 ANY31(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_SSSE3, 1, 0, 3, 7) | |
62 ANY31(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, 1, 1, 4, 15) | 44 ANY31(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, 1, 1, 4, 15) |
63 ANY31(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, 1, 1, 4, 15) | 45 ANY31(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, 1, 1, 4, 15) |
64 #endif // HAS_I444TOARGBROW_SSSE3 | |
65 #ifdef HAS_I444TOABGRROW_SSSE3 | |
66 ANY31(I444ToABGRRow_Any_SSSE3, I444ToABGRRow_SSSE3, 0, 0, 4, 7) | |
67 #endif | |
68 #ifdef HAS_I422TORGB24ROW_AVX2 | |
69 ANY31(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 15) | |
70 #endif | |
71 #ifdef HAS_I422TORAWROW_AVX2 | |
72 ANY31(I422ToRAWRow_Any_AVX2, I422ToRAWRow_AVX2, 1, 0, 3, 15) | |
73 #endif | |
74 #ifdef HAS_J422TOARGBROW_AVX2 | |
75 ANY31(J422ToARGBRow_Any_AVX2, J422ToARGBRow_AVX2, 1, 0, 4, 15) | |
76 #endif | |
77 #ifdef HAS_J422TOABGRROW_AVX2 | |
78 ANY31(J422ToABGRRow_Any_AVX2, J422ToABGRRow_AVX2, 1, 0, 4, 15) | |
79 #endif | |
80 #ifdef HAS_H422TOARGBROW_AVX2 | |
81 ANY31(H422ToARGBRow_Any_AVX2, H422ToARGBRow_AVX2, 1, 0, 4, 15) | |
82 #endif | |
83 #ifdef HAS_H422TOABGRROW_AVX2 | |
84 ANY31(H422ToABGRRow_Any_AVX2, H422ToABGRRow_AVX2, 1, 0, 4, 15) | |
85 #endif | |
86 #ifdef HAS_I422TOARGBROW_AVX2 | |
87 ANY31(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, 1, 0, 4, 15) | |
88 #endif | |
89 #ifdef HAS_I422TOBGRAROW_AVX2 | |
90 ANY31(I422ToBGRARow_Any_AVX2, I422ToBGRARow_AVX2, 1, 0, 4, 15) | |
91 #endif | |
92 #ifdef HAS_I422TORGBAROW_AVX2 | |
93 ANY31(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15) | |
94 #endif | |
95 #ifdef HAS_I422TOABGRROW_AVX2 | |
96 ANY31(I422ToABGRRow_Any_AVX2, I422ToABGRRow_AVX2, 1, 0, 4, 15) | |
97 #endif | |
98 #ifdef HAS_I444TOARGBROW_AVX2 | |
99 ANY31(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15) | |
100 #endif | |
101 #ifdef HAS_I444TOABGRROW_AVX2 | |
102 ANY31(I444ToABGRRow_Any_AVX2, I444ToABGRRow_AVX2, 0, 0, 4, 15) | |
103 #endif | |
104 #ifdef HAS_I411TOARGBROW_AVX2 | |
105 ANY31(I411ToARGBRow_Any_AVX2, I411ToARGBRow_AVX2, 2, 0, 4, 15) | |
106 #endif | |
107 #ifdef HAS_I422TOARGB4444ROW_AVX2 | |
108 ANY31(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, 1, 0, 2, 7) | |
109 #endif | |
110 #ifdef HAS_I422TOARGB1555ROW_AVX2 | |
111 ANY31(I422ToARGB1555Row_Any_AVX2, I422ToARGB1555Row_AVX2, 1, 0, 2, 7) | |
112 #endif | |
113 #ifdef HAS_I422TORGB565ROW_AVX2 | |
114 ANY31(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 7) | |
115 #endif | |
116 #ifdef HAS_I422TOARGBROW_NEON | |
117 ANY31(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7) | |
118 ANY31(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7) | |
119 ANY31(I411ToARGBRow_Any_NEON, I411ToARGBRow_NEON, 2, 0, 4, 7) | |
120 ANY31(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, 1, 0, 4, 7) | |
121 ANY31(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, 1, 0, 4, 7) | |
122 ANY31(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, 1, 0, 4, 7) | |
123 ANY31(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, 1, 0, 3, 7) | |
124 ANY31(I422ToRAWRow_Any_NEON, I422ToRAWRow_NEON, 1, 0, 3, 7) | |
125 ANY31(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7) | |
126 ANY31(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7) | |
127 ANY31(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7) | |
128 #endif | |
129 #ifdef HAS_J422TOARGBROW_NEON | |
130 ANY31(J422ToARGBRow_Any_NEON, J422ToARGBRow_NEON, 1, 0, 4, 7) | |
131 #endif | |
132 #ifdef HAS_J422TOABGRROW_NEON | |
133 ANY31(J422ToABGRRow_Any_NEON, J422ToABGRRow_NEON, 1, 0, 4, 7) | |
134 #endif | |
135 #ifdef HAS_H422TOARGBROW_NEON | |
136 ANY31(H422ToARGBRow_Any_NEON, H422ToARGBRow_NEON, 1, 0, 4, 7) | |
137 #endif | |
138 #ifdef HAS_H422TOABGRROW_NEON | |
139 ANY31(H422ToABGRRow_Any_NEON, H422ToABGRRow_NEON, 1, 0, 4, 7) | |
140 #endif | 46 #endif |
141 #ifdef HAS_I422TOYUY2ROW_NEON | 47 #ifdef HAS_I422TOYUY2ROW_NEON |
142 ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15) | 48 ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15) |
143 #endif | 49 #endif |
144 #ifdef HAS_I422TOUYVYROW_NEON | 50 #ifdef HAS_I422TOUYVYROW_NEON |
145 ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15) | 51 ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15) |
146 #endif | 52 #endif |
147 #undef ANY31 | 53 #undef ANY31 |
| 54 |
| 55 // Any 3 planes to 1 with yuvconstants |
| 56 #define ANY31C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \ |
| 57 void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, \ |
| 58 uint8* dst_ptr, struct YuvConstants* yuvconstants, \ |
| 59 int width) { \ |
| 60 SIMD_ALIGNED(uint8 temp[64 * 4]); \ |
| 61 memset(temp, 0, 64 * 3); /* for YUY2 and msan */ \ |
| 62 int r = width & MASK; \ |
| 63 int n = width & ~MASK; \ |
| 64 if (n > 0) { \ |
| 65 ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n); \ |
| 66 } \ |
| 67 memcpy(temp, y_buf + n, r); \ |
| 68 memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ |
| 69 memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ |
| 70 ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, \ |
| 71 yuvconstants, MASK + 1); \ |
| 72 memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192, \ |
| 73 SS(r, DUVSHIFT) * BPP); \ |
| 74 } |
| 75 |
| 76 #ifdef HAS_I422TOARGBROW_SSSE3 |
| 77 ANY31C(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7) |
| 78 ANY31C(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_SSSE3, 1, 0, 4, 7) |
| 79 #endif |
| 80 #ifdef HAS_I444TOARGBROW_SSSE3 |
| 81 ANY31C(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7) |
| 82 ANY31C(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_SSSE3, 2, 0, 4, 7) |
| 83 ANY31C(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_SSSE3, 1, 0, 4, 7) |
| 84 ANY31C(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, 1, 0, 4, 7) |
| 85 ANY31C(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, 1, 0, 2, 7) |
| 86 ANY31C(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, 1, 0, 2, 7) |
| 87 ANY31C(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, 1, 0, 2, 7) |
| 88 ANY31C(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, 1, 0, 3, 7) |
| 89 ANY31C(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_SSSE3, 1, 0, 3, 7) |
| 90 #endif // HAS_I444TOARGBROW_SSSE3 |
| 91 #ifdef HAS_I444TOABGRROW_SSSE3 |
| 92 ANY31C(I444ToABGRRow_Any_SSSE3, I444ToABGRRow_SSSE3, 0, 0, 4, 7) |
| 93 #endif |
| 94 #ifdef HAS_I422TORGB24ROW_AVX2 |
| 95 ANY31C(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 15) |
| 96 #endif |
| 97 #ifdef HAS_I422TORAWROW_AVX2 |
| 98 ANY31C(I422ToRAWRow_Any_AVX2, I422ToRAWRow_AVX2, 1, 0, 3, 15) |
| 99 #endif |
| 100 #ifdef HAS_I422TOARGBROW_AVX2 |
| 101 ANY31C(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, 1, 0, 4, 15) |
| 102 #endif |
| 103 #ifdef HAS_I422TOBGRAROW_AVX2 |
| 104 ANY31C(I422ToBGRARow_Any_AVX2, I422ToBGRARow_AVX2, 1, 0, 4, 15) |
| 105 #endif |
| 106 #ifdef HAS_I422TORGBAROW_AVX2 |
| 107 ANY31C(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15) |
| 108 #endif |
| 109 #ifdef HAS_I422TOABGRROW_AVX2 |
| 110 ANY31C(I422ToABGRRow_Any_AVX2, I422ToABGRRow_AVX2, 1, 0, 4, 15) |
| 111 #endif |
| 112 #ifdef HAS_I444TOARGBROW_AVX2 |
| 113 ANY31C(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15) |
| 114 #endif |
| 115 #ifdef HAS_I444TOABGRROW_AVX2 |
| 116 ANY31C(I444ToABGRRow_Any_AVX2, I444ToABGRRow_AVX2, 0, 0, 4, 15) |
| 117 #endif |
| 118 #ifdef HAS_I411TOARGBROW_AVX2 |
| 119 ANY31C(I411ToARGBRow_Any_AVX2, I411ToARGBRow_AVX2, 2, 0, 4, 15) |
| 120 #endif |
| 121 #ifdef HAS_I422TOARGB4444ROW_AVX2 |
| 122 ANY31C(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, 1, 0, 2, 7) |
| 123 #endif |
| 124 #ifdef HAS_I422TOARGB1555ROW_AVX2 |
| 125 ANY31C(I422ToARGB1555Row_Any_AVX2, I422ToARGB1555Row_AVX2, 1, 0, 2, 7) |
| 126 #endif |
| 127 #ifdef HAS_I422TORGB565ROW_AVX2 |
| 128 ANY31C(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 7) |
| 129 #endif |
| 130 #ifdef HAS_I422TOARGBROW_NEON |
| 131 ANY31C(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7) |
| 132 ANY31C(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7) |
| 133 ANY31C(I411ToARGBRow_Any_NEON, I411ToARGBRow_NEON, 2, 0, 4, 7) |
| 134 ANY31C(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, 1, 0, 4, 7) |
| 135 ANY31C(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, 1, 0, 4, 7) |
| 136 ANY31C(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, 1, 0, 4, 7) |
| 137 ANY31C(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, 1, 0, 3, 7) |
| 138 ANY31C(I422ToRAWRow_Any_NEON, I422ToRAWRow_NEON, 1, 0, 3, 7) |
| 139 ANY31C(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7) |
| 140 ANY31C(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7) |
| 141 ANY31C(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7) |
| 142 #endif |
| 143 #undef ANY31C |
148 | 144 |
149 // Any 2 planes to 1. | 145 // Any 2 planes to 1. |
150 #define ANY21(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \ | 146 #define ANY21(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \ |
151 void NAMEANY(const uint8* y_buf, const uint8* uv_buf, \ | 147 void NAMEANY(const uint8* y_buf, const uint8* uv_buf, \ |
152 uint8* dst_ptr, int width) { \ | 148 uint8* dst_ptr, int width) { \ |
153 SIMD_ALIGNED(uint8 temp[64 * 3]); \ | 149 SIMD_ALIGNED(uint8 temp[64 * 3]); \ |
154 memset(temp, 0, 64 * 2); /* for msan */ \ | 150 memset(temp, 0, 64 * 2); /* for msan */ \ |
155 int r = width & MASK; \ | 151 int r = width & MASK; \ |
156 int n = width & ~MASK; \ | 152 int n = width & ~MASK; \ |
157 if (n > 0) { \ | 153 if (n > 0) { \ |
158 ANY_SIMD(y_buf, uv_buf, dst_ptr, n); \ | 154 ANY_SIMD(y_buf, uv_buf, dst_ptr, n); \ |
159 } \ | 155 } \ |
160 memcpy(temp, y_buf + n * SBPP, r * SBPP); \ | 156 memcpy(temp, y_buf + n * SBPP, r * SBPP); \ |
161 memcpy(temp + 64, uv_buf + (n >> UVSHIFT) * SBPP2, \ | 157 memcpy(temp + 64, uv_buf + (n >> UVSHIFT) * SBPP2, \ |
162 SS(r, UVSHIFT) * SBPP2); \ | 158 SS(r, UVSHIFT) * SBPP2); \ |
163 ANY_SIMD(temp, temp + 64, temp + 128, MASK + 1); \ | 159 ANY_SIMD(temp, temp + 64, temp + 128, MASK + 1); \ |
164 memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ | 160 memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ |
165 } | 161 } |
166 | 162 |
167 // Biplanar to RGB. | |
168 #ifdef HAS_NV12TOARGBROW_SSSE3 | |
169 ANY21(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_SSSE3, 1, 1, 2, 4, 7) | |
170 ANY21(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_SSSE3, 1, 1, 2, 4, 7) | |
171 #endif | |
172 #ifdef HAS_NV12TOARGBROW_AVX2 | |
173 ANY21(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15) | |
174 ANY21(NV21ToARGBRow_Any_AVX2, NV21ToARGBRow_AVX2, 1, 1, 2, 4, 15) | |
175 #endif | |
176 #ifdef HAS_NV12TOARGBROW_NEON | |
177 ANY21(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7) | |
178 ANY21(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, 1, 1, 2, 4, 7) | |
179 #endif | |
180 #ifdef HAS_NV12TORGB565ROW_SSSE3 | |
181 ANY21(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, 1, 1, 2, 2, 7) | |
182 ANY21(NV21ToRGB565Row_Any_SSSE3, NV21ToRGB565Row_SSSE3, 1, 1, 2, 2, 7) | |
183 #endif | |
184 #ifdef HAS_NV12TORGB565ROW_AVX2 | |
185 ANY21(NV12ToRGB565Row_Any_AVX2, NV12ToRGB565Row_AVX2, 1, 1, 2, 2, 15) | |
186 ANY21(NV21ToRGB565Row_Any_AVX2, NV21ToRGB565Row_AVX2, 1, 1, 2, 2, 15) | |
187 #endif | |
188 #ifdef HAS_NV12TORGB565ROW_NEON | |
189 ANY21(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, 1, 1, 2, 2, 7) | |
190 ANY21(NV21ToRGB565Row_Any_NEON, NV21ToRGB565Row_NEON, 1, 1, 2, 2, 7) | |
191 #endif | |
192 | |
193 // Merge functions. | 163 // Merge functions. |
194 #ifdef HAS_MERGEUVROW_SSE2 | 164 #ifdef HAS_MERGEUVROW_SSE2 |
195 ANY21(MergeUVRow_Any_SSE2, MergeUVRow_SSE2, 0, 1, 1, 2, 15) | 165 ANY21(MergeUVRow_Any_SSE2, MergeUVRow_SSE2, 0, 1, 1, 2, 15) |
196 #endif | 166 #endif |
197 #ifdef HAS_MERGEUVROW_AVX2 | 167 #ifdef HAS_MERGEUVROW_AVX2 |
198 ANY21(MergeUVRow_Any_AVX2, MergeUVRow_AVX2, 0, 1, 1, 2, 31) | 168 ANY21(MergeUVRow_Any_AVX2, MergeUVRow_AVX2, 0, 1, 1, 2, 31) |
199 #endif | 169 #endif |
200 #ifdef HAS_MERGEUVROW_NEON | 170 #ifdef HAS_MERGEUVROW_NEON |
201 ANY21(MergeUVRow_Any_NEON, MergeUVRow_NEON, 0, 1, 1, 2, 15) | 171 ANY21(MergeUVRow_Any_NEON, MergeUVRow_NEON, 0, 1, 1, 2, 15) |
202 #endif | 172 #endif |
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
242 ANY21(SobelToPlaneRow_Any_NEON, SobelToPlaneRow_NEON, 0, 1, 1, 1, 15) | 212 ANY21(SobelToPlaneRow_Any_NEON, SobelToPlaneRow_NEON, 0, 1, 1, 1, 15) |
243 #endif | 213 #endif |
244 #ifdef HAS_SOBELXYROW_SSE2 | 214 #ifdef HAS_SOBELXYROW_SSE2 |
245 ANY21(SobelXYRow_Any_SSE2, SobelXYRow_SSE2, 0, 1, 1, 4, 15) | 215 ANY21(SobelXYRow_Any_SSE2, SobelXYRow_SSE2, 0, 1, 1, 4, 15) |
246 #endif | 216 #endif |
247 #ifdef HAS_SOBELXYROW_NEON | 217 #ifdef HAS_SOBELXYROW_NEON |
248 ANY21(SobelXYRow_Any_NEON, SobelXYRow_NEON, 0, 1, 1, 4, 7) | 218 ANY21(SobelXYRow_Any_NEON, SobelXYRow_NEON, 0, 1, 1, 4, 7) |
249 #endif | 219 #endif |
250 #undef ANY21 | 220 #undef ANY21 |
251 | 221 |
| 222 // Any 2 planes to 1 with yuvconstants |
| 223 #define ANY21C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \ |
| 224 void NAMEANY(const uint8* y_buf, const uint8* uv_buf, \ |
| 225 uint8* dst_ptr, struct YuvConstants* yuvconstants, \ |
| 226 int width) { \ |
| 227 SIMD_ALIGNED(uint8 temp[64 * 3]); \ |
| 228 memset(temp, 0, 64 * 2); /* for msan */ \ |
| 229 int r = width & MASK; \ |
| 230 int n = width & ~MASK; \ |
| 231 if (n > 0) { \ |
| 232 ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n); \ |
| 233 } \ |
| 234 memcpy(temp, y_buf + n * SBPP, r * SBPP); \ |
| 235 memcpy(temp + 64, uv_buf + (n >> UVSHIFT) * SBPP2, \ |
| 236 SS(r, UVSHIFT) * SBPP2); \ |
| 237 ANY_SIMD(temp, temp + 64, temp + 128, yuvconstants, MASK + 1); \ |
| 238 memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ |
| 239 } |
| 240 |
| 241 // Biplanar to RGB. |
| 242 #ifdef HAS_NV12TOARGBROW_SSSE3 |
| 243 ANY21C(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_SSSE3, 1, 1, 2, 4, 7) |
| 244 #endif |
| 245 #ifdef HAS_NV12TOARGBROW_AVX2 |
| 246 ANY21C(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15) |
| 247 #endif |
| 248 #ifdef HAS_NV12TOARGBROW_NEON |
| 249 ANY21C(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7) |
| 250 #endif |
| 251 #ifdef HAS_NV12TORGB565ROW_SSSE3 |
| 252 ANY21C(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, 1, 1, 2, 2, 7) |
| 253 #endif |
| 254 #ifdef HAS_NV12TORGB565ROW_AVX2 |
| 255 ANY21C(NV12ToRGB565Row_Any_AVX2, NV12ToRGB565Row_AVX2, 1, 1, 2, 2, 15) |
| 256 #endif |
| 257 #ifdef HAS_NV12TORGB565ROW_NEON |
| 258 ANY21C(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, 1, 1, 2, 2, 7) |
| 259 #endif |
| 260 #undef ANY21C |
| 261 |
252 // Any 1 to 1. | 262 // Any 1 to 1. |
253 #define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \ | 263 #define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \ |
254 void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \ | 264 void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \ |
255 SIMD_ALIGNED(uint8 temp[128 * 2]); \ | 265 SIMD_ALIGNED(uint8 temp[128 * 2]); \ |
256 memset(temp, 0, 128); /* for YUY2 and msan */ \ | 266 memset(temp, 0, 128); /* for YUY2 and msan */ \ |
257 int r = width & MASK; \ | 267 int r = width & MASK; \ |
258 int n = width & ~MASK; \ | 268 int n = width & ~MASK; \ |
259 if (n > 0) { \ | 269 if (n > 0) { \ |
260 ANY_SIMD(src_ptr, dst_ptr, n); \ | 270 ANY_SIMD(src_ptr, dst_ptr, n); \ |
261 } \ | 271 } \ |
(...skipping 28 matching lines...) Expand all Loading... |
290 #endif | 300 #endif |
291 #if defined(HAS_J400TOARGBROW_AVX2) | 301 #if defined(HAS_J400TOARGBROW_AVX2) |
292 ANY11(J400ToARGBRow_Any_AVX2, J400ToARGBRow_AVX2, 0, 1, 4, 15) | 302 ANY11(J400ToARGBRow_Any_AVX2, J400ToARGBRow_AVX2, 0, 1, 4, 15) |
293 #endif | 303 #endif |
294 #if defined(HAS_I400TOARGBROW_SSE2) | 304 #if defined(HAS_I400TOARGBROW_SSE2) |
295 ANY11(I400ToARGBRow_Any_SSE2, I400ToARGBRow_SSE2, 0, 1, 4, 7) | 305 ANY11(I400ToARGBRow_Any_SSE2, I400ToARGBRow_SSE2, 0, 1, 4, 7) |
296 #endif | 306 #endif |
297 #if defined(HAS_I400TOARGBROW_AVX2) | 307 #if defined(HAS_I400TOARGBROW_AVX2) |
298 ANY11(I400ToARGBRow_Any_AVX2, I400ToARGBRow_AVX2, 0, 1, 4, 15) | 308 ANY11(I400ToARGBRow_Any_AVX2, I400ToARGBRow_AVX2, 0, 1, 4, 15) |
299 #endif | 309 #endif |
300 #if defined(HAS_YUY2TOARGBROW_SSSE3) | 310 #if defined(HAS_RGB24TOARGBROW_SSSE3) |
301 ANY11(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_SSSE3, 1, 4, 4, 15) | |
302 ANY11(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_SSSE3, 1, 4, 4, 15) | |
303 ANY11(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, 0, 3, 4, 15) | 311 ANY11(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, 0, 3, 4, 15) |
304 ANY11(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, 0, 3, 4, 15) | 312 ANY11(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, 0, 3, 4, 15) |
305 ANY11(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, 0, 2, 4, 7) | 313 ANY11(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, 0, 2, 4, 7) |
306 ANY11(ARGB1555ToARGBRow_Any_SSE2, ARGB1555ToARGBRow_SSE2, 0, 2, 4, 7) | 314 ANY11(ARGB1555ToARGBRow_Any_SSE2, ARGB1555ToARGBRow_SSE2, 0, 2, 4, 7) |
307 ANY11(ARGB4444ToARGBRow_Any_SSE2, ARGB4444ToARGBRow_SSE2, 0, 2, 4, 7) | 315 ANY11(ARGB4444ToARGBRow_Any_SSE2, ARGB4444ToARGBRow_SSE2, 0, 2, 4, 7) |
308 #endif | 316 #endif |
309 #if defined(HAS_RGB565TOARGBROW_AVX2) | 317 #if defined(HAS_RGB565TOARGBROW_AVX2) |
310 ANY11(RGB565ToARGBRow_Any_AVX2, RGB565ToARGBRow_AVX2, 0, 2, 4, 15) | 318 ANY11(RGB565ToARGBRow_Any_AVX2, RGB565ToARGBRow_AVX2, 0, 2, 4, 15) |
311 #endif | 319 #endif |
312 #if defined(HAS_ARGB1555TOARGBROW_AVX2) | 320 #if defined(HAS_ARGB1555TOARGBROW_AVX2) |
313 ANY11(ARGB1555ToARGBRow_Any_AVX2, ARGB1555ToARGBRow_AVX2, 0, 2, 4, 15) | 321 ANY11(ARGB1555ToARGBRow_Any_AVX2, ARGB1555ToARGBRow_AVX2, 0, 2, 4, 15) |
314 #endif | 322 #endif |
315 #if defined(HAS_ARGB4444TOARGBROW_AVX2) | 323 #if defined(HAS_ARGB4444TOARGBROW_AVX2) |
316 ANY11(ARGB4444ToARGBRow_Any_AVX2, ARGB4444ToARGBRow_AVX2, 0, 2, 4, 15) | 324 ANY11(ARGB4444ToARGBRow_Any_AVX2, ARGB4444ToARGBRow_AVX2, 0, 2, 4, 15) |
317 #endif | 325 #endif |
318 #if defined(HAS_YUY2TOARGBROW_AVX2) | |
319 ANY11(YUY2ToARGBRow_Any_AVX2, YUY2ToARGBRow_AVX2, 1, 4, 4, 31) | |
320 ANY11(UYVYToARGBRow_Any_AVX2, UYVYToARGBRow_AVX2, 1, 4, 4, 31) | |
321 #endif | |
322 #if defined(HAS_ARGBTORGB24ROW_NEON) | 326 #if defined(HAS_ARGBTORGB24ROW_NEON) |
323 ANY11(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, 0, 4, 3, 7) | 327 ANY11(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, 0, 4, 3, 7) |
324 ANY11(ARGBToRAWRow_Any_NEON, ARGBToRAWRow_NEON, 0, 4, 3, 7) | 328 ANY11(ARGBToRAWRow_Any_NEON, ARGBToRAWRow_NEON, 0, 4, 3, 7) |
325 ANY11(ARGBToRGB565Row_Any_NEON, ARGBToRGB565Row_NEON, 0, 4, 2, 7) | 329 ANY11(ARGBToRGB565Row_Any_NEON, ARGBToRGB565Row_NEON, 0, 4, 2, 7) |
326 ANY11(ARGBToARGB1555Row_Any_NEON, ARGBToARGB1555Row_NEON, 0, 4, 2, 7) | 330 ANY11(ARGBToARGB1555Row_Any_NEON, ARGBToARGB1555Row_NEON, 0, 4, 2, 7) |
327 ANY11(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, 0, 4, 2, 7) | 331 ANY11(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, 0, 4, 2, 7) |
328 ANY11(J400ToARGBRow_Any_NEON, J400ToARGBRow_NEON, 0, 1, 4, 7) | 332 ANY11(J400ToARGBRow_Any_NEON, J400ToARGBRow_NEON, 0, 1, 4, 7) |
329 ANY11(I400ToARGBRow_Any_NEON, I400ToARGBRow_NEON, 0, 1, 4, 7) | 333 ANY11(I400ToARGBRow_Any_NEON, I400ToARGBRow_NEON, 0, 1, 4, 7) |
330 ANY11(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, 1, 4, 4, 7) | |
331 ANY11(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, 1, 4, 4, 7) | |
332 #endif | 334 #endif |
333 #ifdef HAS_ARGBTOYROW_AVX2 | 335 #ifdef HAS_ARGBTOYROW_AVX2 |
334 ANY11(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, 0, 4, 1, 31) | 336 ANY11(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, 0, 4, 1, 31) |
335 #endif | 337 #endif |
336 #ifdef HAS_ARGBTOYJROW_AVX2 | 338 #ifdef HAS_ARGBTOYJROW_AVX2 |
337 ANY11(ARGBToYJRow_Any_AVX2, ARGBToYJRow_AVX2, 0, 4, 1, 31) | 339 ANY11(ARGBToYJRow_Any_AVX2, ARGBToYJRow_AVX2, 0, 4, 1, 31) |
338 #endif | 340 #endif |
339 #ifdef HAS_UYVYTOYROW_AVX2 | 341 #ifdef HAS_UYVYTOYROW_AVX2 |
340 ANY11(UYVYToYRow_Any_AVX2, UYVYToYRow_AVX2, 0, 2, 1, 31) | 342 ANY11(UYVYToYRow_Any_AVX2, UYVYToYRow_AVX2, 0, 2, 1, 31) |
341 #endif | 343 #endif |
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
419 ANY11(ARGBAttenuateRow_Any_AVX2, ARGBAttenuateRow_AVX2, 0, 4, 4, 7) | 421 ANY11(ARGBAttenuateRow_Any_AVX2, ARGBAttenuateRow_AVX2, 0, 4, 4, 7) |
420 #endif | 422 #endif |
421 #ifdef HAS_ARGBUNATTENUATEROW_AVX2 | 423 #ifdef HAS_ARGBUNATTENUATEROW_AVX2 |
422 ANY11(ARGBUnattenuateRow_Any_AVX2, ARGBUnattenuateRow_AVX2, 0, 4, 4, 7) | 424 ANY11(ARGBUnattenuateRow_Any_AVX2, ARGBUnattenuateRow_AVX2, 0, 4, 4, 7) |
423 #endif | 425 #endif |
424 #ifdef HAS_ARGBATTENUATEROW_NEON | 426 #ifdef HAS_ARGBATTENUATEROW_NEON |
425 ANY11(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, 0, 4, 4, 7) | 427 ANY11(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, 0, 4, 4, 7) |
426 #endif | 428 #endif |
427 #undef ANY11 | 429 #undef ANY11 |
428 | 430 |
| 431 // Any 1 to 1 with yuvconstants |
| 432 #define ANY11C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \ |
| 433 void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, \ |
| 434 struct YuvConstants* yuvconstants, int width) { \ |
| 435 SIMD_ALIGNED(uint8 temp[128 * 2]); \ |
| 436 memset(temp, 0, 128); /* for YUY2 and msan */ \ |
| 437 int r = width & MASK; \ |
| 438 int n = width & ~MASK; \ |
| 439 if (n > 0) { \ |
| 440 ANY_SIMD(src_ptr, dst_ptr, yuvconstants, n); \ |
| 441 } \ |
| 442 memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \ |
| 443 ANY_SIMD(temp, temp + 128, yuvconstants, MASK + 1); \ |
| 444 memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ |
| 445 } |
| 446 #if defined(HAS_YUY2TOARGBROW_SSSE3) |
| 447 ANY11C(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_SSSE3, 1, 4, 4, 15) |
| 448 ANY11C(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_SSSE3, 1, 4, 4, 15) |
| 449 #endif |
| 450 #if defined(HAS_YUY2TOARGBROW_AVX2) |
| 451 ANY11C(YUY2ToARGBRow_Any_AVX2, YUY2ToARGBRow_AVX2, 1, 4, 4, 31) |
| 452 ANY11C(UYVYToARGBRow_Any_AVX2, UYVYToARGBRow_AVX2, 1, 4, 4, 31) |
| 453 #endif |
| 454 #if defined(HAS_YUY2TOARGBROW_NEON) |
| 455 ANY11C(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, 1, 4, 4, 7) |
| 456 ANY11C(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, 1, 4, 4, 7) |
| 457 #endif |
| 458 #undef ANY11C |
| 459 |
429 // Any 1 to 1 blended. | 460 // Any 1 to 1 blended. |
430 #define ANY11B(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \ | 461 #define ANY11B(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \ |
431 void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \ | 462 void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \ |
432 SIMD_ALIGNED(uint8 temp[128 * 2]); \ | 463 SIMD_ALIGNED(uint8 temp[128 * 2]); \ |
433 memset(temp, 0, 128 * 2); /* for YUY2 and msan */ \ | 464 memset(temp, 0, 128 * 2); /* for YUY2 and msan */ \ |
434 int r = width & MASK; \ | 465 int r = width & MASK; \ |
435 int n = width & ~MASK; \ | 466 int n = width & ~MASK; \ |
436 if (n > 0) { \ | 467 if (n > 0) { \ |
437 ANY_SIMD(src_ptr, dst_ptr, n); \ | 468 ANY_SIMD(src_ptr, dst_ptr, n); \ |
438 } \ | 469 } \ |
(...skipping 290 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
729 #endif | 760 #endif |
730 #ifdef HAS_UYVYTOUVROW_NEON | 761 #ifdef HAS_UYVYTOUVROW_NEON |
731 ANY12S(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, 1, 4, 15) | 762 ANY12S(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, 1, 4, 15) |
732 #endif | 763 #endif |
733 #undef ANY12S | 764 #undef ANY12S |
734 | 765 |
735 #ifdef __cplusplus | 766 #ifdef __cplusplus |
736 } // extern "C" | 767 } // extern "C" |
737 } // namespace libyuv | 768 } // namespace libyuv |
738 #endif | 769 #endif |
OLD | NEW |