OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2013 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2013 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 170 matching lines...)
181 "jg 1b \n" | 181 "jg 1b \n" |
182 : "+r"(src_ptr), // %0 | 182 : "+r"(src_ptr), // %0 |
183 "+r"(dst_ptr), // %1 | 183 "+r"(dst_ptr), // %1 |
184 "+r"(dst_width) // %2 | 184 "+r"(dst_width) // %2 |
185 : "r"((intptr_t)(src_stride)) // %3 | 185 : "r"((intptr_t)(src_stride)) // %3 |
186 : "memory", "cc", NACL_R14 | 186 : "memory", "cc", NACL_R14 |
187 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" | 187 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" |
188 ); | 188 ); |
189 } | 189 } |
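The constraint block above is the pattern every kernel in this file follows: "+r" operands are read-write registers that the loop advances in place, the stride is passed as an input already widened to intptr_t so it can be used directly in an addressing mode, and "memory"/"cc" plus the touched xmm registers are declared as clobbers. A minimal standalone sketch of that pattern, illustrative only and not part of libyuv (byte-wise copy, width > 0 assumed):

#include <stdint.h>

// Hedged sketch of the operand/clobber layout used by the kernels in
// this file: copies width bytes while advancing both pointers.
static void CopyRow_Asm_Sketch(const uint8_t* src, uint8_t* dst, int width) {
  asm volatile (
    "1:                                        \n"
    "movzbl (%0),%%eax                         \n"  // load one source byte
    "movb   %%al,(%1)                          \n"  // store it to the destination
    "lea    0x1(%0),%0                         \n"  // advance src
    "lea    0x1(%1),%1                         \n"  // advance dst
    "sub    $0x1,%2                            \n"  // count down the width
    "jg     1b                                 \n"
  : "+r"(src),    // %0: read-write pointer, advanced by the loop
    "+r"(dst),    // %1
    "+r"(width)   // %2
  :
  : "memory", "cc", "eax"
  );
}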
190 | 190 |
| 191 #ifdef HAS_SCALEROWDOWN2_AVX2 |
| 192 void ScaleRowDown2_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, |
| 193 uint8* dst_ptr, int dst_width) { |
| 194 asm volatile ( |
| 195 LABELALIGN |
| 196 "1: \n" |
| 197 "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" |
| 198 "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n" |
| 199 "lea " MEMLEA(0x40,0) ",%0 \n" |
| 200 "vpsrlw $0x8,%%ymm0,%%ymm0 \n" |
| 201 "vpsrlw $0x8,%%ymm1,%%ymm1 \n" |
| 202 "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" |
| 203 "vpermq $0xd8,%%ymm0,%%ymm0 \n" |
| 204 "vmovdqu %%ymm0," MEMACCESS(1) " \n" |
| 205 "lea " MEMLEA(0x20,1) ",%1 \n" |
| 206 "sub $0x20,%2 \n" |
| 207 "jg 1b \n" |
| 208 "vzeroupper \n" |
| 209 : "+r"(src_ptr), // %0 |
| 210 "+r"(dst_ptr), // %1 |
| 211 "+r"(dst_width) // %2 |
| 212 :: "memory", "cc", "xmm0", "xmm1" |
| 213 ); |
| 214 } |
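What ScaleRowDown2_AVX2 produces per output pixel: vpsrlw $0x8 keeps the high byte of each 16-bit lane, i.e. the second pixel of every horizontal pair, and vpackuswb/vpermq repack 64 source bytes into 32 destination bytes per iteration. A scalar sketch of the same result (uint8_t stands in for libyuv's uint8; dst_width is assumed positive and, for this AVX2 path, a multiple of 32):

#include <stddef.h>
#include <stdint.h>

// Scalar equivalent (sketch) of ScaleRowDown2_AVX2: point-sample the
// odd pixel of each horizontal pair. src_stride is unused in this path.
static void ScaleRowDown2_Sketch(const uint8_t* src_ptr, ptrdiff_t src_stride,
                                 uint8_t* dst_ptr, int dst_width) {
  (void)src_stride;
  for (int x = 0; x < dst_width; ++x) {
    dst_ptr[x] = src_ptr[2 * x + 1];
  }
}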
| 215 |
| 216 void ScaleRowDown2Linear_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, |
| 217 uint8* dst_ptr, int dst_width) { |
| 218 asm volatile ( |
| 219 "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n" |
| 220 "vpsrlw $0xf,%%ymm4,%%ymm4 \n" |
| 221 "vpackuswb %%ymm4,%%ymm4,%%ymm4 \n" |
| 222 "vpxor %%ymm5,%%ymm5,%%ymm5 \n" |
| 223 |
| 224 LABELALIGN |
| 225 "1: \n" |
| 226 "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" |
|  227     "vmovdqu    " MEMACCESS2(0x20,0) ",%%ymm1                   \n" |
| 228 "lea " MEMLEA(0x40,0) ",%0 \n" |
| 229 "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n" |
| 230 "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n" |
| 231 "vpavgw %%ymm5,%%ymm0,%%ymm0 \n" |
| 232 "vpavgw %%ymm5,%%ymm1,%%ymm1 \n" |
| 233 "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" |
| 234 "vpermq $0xd8,%%ymm0,%%ymm0 \n" |
| 235 "vmovdqu %%ymm0," MEMACCESS(1) " \n" |
| 236 "lea " MEMLEA(0x20,1) ",%1 \n" |
| 237 "sub $0x20,%2 \n" |
| 238 "jg 1b \n" |
| 239 "vzeroupper \n" |
| 240 : "+r"(src_ptr), // %0 |
| 241 "+r"(dst_ptr), // %1 |
| 242 "+r"(dst_width) // %2 |
| 243 :: "memory", "cc", "xmm0", "xmm1", "xmm4", "xmm5" |
| 244 ); |
| 245 } |
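Here ymm4 is built as a vector of byte 1s (all-ones, word shift by 15, pack), so vpmaddubsw sums each horizontal pair of source pixels into a 16-bit lane, and vpavgw against the zeroed ymm5 adds 1 and halves, giving a rounded average of the two pixels. A scalar sketch of the same output, under the same assumptions as above:

#include <stddef.h>
#include <stdint.h>

// Scalar equivalent (sketch) of ScaleRowDown2Linear_AVX2: rounded
// average of each horizontal pixel pair. src_stride is unused here.
static void ScaleRowDown2Linear_Sketch(const uint8_t* src_ptr, ptrdiff_t src_stride,
                                       uint8_t* dst_ptr, int dst_width) {
  (void)src_stride;
  for (int x = 0; x < dst_width; ++x) {
    dst_ptr[x] = (uint8_t)((src_ptr[2 * x] + src_ptr[2 * x + 1] + 1) >> 1);
  }
}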
| 246 |
| 247 void ScaleRowDown2Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, |
| 248 uint8* dst_ptr, int dst_width) { |
| 249 asm volatile ( |
| 250 "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n" |
| 251 "vpsrlw $0xf,%%ymm4,%%ymm4 \n" |
| 252 "vpackuswb %%ymm4,%%ymm4,%%ymm4 \n" |
| 253 "vpxor %%ymm5,%%ymm5,%%ymm5 \n" |
| 254 |
| 255 LABELALIGN |
| 256 "1: \n" |
| 257 "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" |
| 258 "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n" |
| 259 MEMOPREG(vmovdqu,0x00,0,3,1,ymm2) // vmovdqu (%0,%3,1),%%ymm2 |
| 260 MEMOPREG(vmovdqu,0x20,0,3,1,ymm3) // vmovdqu 0x20(%0,%3,1),%%ymm3 |
| 261 "lea " MEMLEA(0x40,0) ",%0 \n" |
| 262 "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n" |
| 263 "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n" |
| 264 "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n" |
| 265 "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n" |
| 266 "vpaddw %%ymm2,%%ymm0,%%ymm0 \n" |
| 267 "vpaddw %%ymm3,%%ymm1,%%ymm1 \n" |
| 268 "vpsrlw $0x1,%%ymm0,%%ymm0 \n" |
| 269 "vpsrlw $0x1,%%ymm1,%%ymm1 \n" |
| 270 "vpavgw %%ymm5,%%ymm0,%%ymm0 \n" |
| 271 "vpavgw %%ymm5,%%ymm1,%%ymm1 \n" |
| 272 "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" |
| 273 "vpermq $0xd8,%%ymm0,%%ymm0 \n" |
| 274 "vmovdqu %%ymm0," MEMACCESS(1) " \n" |
| 275 "lea " MEMLEA(0x20,1) ",%1 \n" |
| 276 "sub $0x20,%2 \n" |
| 277 "jg 1b \n" |
| 278 "vzeroupper \n" |
| 279 : "+r"(src_ptr), // %0 |
| 280 "+r"(dst_ptr), // %1 |
| 281 "+r"(dst_width) // %2 |
| 282 : "r"((intptr_t)(src_stride)) // %3 |
| 283 : "memory", "cc", NACL_R14 |
|  284       "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
| 285 ); |
| 286 } |
| 287 #endif // HAS_SCALEROWDOWN2_AVX2 |
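The box kernel reads a second source row via the MEMOPREG loads (base %0 indexed by src_stride in %3), sums each 2x2 block into a 16-bit lane with vpmaddubsw plus vpaddw, and the vpsrlw/vpavgw pair then halves twice with rounding, which yields the same value as adding 2 and shifting right by 2 in one step. A scalar sketch of that result (uint8_t standing in for libyuv's uint8):

#include <stddef.h>
#include <stdint.h>

// Scalar equivalent (sketch) of ScaleRowDown2Box_AVX2: rounded average
// of each 2x2 block spanning two source rows.
static void ScaleRowDown2Box_Sketch(const uint8_t* src_ptr, ptrdiff_t src_stride,
                                    uint8_t* dst_ptr, int dst_width) {
  const uint8_t* s = src_ptr;
  const uint8_t* t = src_ptr + src_stride;  // second source row
  for (int x = 0; x < dst_width; ++x) {
    dst_ptr[x] = (uint8_t)((s[0] + s[1] + t[0] + t[1] + 2) >> 2);
    s += 2;
    t += 2;
  }
}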
| 288 |
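For context, a hedged sketch of how a plane-level 2x box downscale might drive the row kernel above. The function name and loop are illustrative assumptions, not libyuv's actual plane-scaling dispatch (which picks a row function at run time based on CPU features and falls back when the width is not a multiple of the SIMD step):

// Illustrative driver (assumption, not libyuv code): one row-kernel call
// per output row, consuming two source rows each time; dst_width must be
// a multiple of 32 for the AVX2 path above.
static void ScalePlaneDown2Box_Sketch(int dst_width, int dst_height,
                                      ptrdiff_t src_stride, ptrdiff_t dst_stride,
                                      const uint8* src_ptr, uint8* dst_ptr) {
  for (int y = 0; y < dst_height; ++y) {
    ScaleRowDown2Box_AVX2(src_ptr, src_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 2;  // consume two source rows per output row
    dst_ptr += dst_stride;
  }
}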
191 void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, | 289 void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, |
192 uint8* dst_ptr, int dst_width) { | 290 uint8* dst_ptr, int dst_width) { |
193 asm volatile ( | 291 asm volatile ( |
194 "pcmpeqb %%xmm5,%%xmm5 \n" | 292 "pcmpeqb %%xmm5,%%xmm5 \n" |
195 "psrld $0x18,%%xmm5 \n" | 293 "psrld $0x18,%%xmm5 \n" |
196 "pslld $0x10,%%xmm5 \n" | 294 "pslld $0x10,%%xmm5 \n" |
197 | 295 |
198 LABELALIGN | 296 LABELALIGN |
199 "1: \n" | 297 "1: \n" |
200 "movdqu " MEMACCESS(0) ",%%xmm0 \n" | 298 "movdqu " MEMACCESS(0) ",%%xmm0 \n" |
(...skipping 896 matching lines...)
1097 ); | 1195 ); |
1098 return num; | 1196 return num; |
1099 } | 1197 } |
1100 | 1198 |
1101 #endif // defined(__x86_64__) || defined(__i386__) | 1199 #endif // defined(__x86_64__) || defined(__i386__) |
1102 | 1200 |
1103 #ifdef __cplusplus | 1201 #ifdef __cplusplus |
1104 } // extern "C" | 1202 } // extern "C" |
1105 } // namespace libyuv | 1203 } // namespace libyuv |
1106 #endif | 1204 #endif |