| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2013 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2013 The LibYuv Project Authors. All rights reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 268 matching lines...) |
| 279 : "+r"(src_ptr), // %0 | 279 : "+r"(src_ptr), // %0 |
| 280 "+r"(dst_ptr), // %1 | 280 "+r"(dst_ptr), // %1 |
| 281 "+r"(dst_width) // %2 | 281 "+r"(dst_width) // %2 |
| 282 : "r"((intptr_t)(src_stride)) // %3 | 282 : "r"((intptr_t)(src_stride)) // %3 |
| 283 : "memory", "cc", NACL_R14 | 283 : "memory", "cc", NACL_R14 |
| 284 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" | 284 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" |
| 285 ); | 285 ); |
| 286 } | 286 } |
| 287 #endif // HAS_SCALEROWDOWN2_AVX2 | 287 #endif // HAS_SCALEROWDOWN2_AVX2 |
| 288 | 288 |
| 289 void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, | 289 void ScaleRowDown4_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, |
| 290 uint8* dst_ptr, int dst_width) { | 290 uint8* dst_ptr, int dst_width) { |
| 291 asm volatile ( | 291 asm volatile ( |
| 292 "pcmpeqb %%xmm5,%%xmm5 \n" | 292 "pcmpeqb %%xmm5,%%xmm5 \n" |
| 293 "psrld $0x18,%%xmm5 \n" | 293 "psrld $0x18,%%xmm5 \n" |
| 294 "pslld $0x10,%%xmm5 \n" | 294 "pslld $0x10,%%xmm5 \n" |
| 295 | 295 |
| 296 LABELALIGN | 296 LABELALIGN |
| 297 "1: \n" | 297 "1: \n" |
| 298 "movdqu " MEMACCESS(0) ",%%xmm0 \n" | 298 "movdqu " MEMACCESS(0) ",%%xmm0 \n" |
| 299 "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" | 299 "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" |
| 300 "lea " MEMLEA(0x20,0) ",%0 \n" | 300 "lea " MEMLEA(0x20,0) ",%0 \n" |
| 301 "pand %%xmm5,%%xmm0 \n" | 301 "pand %%xmm5,%%xmm0 \n" |
| 302 "pand %%xmm5,%%xmm1 \n" | 302 "pand %%xmm5,%%xmm1 \n" |
| 303 "packuswb %%xmm1,%%xmm0 \n" | 303 "packuswb %%xmm1,%%xmm0 \n" |
| 304 "psrlw $0x8,%%xmm0 \n" | 304 "psrlw $0x8,%%xmm0 \n" |
| 305 "packuswb %%xmm0,%%xmm0 \n" | 305 "packuswb %%xmm0,%%xmm0 \n" |
| 306 "movq %%xmm0," MEMACCESS(1) " \n" | 306 "movq %%xmm0," MEMACCESS(1) " \n" |
| 307 "lea " MEMLEA(0x8,1) ",%1 \n" | 307 "lea " MEMLEA(0x8,1) ",%1 \n" |
| 308 "sub $0x8,%2 \n" | 308 "sub $0x8,%2 \n" |
| 309 "jg 1b \n" | 309 "jg 1b \n" |
| 310 : "+r"(src_ptr), // %0 | 310 : "+r"(src_ptr), // %0 |
| 311 "+r"(dst_ptr), // %1 | 311 "+r"(dst_ptr), // %1 |
| 312 "+r"(dst_width) // %2 | 312 "+r"(dst_width) // %2 |
| 313 :: "memory", "cc", "xmm0", "xmm1", "xmm5" | 313 :: "memory", "cc", "xmm0", "xmm1", "xmm5" |
| 314 ); | 314 ); |
| 315 } | 315 } |
| 316 | 316 |
| 317 void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, | 317 void ScaleRowDown4Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, |
| 318 uint8* dst_ptr, int dst_width) { | 318 uint8* dst_ptr, int dst_width) { |
| 319 intptr_t stridex3 = 0; | 319 intptr_t stridex3 = 0; |
| 320 asm volatile ( | 320 asm volatile ( |
| 321 "pcmpeqb %%xmm7,%%xmm7 \n" | 321 "pcmpeqb %%xmm4,%%xmm4 \n" |
| 322 "psrlw $0x8,%%xmm7 \n" | 322 "psrlw $0xf,%%xmm4 \n" |
| 323 "movdqa %%xmm4,%%xmm5 \n" |
| 324 "packuswb %%xmm4,%%xmm4 \n" |
| 325 "psllw $0x3,%%xmm5 \n" |
| 323 "lea " MEMLEA4(0x00,4,4,2) ",%3 \n" | 326 "lea " MEMLEA4(0x00,4,4,2) ",%3 \n" |
| 324 | 327 |
| 325 LABELALIGN | 328 LABELALIGN |
| 326 "1: \n" | 329 "1: \n" |
| 327 "movdqu " MEMACCESS(0) ",%%xmm0 \n" | 330 "movdqu " MEMACCESS(0) ",%%xmm0 \n" |
| 328 "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" | 331 "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" |
| 329 MEMOPREG(movdqu,0x00,0,4,1,xmm2) // movdqu (%0,%4,1),%%xmm2 | 332 MEMOPREG(movdqu,0x00,0,4,1,xmm2) // movdqu (%0,%4,1),%%xmm2 |
| 330 MEMOPREG(movdqu,0x10,0,4,1,xmm3) // movdqu 0x10(%0,%4,1),%%xmm3 | 333 MEMOPREG(movdqu,0x10,0,4,1,xmm3) // movdqu 0x10(%0,%4,1),%%xmm3 |
| 331 "pavgb %%xmm2,%%xmm0 \n" | 334 "pmaddubsw %%xmm4,%%xmm0 \n" |
| 332 "pavgb %%xmm3,%%xmm1 \n" | 335 "pmaddubsw %%xmm4,%%xmm1 \n" |
| 336 "pmaddubsw %%xmm4,%%xmm2 \n" |
| 337 "pmaddubsw %%xmm4,%%xmm3 \n" |
| 338 "paddw %%xmm2,%%xmm0 \n" |
| 339 "paddw %%xmm3,%%xmm1 \n" |
| 333 MEMOPREG(movdqu,0x00,0,4,2,xmm2) // movdqu (%0,%4,2),%%xmm2 | 340 MEMOPREG(movdqu,0x00,0,4,2,xmm2) // movdqu (%0,%4,2),%%xmm2 |
| 334 MEMOPREG(movdqu,0x10,0,4,2,xmm3) // movdqu 0x10(%0,%4,2),%%xmm3 | 341 MEMOPREG(movdqu,0x10,0,4,2,xmm3) // movdqu 0x10(%0,%4,2),%%xmm3 |
| 335 MEMOPREG(movdqu,0x00,0,3,1,xmm4) // movdqu (%0,%3,1),%%xmm4 | 342 "pmaddubsw %%xmm4,%%xmm2 \n" |
| 336 MEMOPREG(movdqu,0x10,0,3,1,xmm5) // movdqu 0x10(%0,%3,1),%%xmm5 | 343 "pmaddubsw %%xmm4,%%xmm3 \n" |
| 344 "paddw %%xmm2,%%xmm0 \n" |
| 345 "paddw %%xmm3,%%xmm1 \n" |
| 346 MEMOPREG(movdqu,0x00,0,3,1,xmm2) // movdqu (%0,%3,1),%%xmm2 |
| 347 MEMOPREG(movdqu,0x10,0,3,1,xmm3) // movdqu 0x10(%0,%3,1),%%xmm3 |
| 337 "lea " MEMLEA(0x20,0) ",%0 \n" | 348 "lea " MEMLEA(0x20,0) ",%0 \n" |
| 338 "pavgb %%xmm4,%%xmm2 \n" | 349 "pmaddubsw %%xmm4,%%xmm2 \n" |
| 339 "pavgb %%xmm2,%%xmm0 \n" | 350 "pmaddubsw %%xmm4,%%xmm3 \n" |
| 340 "pavgb %%xmm5,%%xmm3 \n" | 351 "paddw %%xmm2,%%xmm0 \n" |
| 341 "pavgb %%xmm3,%%xmm1 \n" | 352 "paddw %%xmm3,%%xmm1 \n" |
| 342 "movdqa %%xmm0,%%xmm2 \n" | 353 "phaddw %%xmm1,%%xmm0 \n" |
| 343 "psrlw $0x8,%%xmm0 \n" | 354 "paddw %%xmm5,%%xmm0 \n" |
| 344 "movdqa %%xmm1,%%xmm3 \n" | 355 "psrlw $0x4,%%xmm0 \n" |
| 345 "psrlw $0x8,%%xmm1 \n" | 356 "packuswb %%xmm0,%%xmm0 \n" |
| 346 "pand %%xmm7,%%xmm2 \n" | |
| 347 "pand %%xmm7,%%xmm3 \n" | |
| 348 "pavgw %%xmm2,%%xmm0 \n" | |
| 349 "pavgw %%xmm3,%%xmm1 \n" | |
| 350 "packuswb %%xmm1,%%xmm0 \n" | |
| 351 "movdqa %%xmm0,%%xmm2 \n" | |
| 352 "psrlw $0x8,%%xmm0 \n" | |
| 353 "pand %%xmm7,%%xmm2 \n" | |
| 354 "pavgw %%xmm2,%%xmm0 \n" | |
| 355 "packuswb %%xmm0,%%xmm0 \n" | |
| 356 "movq %%xmm0," MEMACCESS(1) " \n" | 357 "movq %%xmm0," MEMACCESS(1) " \n" |
| 357 "lea " MEMLEA(0x8,1) ",%1 \n" | 358 "lea " MEMLEA(0x8,1) ",%1 \n" |
| 358 "sub $0x8,%2 \n" | 359 "sub $0x8,%2 \n" |
| 359 "jg 1b \n" | 360 "jg 1b \n" |
| 360 : "+r"(src_ptr), // %0 | 361 : "+r"(src_ptr), // %0 |
| 361 "+r"(dst_ptr), // %1 | 362 "+r"(dst_ptr), // %1 |
| 362 "+r"(dst_width), // %2 | 363 "+r"(dst_width), // %2 |
| 363 "+r"(stridex3) // %3 | 364 "+r"(stridex3) // %3 |
| 364 : "r"((intptr_t)(src_stride)) // %4 | 365 : "r"((intptr_t)(src_stride)) // %4 |
| 365 : "memory", "cc", NACL_R14 | 366 : "memory", "cc", NACL_R14 |
| 366 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm7" | 367 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
| 367 ); | 368 ); |
| 368 } | 369 } |
| 369 | 370 |
| 370 void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, | 371 void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, |
| 371 uint8* dst_ptr, int dst_width) { | 372 uint8* dst_ptr, int dst_width) { |
| 372 asm volatile ( | 373 asm volatile ( |
| 373 "movdqa %0,%%xmm3 \n" | 374 "movdqa %0,%%xmm3 \n" |
| 374 "movdqa %1,%%xmm4 \n" | 375 "movdqa %1,%%xmm4 \n" |
| 375 "movdqa %2,%%xmm5 \n" | 376 "movdqa %2,%%xmm5 \n" |
| 376 : | 377 : |
| (...skipping 818 matching lines...) |
| 1195 ); | 1196 ); |
| 1196 return num; | 1197 return num; |
| 1197 } | 1198 } |
| 1198 | 1199 |
| 1199 #endif // defined(__x86_64__) || defined(__i386__) | 1200 #endif // defined(__x86_64__) || defined(__i386__) |
| 1200 | 1201 |
| 1201 #ifdef __cplusplus | 1202 #ifdef __cplusplus |
| 1202 } // extern "C" | 1203 } // extern "C" |
| 1203 } // namespace libyuv | 1204 } // namespace libyuv |
| 1204 #endif | 1205 #endif |
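For reference, the change above reworks the 1/4 downscalers for SSSE3: ScaleRowDown4_SSSE3 still keeps the third byte of every group of four (the 0x00FF0000 mask path), while ScaleRowDown4Box_SSSE3 replaces the cascaded pavgb averaging with pmaddubsw/phaddw accumulation, so each output byte becomes the rounded average of a 4x4 source block, i.e. (sum + 8) >> 4. A minimal scalar sketch of that box-path arithmetic follows; the helper name ScaleRowDown4Box_C_Sketch is invented here for illustration and is not part of this change.

#include <stdint.h>
#include <stddef.h>

/* Illustrative scalar equivalent of the new SSSE3 box path (a sketch, not
 * part of the CL): every destination pixel is the average of a 4x4 source
 * block, rounded to nearest, matching the pmaddubsw/phaddw accumulation and
 * the final "paddw %%xmm5" (+8) and "psrlw $0x4" (>>4) in the new code. */
static void ScaleRowDown4Box_C_Sketch(const uint8_t* src_ptr,
                                      ptrdiff_t src_stride,
                                      uint8_t* dst_ptr, int dst_width) {
  int x;
  for (x = 0; x < dst_width; ++x) {
    uint32_t sum = 0;
    int row, col;
    for (row = 0; row < 4; ++row) {    /* rows: src, src+stride, +2*stride, +3*stride */
      for (col = 0; col < 4; ++col) {  /* four horizontal neighbours */
        sum += src_ptr[row * src_stride + x * 4 + col];
      }
    }
    dst_ptr[x] = (uint8_t)((sum + 8) >> 4);  /* 16 samples: round, divide by 16 */
  }
}

Each iteration of the vector loop above consumes 32 source bytes from each of four rows and emits 8 destination bytes, which corresponds to calling this sketch with dst_width = 8.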