| Index: source/scale_gcc.cc
|
| diff --git a/source/scale_gcc.cc b/source/scale_gcc.cc
|
| index e73023dbce342a18c8555660cb2e1483491cae93..86d030972bd6b544c8dbd9cb3458111941338196 100644
|
| --- a/source/scale_gcc.cc
|
| +++ b/source/scale_gcc.cc
|
| @@ -9,6 +9,7 @@
|
| */
|
|
|
| #include "libyuv/row.h"
|
| +#include "libyuv/scale_row.h"
|
|
|
| #ifdef __cplusplus
|
| namespace libyuv {
|
| @@ -608,12 +609,12 @@ void ScaleAddRow_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
|
| // Reads 32 bytes and accumulates to 32 shorts at a time.
|
| void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
|
| asm volatile (
|
| - "vpxor %%xmm5,%%xmm5 \n"
|
| + "vpxor %%ymm5,%%ymm5,%%ymm5 \n"
|
|
|
| LABELALIGN
|
| "1: \n"
|
| "vmovdqu " MEMACCESS(0) ",%%ymm3 \n"
|
| - "lea " MEMLEA(0x20,0) ",%0 \n" // src_ptr += 16
|
| + "lea " MEMLEA(0x20,0) ",%0 \n" // src_ptr += 32
|
| "vpermq $0xd8,%%ymm3,%%ymm3 \n"
|
| "vpunpcklbw %%ymm5,%%ymm3,%%ymm2 \n"
|
| "vpunpckhbw %%ymm5,%%ymm3,%%ymm3 \n"
|
|
|