| Index: media/base/simd/convert_rgb_to_yuv_ssse3.asm
|
| diff --git a/media/base/simd/convert_rgb_to_yuv_ssse3.asm b/media/base/simd/convert_rgb_to_yuv_ssse3.asm
|
| deleted file mode 100644
|
| index 6b86ff234de7f1d6bfc38ad5f7b79c83eaf86c2f..0000000000000000000000000000000000000000
|
| --- a/media/base/simd/convert_rgb_to_yuv_ssse3.asm
|
| +++ /dev/null
|
| @@ -1,318 +0,0 @@
|
| -; Copyright (c) 2011 The Chromium Authors. All rights reserved.
|
| -; Use of this source code is governed by a BSD-style license that can be
|
| -; found in the LICENSE file.
|
| -
|
| -%include "media/base/simd/media_export.asm"
|
| -%include "third_party/x86inc/x86inc.asm"
|
| -
|
| -;
|
| -; This file uses SSE, SSE2, SSE3, and SSSE3, which are supported by all ATOM
|
| -; processors.
|
| -;
|
| - SECTION_TEXT
|
| - ; Instruction sets used by this file: SSE, SSE2, SSE3, and SSSE3. (SSE2 is
|
| - ; needed for the integer XMM instructions such as movdqa and pshufd.)
|
| - CPU SSE, SSE2, SSE3, SSSE3
|
| -
|
| -;
|
| -; XMM registers representing constants. We must not use these registers as
|
| -; destination operands.
|
| -; for (int i = 0; i < 16; i += 4) {
|
| -; xmm7.b[i] = 25; xmm7.b[i+1] = 2; xmm7.b[i+2] = 66; xmm7.b[i+3] = 0;
|
| -; xmm6.b[i] = 0; xmm6.b[i+1] = 127; xmm6.b[i+2] = 0; xmm6.b[i+3] = 0;
|
| -; xmm5.b[i] = 112; xmm5.b[i+1] = -74; xmm5.b[i+2] = -38; xmm5.b[i+3] = 0;
|
| -; xmm4.b[i] = -18; xmm4.b[i+1] = -94; xmm4.b[i+2] = 112; xmm4.b[i+3] = 0;
|
| -; }
|
| -;
|
| -%define XMM_CONST_Y0 xmm7
|
| -%define XMM_CONST_Y1 xmm6
|
| -%define XMM_CONST_U xmm5
|
| -%define XMM_CONST_V xmm4
|
| -; NOTE(review): the value of this register is not loaded in the visible part
|
| -; of this file. CALC_Y and CALC_UV add it word-wise as a rounding term and as
|
| -; an offset, so it presumably holds 128 in every 16-bit lane -- confirm
|
| -; against the code that loads it.
|
| -%define XMM_CONST_128 xmm3
|
| -
|
| -;
|
| -; LOAD_XMM %1 (xmm), %2 (imm32)
|
| -; Loads an immediate value to an XMM register.
|
| -; %1.d[0] = %1.d[1] = %1.d[2] = %1.d[3] = %2;
|
| -; Clobbers TEMPd, which is used to stage the immediate.
|
| -;
|
| -%macro LOAD_XMM 2
|
| - ; Stage the immediate in a general-purpose register, then move it into the
|
| - ; low doubleword of the XMM register.
|
| - mov TEMPd, %2
|
| - movd %1, TEMPd
|
| - ; Broadcast doubleword 0 to all four doubleword lanes.
|
| - pshufd %1, %1, 00000000B
|
| -%endmacro
|
| -
|
| -;
|
| -; UNPACKRGB %1 (xmm), %2 (imm8)
|
| -; Inserts a zero byte at byte index %2 of the specified XMM register and moves
|
| -; the bytes that were at index %2 and above up by one position. (The topmost
|
| -; byte is shifted out.) READ_ARGB uses this to give a packed RGB pixel a zero
|
| -; alpha byte.
|
| -; for (int i = 15; i > %2; --i) %1.b[i] = %1.b[i - 1];
|
| -; %1.b[%2] = 0;
|
| -; Bytes below index %2 are left unchanged.
|
| -; Clobbers xmm1.
|
| -;
|
| -%macro UNPACKRGB 2
|
| - ; xmm1 = %1 with its low %2 bytes cleared (shift them out, then shift back).
|
| - movdqa xmm1, %1
|
| - psrldq xmm1, %2
|
| - pslldq xmm1, %2
|
| - ; Remove those upper bytes from %1 so that only its low %2 bytes remain.
|
| - pxor %1, xmm1
|
| - ; Move the upper bytes up by one, which leaves a zero at index %2, and merge
|
| - ; them back into %1.
|
| - pslldq xmm1, 1
|
| - por %1, xmm1
|
| -%endmacro
|
| -
|
| -;
|
| -; READ_ARGB %1 (xmm), %2 (imm)
|
| -; Read the specified number of ARGB (or RGB) pixels from the source and store
|
| -; them to the destination xmm register. If the input format is RGB, we read RGB
|
| -; pixels and convert them to ARGB pixels. (For this case, the alpha values of
|
| -; the output pixels become 0.)
|
| -; %2 must be 1, 2, or 4. The source byte offset is WIDTHq * 2 * PIXELSIZE.
|
| -; Clobbers TEMP and xmm1 when PIXELSIZE == 3. WIDTHq is preserved.
|
| -;
|
| -%macro READ_ARGB 2
|
| -
|
| -%if PIXELSIZE == 4
|
| -
|
| - ; Read ARGB pixels from the source. (This macro assumes the input buffer may
|
| - ; not be aligned to a 16-byte boundary.)
|
| -%if %2 == 1
|
| - movd %1, DWORD [ARGBq + WIDTHq * 4 * 2]
|
| -%elif %2 == 2
|
| - movq %1, QWORD [ARGBq + WIDTHq * 4 * 2]
|
| -%elif %2 == 4
|
| - movdqu %1, DQWORD [ARGBq + WIDTHq * 4 * 2]
|
| -%else
|
| -%error unsupported number of pixels.
|
| -%endif
|
| -
|
| -%elif PIXELSIZE == 3
|
| -
|
| - ; Read RGB pixels from the source and convert them to ARGB pixels.
|
| -%if %2 == 1
|
| - ; Read one RGB pixel and convert it to one ARGB pixel.
|
| - ; Save the WIDTH register to xmm1. (This macro needs to break it.)
|
| - MOVq xmm1, WIDTHq
|
| -
|
| - ; Read exactly three bytes from the source into TEMPd (one byte into bits
|
| - ; 16-23 and one word into bits 0-15), and copy the result to the destination
|
| - ; xmm register. Bits 24-31 (the alpha byte) stay zero.
|
| - lea WIDTHq, [WIDTHq + WIDTHq * 2]
|
| - movzx TEMPd, BYTE [ARGBq + WIDTHq * 2 + 2]
|
| - shl TEMPd, 16
|
| - mov TEMPw, WORD [ARGBq + WIDTHq * 2]
|
| - movd %1, TEMPd
|
| -
|
| - ; Restore the WIDTH register.
|
| - MOVq WIDTHq, xmm1
|
| -%elif %2 == 2
|
| - ; Read two RGB pixels and convert them to two ARGB pixels.
|
| - ; Read six bytes from the source to the destination xmm register: four bytes
|
| - ; into bytes 0-3 and the remaining two into word 2 (bytes 4-5), so that the
|
| - ; six source bytes are contiguous in the register.
|
| - mov TEMPq, WIDTHq
|
| - lea TEMPq, [TEMPq + TEMPq * 2]
|
| - movd %1, DWORD [ARGBq + TEMPq * 2]
|
| - pinsrw %1, WORD [ARGBq + TEMPq * 2 + 4], 2
|
| -
|
| - ; Fill the alpha values of these RGB pixels with 0 and convert them to two
|
| - ; ARGB pixels. (Bytes 7 and above are already zero, so only the first pixel
|
| - ; needs a zero byte inserted.)
|
| - UNPACKRGB %1, 3
|
| -%elif %2 == 4
|
| - ; Read four RGB pixels and convert them to four ARGB pixels.
|
| - ; Read twelve bytes from the source to the destination xmm register: eight
|
| - ; bytes into the low quadword and four bytes (via xmm1) into doubleword 2.
|
| - mov TEMPq, WIDTHq
|
| - lea TEMPq, [TEMPq + TEMPq * 2]
|
| - movq %1, QWORD [ARGBq + TEMPq * 2]
|
| - movd xmm1, DWORD [ARGBq + TEMPq * 2 + 8]
|
| - shufps %1, xmm1, 01000100B
|
| -
|
| - ; Fill the alpha values of these RGB pixels with 0 and convert them to four
|
| - ; ARGB pixels.
|
| - UNPACKRGB %1, 3
|
| - UNPACKRGB %1, 4 + 3
|
| - UNPACKRGB %1, 4 + 4 + 3
|
| -%else
|
| -%error unsupported number of pixels.
|
| -%endif
|
| -
|
| -%else
|
| -%error unsupported PIXELSIZE value.
|
| -%endif
|
| -
|
| -%endmacro
|
| -
|
| -;
|
| -; CALC_Y %1 (xmm), %2 (xmm)
|
| -; Calculates four Y values from four ARGB pixels stored in %2.
|
| -; %1.b[0] = ToByte((25 * B(0) + 129 * G(0) + 66 * R(0) + 128) / 256 + 16);
|
| -; %1.b[1] = ToByte((25 * B(1) + 129 * G(1) + 66 * R(1) + 128) / 256 + 16);
|
| -; %1.b[2] = ToByte((25 * B(2) + 129 * G(2) + 66 * R(2) + 128) / 256 + 16);
|
| -; %1.b[3] = ToByte((25 * B(3) + 129 * G(3) + 66 * R(3) + 128) / 256 + 16);
|
| -; Clobbers xmm2.
|
| -;
|
| -%macro CALC_Y 2
|
| - ; To avoid signed saturation, we divide this conversion formula into two
|
| - ; formulae and store their results into two XMM registers %1 and xmm2.
|
| - ; (The G coefficient 129 is split into 2 + 127.)
|
| - ; %1.w[0] = 25 * %2.b[0] + 2 * %2.b[1] + 66 * %2.b[2] + 0 * %2.b[3];
|
| - ; %1.w[1] = 25 * %2.b[4] + 2 * %2.b[5] + 66 * %2.b[6] + 0 * %2.b[7];
|
| - ; %1.w[2] = 25 * %2.b[8] + 2 * %2.b[9] + 66 * %2.b[10] + 0 * %2.b[11];
|
| - ; %1.w[3] = 25 * %2.b[12] + 2 * %2.b[13] + 66 * %2.b[14] + 0 * %2.b[15];
|
| - ; xmm2.w[0] = 0 * %2.b[0] + 127 * %2.b[1] + 0 * %2.b[2] + 0 * %2.b[3];
|
| - ; xmm2.w[1] = 0 * %2.b[4] + 127 * %2.b[5] + 0 * %2.b[6] + 0 * %2.b[7];
|
| - ; xmm2.w[2] = 0 * %2.b[8] + 127 * %2.b[9] + 0 * %2.b[10] + 0 * %2.b[11];
|
| - ; xmm2.w[3] = 0 * %2.b[12] + 127 * %2.b[13] + 0 * %2.b[14] + 0 * %2.b[15];
|
| - movdqa %1, %2
|
| - pmaddubsw %1, XMM_CONST_Y0
|
| - phaddsw %1, %1
|
| - movdqa xmm2, %2
|
| - pmaddubsw xmm2, XMM_CONST_Y1
|
| - phaddsw xmm2, xmm2
|
| -
|
| - ; %1.b[0] = ToByte((%1.w[0] + xmm2.w[0] + 128) / 256 + 16);
|
| - ; %1.b[1] = ToByte((%1.w[1] + xmm2.w[1] + 128) / 256 + 16);
|
| - ; %1.b[2] = ToByte((%1.w[2] + xmm2.w[2] + 128) / 256 + 16);
|
| - ; %1.b[3] = ToByte((%1.w[3] + xmm2.w[3] + 128) / 256 + 16);
|
| - ; xmm2 is reused here: first as a copy of XMM_CONST_128 (the rounding term),
|
| - ; then shifted right by 3 to serve as the final offset. This yields 16 only
|
| - ; if XMM_CONST_128 holds 128 in each word -- TODO confirm against its loader.
|
| - paddw %1, xmm2
|
| - movdqa xmm2, XMM_CONST_128
|
| - paddw %1, xmm2
|
| - psrlw %1, 8
|
| - psrlw xmm2, 3
|
| - paddw %1, xmm2
|
| - packuswb %1, %1
|
| -%endmacro
|
| -
|
| -;
|
| -; INIT_UV %1 (r32), %2 (reg) %3 (imm)
|
| -; When SUBSAMPLING == 1 and LINE == 1, loads the bytes currently stored at
|
| -; [%2 + WIDTHq] into %1, zero-extended: one byte when %3 (the number of
|
| -; pixels) is 1 or 2, and two bytes when %3 is 4. CALC_UV averages such a value
|
| -; (passed as its %4) with the newly calculated U or V values.
|
| -; For every other SUBSAMPLING / LINE combination this macro emits no code.
|
| -;
|
| -%macro INIT_UV 3
|
| -
|
| -%if SUBSAMPLING == 1 && LINE == 1
|
| -%if %3 == 1 || %3 == 2
|
| - movzx %1, BYTE [%2 + WIDTHq]
|
| -%elif %3 == 4
|
| - movzx %1, WORD [%2 + WIDTHq]
|
| -%else
|
| -%error unsupported number of pixels.
|
| -%endif
|
| -%endif
|
| -
|
| -%endmacro
|
| -
|
| -;
|
| -; CALC_UV %1 (xmm), %2 (xmm), %3 (xmm), %4 (r32)
|
| -; Calculates two U (or V) values from four ARGB pixels stored in %2.
|
| -; Let C(n) be the weighted sum for pixel n:
|
| -; if %3 == XMM_CONST_U: C(n) = 112 * B(n) - 74 * G(n) - 38 * R(n);
|
| -; if %3 == XMM_CONST_V: C(n) = -18 * B(n) - 94 * G(n) + 112 * R(n);
|
| -; Then:
|
| -; if (SUBSAMPLING) {
|
| -; %1.b[0] = ToByte(((C(0) + C(1) + 1) / 2 + 128) / 256 + 128);
|
| -; %1.b[1] = ToByte(((C(2) + C(3) + 1) / 2 + 128) / 256 + 128);
|
| -; } else {
|
| -; %1.b[0] = ToByte((C(0) + 128) / 256 + 128);
|
| -; %1.b[1] = ToByte((C(2) + 128) / 256 + 128);
|
| -; }
|
| -; If SUBSAMPLING == 1 and LINE == 1, each result byte is finally averaged
|
| -; (rounding up) with the corresponding byte of %4.
|
| -; Clobbers xmm2.
|
| -;
|
| -%macro CALC_UV 4
|
| - ; for (int i = 0; i < 4; ++i) {
|
| - ; %1.w[i] = 0;
|
| - ; for (int j = 0; j < 4; ++j)
|
| - ; %1.w[i] += %3.b[i * 4 + j] * %2.b[i * 4 + j];
|
| - ; }
|
| - movdqa %1, %2
|
| - pmaddubsw %1, %3
|
| - phaddsw %1, %1
|
| -
|
| -%if SUBSAMPLING == 1
|
| - ; %1.w[0] = (%1.w[0] + %1.w[1] + 1) / 2;
|
| - ; %1.w[1] = (%1.w[1] + %1.w[0] + 1) / 2;
|
| - ; %1.w[2] = (%1.w[2] + %1.w[3] + 1) / 2;
|
| - ; %1.w[3] = (%1.w[3] + %1.w[2] + 1) / 2;
|
| - ; The sums are signed words but pavgw averages unsigned words, which gives a
|
| - ; wrong result when the two inputs have opposite signs. Flip the sign bit
|
| - ; (add a bias of 0x8000) before averaging and flip it back afterwards.
|
| - pcmpeqw xmm2, xmm2
|
| - psllw xmm2, 15
|
| - pxor %1, xmm2
|
| - pshuflw xmm2, %1, 10110001B
|
| - pavgw %1, xmm2
|
| - ; xmm2 was overwritten by the shuffle, so rebuild the 0x8000 mask.
|
| - pcmpeqw xmm2, xmm2
|
| - psllw xmm2, 15
|
| - pxor %1, xmm2
|
| -%endif
|
| -
|
| - ; %1.b[0] = ToByte((%1.w[0] + 128) / 256 + 128);
|
| - ; %1.b[1] = ToByte((%1.w[2] + 128) / 256 + 128);
|
| - pshuflw %1, %1, 10001000B
|
| - paddw %1, XMM_CONST_128
|
| - psraw %1, 8
|
| - paddw %1, XMM_CONST_128
|
| - packuswb %1, %1
|
| -
|
| -%if SUBSAMPLING == 1 && LINE == 1
|
| - ; %1.b[0] = (%1.b[0] + %4.b[0] + 1) / 2;
|
| - ; %1.b[1] = (%1.b[1] + %4.b[1] + 1) / 2;
|
| - movd xmm2, %4
|
| - pavgb %1, xmm2
|
| -%endif
|
| -%endmacro
|
| -
|
| -;
|
| -; extern "C" void ConvertARGBToYUVRow_SSSE3(const uint8_t* argb,
|
| -; uint8_t* y,
|
| -; uint8_t* u,
|
| -; uint8_t* v,
|
| -; ptrdiff_t width);
|
| -;
|
| -; Parameters for the included file, as used by the macros in this file:
|
| -; PIXELSIZE 4 makes READ_ARGB read 4-byte ARGB pixels; SUBSAMPLING 0 makes
|
| -; CALC_UV skip the averaging of horizontally adjacent pixels and of %4.
|
| -; (Presumably the included file emits the function named by SYMBOL.)
|
| -%define SYMBOL ConvertARGBToYUVRow_SSSE3
|
| -%define PIXELSIZE 4
|
| -%define SUBSAMPLING 0
|
| -%define LINE 0
|
| -%include "convert_rgb_to_yuv_ssse3.inc"
|
| -
|
| -;
|
| -; extern "C" void ConvertRGBToYUVRow_SSSE3(const uint8_t* rgb,
|
| -; uint8_t* y,
|
| -; uint8_t* u,
|
| -; uint8_t* v,
|
| -; ptrdiff_t width);
|
| -;
|
| -; Parameters for the included file, as used by the macros in this file:
|
| -; PIXELSIZE 3 makes READ_ARGB read 3-byte RGB pixels and expand them to ARGB;
|
| -; SUBSAMPLING 0 makes CALC_UV skip the averaging of horizontally adjacent
|
| -; pixels and of %4.
|
| -; (Presumably the included file emits the function named by SYMBOL.)
|
| -%define SYMBOL ConvertRGBToYUVRow_SSSE3
|
| -%define PIXELSIZE 3
|
| -%define SUBSAMPLING 0
|
| -%define LINE 0
|
| -%include "convert_rgb_to_yuv_ssse3.inc"
|
| -
|
| -;
|
| -; extern "C" void ConvertARGBToYUVEven_SSSE3(const uint8_t* argb,
|
| -; uint8_t* y,
|
| -; uint8_t* u,
|
| -; uint8_t* v,
|
| -; ptrdiff_t width);
|
| -;
|
| -; Parameters for the included file, as used by the macros in this file:
|
| -; PIXELSIZE 4 makes READ_ARGB read 4-byte ARGB pixels; SUBSAMPLING 1 makes
|
| -; CALC_UV average horizontally adjacent pixels; LINE 0 means INIT_UV emits
|
| -; no code and CALC_UV does not average its result with %4.
|
| -; (Presumably the included file emits the function named by SYMBOL.)
|
| -%define SYMBOL ConvertARGBToYUVEven_SSSE3
|
| -%define PIXELSIZE 4
|
| -%define SUBSAMPLING 1
|
| -%define LINE 0
|
| -%include "convert_rgb_to_yuv_ssse3.inc"
|
| -
|
| -;
|
| -; extern "C" void ConvertARGBToYUVOdd_SSSE3(const uint8_t* argb,
|
| -; uint8_t* y,
|
| -; uint8_t* u,
|
| -; uint8_t* v,
|
| -; ptrdiff_t width);
|
| -;
|
| -; Parameters for the included file, as used by the macros in this file:
|
| -; PIXELSIZE 4 makes READ_ARGB read 4-byte ARGB pixels; SUBSAMPLING 1 makes
|
| -; CALC_UV average horizontally adjacent pixels; LINE 1 makes INIT_UV load
|
| -; the stored U/V bytes and CALC_UV average its result with them.
|
| -; (Presumably the included file emits the function named by SYMBOL.)
|
| -%define SYMBOL ConvertARGBToYUVOdd_SSSE3
|
| -%define PIXELSIZE 4
|
| -%define SUBSAMPLING 1
|
| -%define LINE 1
|
| -%include "convert_rgb_to_yuv_ssse3.inc"
|
| -
|
| -;
|
| -; extern "C" void ConvertRGBToYUVEven_SSSE3(const uint8_t* rgb,
|
| -; uint8_t* y,
|
| -; uint8_t* u,
|
| -; uint8_t* v,
|
| -; ptrdiff_t width);
|
| -;
|
| -; Parameters for the included file, as used by the macros in this file:
|
| -; PIXELSIZE 3 makes READ_ARGB read 3-byte RGB pixels and expand them to ARGB;
|
| -; SUBSAMPLING 1 makes CALC_UV average horizontally adjacent pixels; LINE 0
|
| -; means INIT_UV emits no code and CALC_UV does not average its result with %4.
|
| -; (Presumably the included file emits the function named by SYMBOL.)
|
| -%define SYMBOL ConvertRGBToYUVEven_SSSE3
|
| -%define PIXELSIZE 3
|
| -%define SUBSAMPLING 1
|
| -%define LINE 0
|
| -%include "convert_rgb_to_yuv_ssse3.inc"
|
| -
|
| -;
|
| -; extern "C" void ConvertRGBToYUVOdd_SSSE3(const uint8_t* rgb,
|
| -; uint8_t* y,
|
| -; uint8_t* u,
|
| -; uint8_t* v,
|
| -; ptrdiff_t width);
|
| -;
|
| -; Parameters for the included file, as used by the macros in this file:
|
| -; PIXELSIZE 3 makes READ_ARGB read 3-byte RGB pixels and expand them to ARGB;
|
| -; SUBSAMPLING 1 makes CALC_UV average horizontally adjacent pixels; LINE 1
|
| -; makes INIT_UV load the stored U/V bytes and CALC_UV average its result
|
| -; with them.
|
| -; (Presumably the included file emits the function named by SYMBOL.)
|
| -%define SYMBOL ConvertRGBToYUVOdd_SSSE3
|
| -%define PIXELSIZE 3
|
| -%define SUBSAMPLING 1
|
| -%define LINE 1
|
| -%include "convert_rgb_to_yuv_ssse3.inc"
|
|
|