| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2012 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2012 The LibYuv Project Authors. All rights reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 #include "libyuv/row.h" | 11 #include "libyuv/row.h" |
| 12 | 12 |
| 13 #include <string.h> // For memset. |
| 14 |
| 13 #include "libyuv/basic_types.h" | 15 #include "libyuv/basic_types.h" |
| 14 | 16 |
| 15 #ifdef __cplusplus | 17 #ifdef __cplusplus |
| 16 namespace libyuv { | 18 namespace libyuv { |
| 17 extern "C" { | 19 extern "C" { |
| 18 #endif | 20 #endif |
| 19 | 21 |
| 20 // YUV to RGB does multiple of 8 with SIMD and remainder with C. | 22 // Subsampled source needs to be increased by 1 if not even. |
| 21 #define YANY(NAMEANY, I420TORGB_SIMD, I420TORGB_C, UV_SHIFT, BPP, MASK) \ | 23 #define SS(width, shift) (((width) + (1 << (shift)) - 1) >> (shift)) |
| 24 |
| 25 // Any 3 planes to 1. |
| 26 #define ANY31(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \ |
| 22 void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, \ | 27 void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, \ |
| 23 uint8* rgb_buf, int width) { \ | 28 uint8* dst_ptr, int width) { \ |
| 24 int n = width & ~MASK; \ | 29 SIMD_ALIGNED(uint8 temp[64 * 4]); \ |
| 25 if (n > 0) { \ | 30 memset(temp, 0, 64 * 3); /* for YUY2 and msan */ \ |
| 26 I420TORGB_SIMD(y_buf, u_buf, v_buf, rgb_buf, n); \ | 31 int r = width & MASK; \ |
| 27 } \ | 32 int n = width & ~MASK; \ |
| 28 I420TORGB_C(y_buf + n, \ | 33 if (n > 0) { \ |
| 29 u_buf + (n >> UV_SHIFT), \ | 34 ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, n); \ |
| 30 v_buf + (n >> UV_SHIFT), \ | 35 } \ |
| 31 rgb_buf + n * BPP, width & MASK); \ | 36 memcpy(temp, y_buf + n, r); \ |
| 37 memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ |
| 38 memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ |
| 39 ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, MASK + 1); \ |
| 40 memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192, \ |
| 41 SS(r, DUVSHIFT) * BPP); \ |
| 32 } | 42 } |
| 33 | 43 |
| 34 #ifdef HAS_I422TOARGBROW_SSSE3 | 44 #ifdef HAS_I422TOARGBROW_SSSE3 |
| 35 YANY(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, I422ToARGBRow_C, | 45 ANY31(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7) |
| 36 1, 4, 7) | |
| 37 #endif | 46 #endif |
| 38 #ifdef HAS_I444TOARGBROW_SSSE3 | 47 #ifdef HAS_I444TOARGBROW_SSSE3 |
| 39 YANY(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, I444ToARGBRow_C, | 48 ANY31(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7) |
| 40 0, 4, 7) | 49 ANY31(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_SSSE3, 2, 0, 4, 7) |
| 41 YANY(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_SSSE3, I411ToARGBRow_C, | 50 ANY31(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_SSSE3, 1, 0, 4, 7) |
| 42 2, 4, 7) | 51 ANY31(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_SSSE3, 1, 0, 4, 7) |
| 43 YANY(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_SSSE3, I422ToBGRARow_C, | 52 ANY31(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, 1, 0, 4, 7) |
| 44 1, 4, 7) | 53 ANY31(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, 1, 0, 2, 7) |
| 45 YANY(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_SSSE3, I422ToABGRRow_C, | 54 ANY31(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, 1, 0, 2, 7) |
| 46 1, 4, 7) | 55 ANY31(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, 1, 0, 2, 7) |
| 47 YANY(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, I422ToRGBARow_C, | 56 ANY31(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, 1, 0, 3, 7) |
| 48 1, 4, 7) | 57 ANY31(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_SSSE3, 1, 0, 3, 7) |
| 49 YANY(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, I422ToARGB4444Row_C, | 58 ANY31(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, 1, 1, 4, 15) |
| 50 1, 2, 7) | 59 ANY31(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, 1, 1, 4, 15) |
| 51 YANY(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, I422ToARGB1555Row_C, | |
| 52 1, 2, 7) | |
| 53 YANY(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, I422ToRGB565Row_C, | |
| 54 1, 2, 7) | |
| 55 YANY(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, I422ToRGB24Row_C, 1, 3, 7) | |
| 56 YANY(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_SSSE3, I422ToRAWRow_C, 1, 3, 7) | |
| 57 YANY(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, I422ToYUY2Row_C, 1, 2, 15) | |
| 58 YANY(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, I422ToUYVYRow_C, 1, 2, 15) | |
| 59 #endif // HAS_I444TOARGBROW_SSSE3 | 60 #endif // HAS_I444TOARGBROW_SSSE3 |
| 61 #ifdef HAS_I422TORGB24ROW_AVX2 |
| 62 ANY31(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 15) |
| 63 #endif |
| 64 #ifdef HAS_I422TORAWROW_AVX2 |
| 65 ANY31(I422ToRAWRow_Any_AVX2, I422ToRAWRow_AVX2, 1, 0, 3, 15) |
| 66 #endif |
| 60 #ifdef HAS_J422TOARGBROW_SSSE3 | 67 #ifdef HAS_J422TOARGBROW_SSSE3 |
| 61 YANY(J422ToARGBRow_Any_SSSE3, J422ToARGBRow_SSSE3, J422ToARGBRow_C, | 68 ANY31(J422ToARGBRow_Any_SSSE3, J422ToARGBRow_SSSE3, 1, 0, 4, 7) |
| 62 1, 4, 7) | 69 #endif |
| 70 #ifdef HAS_J422TOARGBROW_AVX2 |
| 71 ANY31(J422ToARGBRow_Any_AVX2, J422ToARGBRow_AVX2, 1, 0, 4, 15) |
| 63 #endif | 72 #endif |
| 64 #ifdef HAS_I422TOARGBROW_AVX2 | 73 #ifdef HAS_I422TOARGBROW_AVX2 |
| 65 YANY(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, I422ToARGBRow_C, 1, 4, 15) | 74 ANY31(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, 1, 0, 4, 15) |
| 66 #endif | 75 #endif |
| 67 #ifdef HAS_I422TOBGRAROW_AVX2 | 76 #ifdef HAS_I422TOBGRAROW_AVX2 |
| 68 YANY(I422ToBGRARow_Any_AVX2, I422ToBGRARow_AVX2, I422ToBGRARow_C, 1, 4, 15) | 77 ANY31(I422ToBGRARow_Any_AVX2, I422ToBGRARow_AVX2, 1, 0, 4, 15) |
| 69 #endif | 78 #endif |
| 70 #ifdef HAS_I422TORGBAROW_AVX2 | 79 #ifdef HAS_I422TORGBAROW_AVX2 |
| 71 YANY(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, I422ToRGBARow_C, 1, 4, 15) | 80 ANY31(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15) |
| 72 #endif | 81 #endif |
| 73 #ifdef HAS_I422TOABGRROW_AVX2 | 82 #ifdef HAS_I422TOABGRROW_AVX2 |
| 74 YANY(I422ToABGRRow_Any_AVX2, I422ToABGRRow_AVX2, I422ToABGRRow_C, 1, 4, 15) | 83 ANY31(I422ToABGRRow_Any_AVX2, I422ToABGRRow_AVX2, 1, 0, 4, 15) |
| 84 #endif |
| 85 #ifdef HAS_I444TOARGBROW_AVX2 |
| 86 ANY31(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15) |
| 87 #endif |
| 88 #ifdef HAS_I411TOARGBROW_AVX2 |
| 89 ANY31(I411ToARGBRow_Any_AVX2, I411ToARGBRow_AVX2, 2, 0, 4, 15) |
| 75 #endif | 90 #endif |
| 76 #ifdef HAS_I422TOARGB4444ROW_AVX2 | 91 #ifdef HAS_I422TOARGB4444ROW_AVX2 |
| 77 YANY(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, I422ToARGB4444Row_C, | 92 ANY31(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, 1, 0, 2, 7) |
| 78 1, 2, 7) | |
| 79 #endif | 93 #endif |
| 80 #ifdef HAS_I422TOARGB1555ROW_AVX2 | 94 #ifdef HAS_I422TOARGB1555ROW_AVX2 |
| 81 YANY(I422ToARGB1555Row_Any_AVX2, I422ToARGB1555Row_AVX2, I422ToARGB1555Row_C, | 95 ANY31(I422ToARGB1555Row_Any_AVX2, I422ToARGB1555Row_AVX2, 1, 0, 2, 7) |
| 82 1, 2, 7) | |
| 83 #endif | 96 #endif |
| 84 #ifdef HAS_I422TORGB565ROW_AVX2 | 97 #ifdef HAS_I422TORGB565ROW_AVX2 |
| 85 YANY(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, I422ToRGB565Row_C, | 98 ANY31(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 7) |
| 86 1, 2, 7) | |
| 87 #endif | 99 #endif |
| 88 #ifdef HAS_I422TOARGBROW_NEON | 100 #ifdef HAS_I422TOARGBROW_NEON |
| 89 YANY(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, I444ToARGBRow_C, 0, 4, 7) | 101 ANY31(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7) |
| 90 YANY(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, I422ToARGBRow_C, 1, 4, 7) | 102 ANY31(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7) |
| 91 YANY(I411ToARGBRow_Any_NEON, I411ToARGBRow_NEON, I411ToARGBRow_C, 2, 4, 7) | 103 ANY31(I411ToARGBRow_Any_NEON, I411ToARGBRow_NEON, 2, 0, 4, 7) |
| 92 YANY(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, I422ToBGRARow_C, 1, 4, 7) | 104 ANY31(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, 1, 0, 4, 7) |
| 93 YANY(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, I422ToABGRRow_C, 1, 4, 7) | 105 ANY31(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, 1, 0, 4, 7) |
| 94 YANY(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, I422ToRGBARow_C, 1, 4, 7) | 106 ANY31(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, 1, 0, 4, 7) |
| 95 YANY(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, I422ToRGB24Row_C, 1, 3, 7) | 107 ANY31(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, 1, 0, 3, 7) |
| 96 YANY(I422ToRAWRow_Any_NEON, I422ToRAWRow_NEON, I422ToRAWRow_C, 1, 3, 7) | 108 ANY31(I422ToRAWRow_Any_NEON, I422ToRAWRow_NEON, 1, 0, 3, 7) |
| 97 YANY(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, I422ToARGB4444Row_C, | 109 ANY31(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7) |
| 98 1, 2, 7) | 110 ANY31(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7) |
| 99 YANY(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, I422ToARGB1555Row_C, | 111 ANY31(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7) |
| 100 1, 2, 7) | |
| 101 YANY(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, I422ToRGB565Row_C, 1, 2, 7) | |
| 102 #endif | 112 #endif |
| 103 #ifdef HAS_I422TOYUY2ROW_NEON | 113 #ifdef HAS_I422TOYUY2ROW_NEON |
| 104 YANY(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, I422ToYUY2Row_C, 1, 2, 15) | 114 ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15) |
| 105 #endif | 115 #endif |
| 106 #ifdef HAS_I422TOUYVYROW_NEON | 116 #ifdef HAS_I422TOUYVYROW_NEON |
| 107 YANY(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, I422ToUYVYRow_C, 1, 2, 15) | 117 ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15) |
| 108 #endif | 118 #endif |
| 109 #undef YANY | 119 #undef ANY31 |
| 110 | 120 |
| 111 // Wrappers to handle odd width | 121 // Any 2 planes to 1. |
| 112 #define NV2NY(NAMEANY, NV12TORGB_SIMD, NV12TORGB_C, UV_SHIFT, BPP, MASK) \ | 122 #define ANY21(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \ |
| 113 void NAMEANY(const uint8* y_buf, const uint8* uv_buf, \ | 123 void NAMEANY(const uint8* y_buf, const uint8* uv_buf, \ |
| 114 uint8* rgb_buf, int width) { \ | 124 uint8* dst_ptr, int width) { \ |
| 115 int n = width & ~MASK; \ | 125 SIMD_ALIGNED(uint8 temp[64 * 3]); \ |
| 116 if (n > 0) { \ | 126 memset(temp, 0, 64 * 2); /* for msan */ \ |
| 117 NV12TORGB_SIMD(y_buf, uv_buf, rgb_buf, n); \ | 127 int r = width & MASK; \ |
| 118 } \ | 128 int n = width & ~MASK; \ |
| 119 NV12TORGB_C(y_buf + n, \ | 129 if (n > 0) { \ |
| 120 uv_buf + (n >> UV_SHIFT), \ | 130 ANY_SIMD(y_buf, uv_buf, dst_ptr, n); \ |
| 121 rgb_buf + n * BPP, width & MASK); \ | 131 } \ |
| 122 } | 132 memcpy(temp, y_buf + n * SBPP, r * SBPP); \ |
| 123 | 133 memcpy(temp + 64, uv_buf + (n >> UVSHIFT) * SBPP2, \ |
| 134 SS(r, UVSHIFT) * SBPP2); \ |
| 135 ANY_SIMD(temp, temp + 64, temp + 128, MASK + 1); \ |
| 136 memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ |
| 137 } |
| 138 |
| 139 // Biplanar to RGB. |
| 124 #ifdef HAS_NV12TOARGBROW_SSSE3 | 140 #ifdef HAS_NV12TOARGBROW_SSSE3 |
| 125 NV2NY(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_SSSE3, NV12ToARGBRow_C, 0, 4, 7) | 141 ANY21(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_SSSE3, 1, 1, 2, 4, 7) |
| 126 NV2NY(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_SSSE3, NV21ToARGBRow_C, 0, 4, 7) | 142 ANY21(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_SSSE3, 1, 1, 2, 4, 7) |
| 127 #endif | 143 #endif |
| 128 #ifdef HAS_NV12TOARGBROW_AVX2 | 144 #ifdef HAS_NV12TOARGBROW_AVX2 |
| 129 NV2NY(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, NV12ToARGBRow_C, 0, 4, 15) | 145 ANY21(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15) |
| 130 NV2NY(NV21ToARGBRow_Any_AVX2, NV21ToARGBRow_AVX2, NV21ToARGBRow_C, 0, 4, 15) | 146 ANY21(NV21ToARGBRow_Any_AVX2, NV21ToARGBRow_AVX2, 1, 1, 2, 4, 15) |
| 131 #endif | 147 #endif |
| 132 #ifdef HAS_NV12TOARGBROW_NEON | 148 #ifdef HAS_NV12TOARGBROW_NEON |
| 133 NV2NY(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, NV12ToARGBRow_C, 0, 4, 7) | 149 ANY21(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7) |
| 134 NV2NY(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, NV21ToARGBRow_C, 0, 4, 7) | 150 ANY21(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, 1, 1, 2, 4, 7) |
| 135 #endif | 151 #endif |
| 136 #ifdef HAS_NV12TORGB565ROW_SSSE3 | 152 #ifdef HAS_NV12TORGB565ROW_SSSE3 |
| 137 NV2NY(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, NV12ToRGB565Row_C, | 153 ANY21(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, 1, 1, 2, 2, 7) |
| 138 0, 2, 7) | 154 ANY21(NV21ToRGB565Row_Any_SSSE3, NV21ToRGB565Row_SSSE3, 1, 1, 2, 2, 7) |
| 139 NV2NY(NV21ToRGB565Row_Any_SSSE3, NV21ToRGB565Row_SSSE3, NV21ToRGB565Row_C, | |
| 140 0, 2, 7) | |
| 141 #endif | 155 #endif |
| 142 #ifdef HAS_NV12TORGB565ROW_AVX2 | 156 #ifdef HAS_NV12TORGB565ROW_AVX2 |
| 143 NV2NY(NV12ToRGB565Row_Any_AVX2, NV12ToRGB565Row_AVX2, NV12ToRGB565Row_C, | 157 ANY21(NV12ToRGB565Row_Any_AVX2, NV12ToRGB565Row_AVX2, 1, 1, 2, 2, 15) |
| 144 0, 2, 15) | 158 ANY21(NV21ToRGB565Row_Any_AVX2, NV21ToRGB565Row_AVX2, 1, 1, 2, 2, 15) |
| 145 NV2NY(NV21ToRGB565Row_Any_AVX2, NV21ToRGB565Row_AVX2, NV21ToRGB565Row_C, | |
| 146 0, 2, 15) | |
| 147 #endif | 159 #endif |
| 148 #ifdef HAS_NV12TORGB565ROW_NEON | 160 #ifdef HAS_NV12TORGB565ROW_NEON |
| 149 NV2NY(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, NV12ToRGB565Row_C, | 161 ANY21(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, 1, 1, 2, 2, 7) |
| 150 0, 2, 7) | 162 ANY21(NV21ToRGB565Row_Any_NEON, NV21ToRGB565Row_NEON, 1, 1, 2, 2, 7) |
| 151 NV2NY(NV21ToRGB565Row_Any_NEON, NV21ToRGB565Row_NEON, NV21ToRGB565Row_C, | 163 #endif |
| 152 0, 2, 7) | 164 |
| 153 #endif | 165 // Merge functions. |
| 154 #undef NVANY | 166 #ifdef HAS_MERGEUVROW_SSE2 |
| 155 | 167 ANY21(MergeUVRow_Any_SSE2, MergeUVRow_SSE2, 0, 1, 1, 2, 15) |
| 156 #define RGBANY(NAMEANY, ARGBTORGB_SIMD, ARGBTORGB_C, SBPP, BPP, MASK) \ | 168 #endif |
| 157 void NAMEANY(const uint8* src, uint8* dst, int width) { \ | 169 #ifdef HAS_MERGEUVROW_AVX2 |
| 158 int n = width & ~MASK; \ | 170 ANY21(MergeUVRow_Any_AVX2, MergeUVRow_AVX2, 0, 1, 1, 2, 31) |
| 159 if (n > 0) { \ | 171 #endif |
| 160 ARGBTORGB_SIMD(src, dst, n); \ | 172 #ifdef HAS_MERGEUVROW_NEON |
| 161 } \ | 173 ANY21(MergeUVRow_Any_NEON, MergeUVRow_NEON, 0, 1, 1, 2, 15) |
| 162 ARGBTORGB_C(src + n * SBPP, dst + n * BPP, width & MASK); \ | 174 #endif |
| 163 } | 175 |
| 164 | 176 // Math functions. |
| 177 #ifdef HAS_ARGBMULTIPLYROW_SSE2 |
| 178 ANY21(ARGBMultiplyRow_Any_SSE2, ARGBMultiplyRow_SSE2, 0, 4, 4, 4, 3) |
| 179 #endif |
| 180 #ifdef HAS_ARGBADDROW_SSE2 |
| 181 ANY21(ARGBAddRow_Any_SSE2, ARGBAddRow_SSE2, 0, 4, 4, 4, 3) |
| 182 #endif |
| 183 #ifdef HAS_ARGBSUBTRACTROW_SSE2 |
| 184 ANY21(ARGBSubtractRow_Any_SSE2, ARGBSubtractRow_SSE2, 0, 4, 4, 4, 3) |
| 185 #endif |
| 186 #ifdef HAS_ARGBMULTIPLYROW_AVX2 |
| 187 ANY21(ARGBMultiplyRow_Any_AVX2, ARGBMultiplyRow_AVX2, 0, 4, 4, 4, 7) |
| 188 #endif |
| 189 #ifdef HAS_ARGBADDROW_AVX2 |
| 190 ANY21(ARGBAddRow_Any_AVX2, ARGBAddRow_AVX2, 0, 4, 4, 4, 7) |
| 191 #endif |
| 192 #ifdef HAS_ARGBSUBTRACTROW_AVX2 |
| 193 ANY21(ARGBSubtractRow_Any_AVX2, ARGBSubtractRow_AVX2, 0, 4, 4, 4, 7) |
| 194 #endif |
| 195 #ifdef HAS_ARGBMULTIPLYROW_NEON |
| 196 ANY21(ARGBMultiplyRow_Any_NEON, ARGBMultiplyRow_NEON, 0, 4, 4, 4, 7) |
| 197 #endif |
| 198 #ifdef HAS_ARGBADDROW_NEON |
| 199 ANY21(ARGBAddRow_Any_NEON, ARGBAddRow_NEON, 0, 4, 4, 4, 7) |
| 200 #endif |
| 201 #ifdef HAS_ARGBSUBTRACTROW_NEON |
| 202 ANY21(ARGBSubtractRow_Any_NEON, ARGBSubtractRow_NEON, 0, 4, 4, 4, 7) |
| 203 #endif |
| 204 #ifdef HAS_SOBELROW_SSE2 |
| 205 ANY21(SobelRow_Any_SSE2, SobelRow_SSE2, 0, 1, 1, 4, 15) |
| 206 #endif |
| 207 #ifdef HAS_SOBELROW_NEON |
| 208 ANY21(SobelRow_Any_NEON, SobelRow_NEON, 0, 1, 1, 4, 7) |
| 209 #endif |
| 210 #ifdef HAS_SOBELTOPLANEROW_SSE2 |
| 211 ANY21(SobelToPlaneRow_Any_SSE2, SobelToPlaneRow_SSE2, 0, 1, 1, 1, 15) |
| 212 #endif |
| 213 #ifdef HAS_SOBELTOPLANEROW_NEON |
| 214 ANY21(SobelToPlaneRow_Any_NEON, SobelToPlaneRow_NEON, 0, 1, 1, 1, 15) |
| 215 #endif |
| 216 #ifdef HAS_SOBELXYROW_SSE2 |
| 217 ANY21(SobelXYRow_Any_SSE2, SobelXYRow_SSE2, 0, 1, 1, 4, 15) |
| 218 #endif |
| 219 #ifdef HAS_SOBELXYROW_NEON |
| 220 ANY21(SobelXYRow_Any_NEON, SobelXYRow_NEON, 0, 1, 1, 4, 7) |
| 221 #endif |
| 222 #undef ANY21 |
| 223 |
| 224 // Any 1 to 1. |
| 225 #define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \ |
| 226 void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \ |
| 227 SIMD_ALIGNED(uint8 temp[128 * 2]); \ |
| 228 memset(temp, 0, 128); /* for YUY2 and msan */ \ |
| 229 int r = width & MASK; \ |
| 230 int n = width & ~MASK; \ |
| 231 if (n > 0) { \ |
| 232 ANY_SIMD(src_ptr, dst_ptr, n); \ |
| 233 } \ |
| 234 memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \ |
| 235 ANY_SIMD(temp, temp + 128, MASK + 1); \ |
| 236 memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ |
| 237 } |
| 238 |
| 239 #ifdef HAS_COPYROW_AVX |
| 240 ANY11(CopyRow_Any_AVX, CopyRow_AVX, 0, 1, 1, 63) |
| 241 #endif |
| 242 #ifdef HAS_COPYROW_SSE2 |
| 243 ANY11(CopyRow_Any_SSE2, CopyRow_SSE2, 0, 1, 1, 31) |
| 244 #endif |
| 245 #ifdef HAS_COPYROW_NEON |
| 246 ANY11(CopyRow_Any_NEON, CopyRow_NEON, 0, 1, 1, 31) |
| 247 #endif |
| 165 #if defined(HAS_ARGBTORGB24ROW_SSSE3) | 248 #if defined(HAS_ARGBTORGB24ROW_SSSE3) |
| 166 RGBANY(ARGBToRGB24Row_Any_SSSE3, ARGBToRGB24Row_SSSE3, ARGBToRGB24Row_C, | 249 ANY11(ARGBToRGB24Row_Any_SSSE3, ARGBToRGB24Row_SSSE3, 0, 4, 3, 15) |
| 167 4, 3, 15) | 250 ANY11(ARGBToRAWRow_Any_SSSE3, ARGBToRAWRow_SSSE3, 0, 4, 3, 15) |
| 168 RGBANY(ARGBToRAWRow_Any_SSSE3, ARGBToRAWRow_SSSE3, ARGBToRAWRow_C, | 251 ANY11(ARGBToRGB565Row_Any_SSE2, ARGBToRGB565Row_SSE2, 0, 4, 2, 3) |
| 169 4, 3, 15) | 252 ANY11(ARGBToARGB1555Row_Any_SSE2, ARGBToARGB1555Row_SSE2, 0, 4, 2, 3) |
| 170 RGBANY(ARGBToRGB565Row_Any_SSE2, ARGBToRGB565Row_SSE2, ARGBToRGB565Row_C, | 253 ANY11(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, 0, 4, 2, 3) |
| 171 4, 2, 3) | |
| 172 RGBANY(ARGBToARGB1555Row_Any_SSE2, ARGBToARGB1555Row_SSE2, ARGBToARGB1555Row_C, | |
| 173 4, 2, 3) | |
| 174 RGBANY(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, ARGBToARGB4444Row_C, | |
| 175 4, 2, 3) | |
| 176 #endif | 254 #endif |
| 177 #if defined(HAS_ARGBTOARGB4444ROW_AVX2) | 255 #if defined(HAS_ARGBTOARGB4444ROW_AVX2) |
| 178 RGBANY(ARGBToRGB565Row_Any_AVX2, ARGBToRGB565Row_AVX2, ARGBToRGB565Row_C, | 256 ANY11(ARGBToRGB565Row_Any_AVX2, ARGBToRGB565Row_AVX2, 0, 4, 2, 7) |
| 179 4, 2, 7) | 257 ANY11(ARGBToARGB1555Row_Any_AVX2, ARGBToARGB1555Row_AVX2, 0, 4, 2, 7) |
| 180 RGBANY(ARGBToARGB1555Row_Any_AVX2, ARGBToARGB1555Row_AVX2, ARGBToARGB1555Row_C, | 258 ANY11(ARGBToARGB4444Row_Any_AVX2, ARGBToARGB4444Row_AVX2, 0, 4, 2, 7) |
| 181 4, 2, 7) | 259 #endif |
| 182 RGBANY(ARGBToARGB4444Row_Any_AVX2, ARGBToARGB4444Row_AVX2, ARGBToARGB4444Row_C, | 260 #if defined(HAS_J400TOARGBROW_SSE2) |
| 183 4, 2, 7) | 261 ANY11(J400ToARGBRow_Any_SSE2, J400ToARGBRow_SSE2, 0, 1, 4, 7) |
| 184 #endif | 262 #endif |
| 185 | 263 #if defined(HAS_J400TOARGBROW_AVX2) |
| 264 ANY11(J400ToARGBRow_Any_AVX2, J400ToARGBRow_AVX2, 0, 1, 4, 15) |
| 265 #endif |
| 186 #if defined(HAS_I400TOARGBROW_SSE2) | 266 #if defined(HAS_I400TOARGBROW_SSE2) |
| 187 RGBANY(I400ToARGBRow_Any_SSE2, I400ToARGBRow_SSE2, I400ToARGBRow_C, 1, 4, 7) | 267 ANY11(I400ToARGBRow_Any_SSE2, I400ToARGBRow_SSE2, 0, 1, 4, 7) |
| 188 #endif | 268 #endif |
| 189 #if defined(HAS_YTOARGBROW_SSE2) | 269 #if defined(HAS_I400TOARGBROW_AVX2) |
| 190 RGBANY(YToARGBRow_Any_SSE2, YToARGBRow_SSE2, YToARGBRow_C, 1, 4, 7) | 270 ANY11(I400ToARGBRow_Any_AVX2, I400ToARGBRow_AVX2, 0, 1, 4, 15) |
| 191 #endif | |
| 192 #if defined(HAS_YTOARGBROW_AVX2) | |
| 193 RGBANY(YToARGBRow_Any_AVX2, YToARGBRow_AVX2, YToARGBRow_C, 1, 4, 15) | |
| 194 #endif | 271 #endif |
| 195 #if defined(HAS_YUY2TOARGBROW_SSSE3) | 272 #if defined(HAS_YUY2TOARGBROW_SSSE3) |
| 196 RGBANY(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_SSSE3, YUY2ToARGBRow_C, 2, 4, 15) | 273 ANY11(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_SSSE3, 1, 4, 4, 15) |
| 197 RGBANY(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_SSSE3, UYVYToARGBRow_C, 2, 4, 15) | 274 ANY11(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_SSSE3, 1, 4, 4, 15) |
| 198 RGBANY(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, RGB24ToARGBRow_C, | 275 ANY11(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, 0, 3, 4, 15) |
| 199 3, 4, 15) | 276 ANY11(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, 0, 3, 4, 15) |
| 200 RGBANY(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, RAWToARGBRow_C, 3, 4, 15) | 277 ANY11(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, 0, 2, 4, 7) |
| 201 RGBANY(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, RGB565ToARGBRow_C, | 278 ANY11(ARGB1555ToARGBRow_Any_SSE2, ARGB1555ToARGBRow_SSE2, 0, 2, 4, 7) |
| 202 2, 4, 7) | 279 ANY11(ARGB4444ToARGBRow_Any_SSE2, ARGB4444ToARGBRow_SSE2, 0, 2, 4, 7) |
| 203 RGBANY(ARGB1555ToARGBRow_Any_SSE2, ARGB1555ToARGBRow_SSE2, ARGB1555ToARGBRow_C, | 280 #endif |
| 204 2, 4, 7) | 281 #if defined(HAS_RGB565TOARGBROW_AVX2) |
| 205 RGBANY(ARGB4444ToARGBRow_Any_SSE2, ARGB4444ToARGBRow_SSE2, ARGB4444ToARGBRow_C, | 282 ANY11(RGB565ToARGBRow_Any_AVX2, RGB565ToARGBRow_AVX2, 0, 2, 4, 15) |
| 206 2, 4, 7) | 283 #endif |
| 284 #if defined(HAS_ARGB1555TOARGBROW_AVX2) |
| 285 ANY11(ARGB1555ToARGBRow_Any_AVX2, ARGB1555ToARGBRow_AVX2, 0, 2, 4, 15) |
| 286 #endif |
| 287 #if defined(HAS_ARGB4444TOARGBROW_AVX2) |
| 288 ANY11(ARGB4444ToARGBRow_Any_AVX2, ARGB4444ToARGBRow_AVX2, 0, 2, 4, 15) |
| 207 #endif | 289 #endif |
| 208 #if defined(HAS_YUY2TOARGBROW_AVX2) | 290 #if defined(HAS_YUY2TOARGBROW_AVX2) |
| 209 RGBANY(YUY2ToARGBRow_Any_AVX2, YUY2ToARGBRow_AVX2, YUY2ToARGBRow_C, 2, 4, 31) | 291 ANY11(YUY2ToARGBRow_Any_AVX2, YUY2ToARGBRow_AVX2, 1, 4, 4, 31) |
| 210 RGBANY(UYVYToARGBRow_Any_AVX2, UYVYToARGBRow_AVX2, UYVYToARGBRow_C, 2, 4, 31) | 292 ANY11(UYVYToARGBRow_Any_AVX2, UYVYToARGBRow_AVX2, 1, 4, 4, 31) |
| 211 #endif | 293 #endif |
| 212 #if defined(HAS_ARGBTORGB24ROW_NEON) | 294 #if defined(HAS_ARGBTORGB24ROW_NEON) |
| 213 RGBANY(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, ARGBToRGB24Row_C, 4, 3, 7) | 295 ANY11(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, 0, 4, 3, 7) |
| 214 RGBANY(ARGBToRAWRow_Any_NEON, ARGBToRAWRow_NEON, ARGBToRAWRow_C, 4, 3, 7) | 296 ANY11(ARGBToRAWRow_Any_NEON, ARGBToRAWRow_NEON, 0, 4, 3, 7) |
| 215 RGBANY(ARGBToRGB565Row_Any_NEON, ARGBToRGB565Row_NEON, ARGBToRGB565Row_C, | 297 ANY11(ARGBToRGB565Row_Any_NEON, ARGBToRGB565Row_NEON, 0, 4, 2, 7) |
| 216 4, 2, 7) | 298 ANY11(ARGBToARGB1555Row_Any_NEON, ARGBToARGB1555Row_NEON, 0, 4, 2, 7) |
| 217 RGBANY(ARGBToARGB1555Row_Any_NEON, ARGBToARGB1555Row_NEON, ARGBToARGB1555Row_C, | 299 ANY11(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, 0, 4, 2, 7) |
| 218 4, 2, 7) | 300 ANY11(J400ToARGBRow_Any_NEON, J400ToARGBRow_NEON, 0, 1, 4, 7) |
| 219 RGBANY(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, ARGBToARGB4444Row_C, | 301 ANY11(I400ToARGBRow_Any_NEON, I400ToARGBRow_NEON, 0, 1, 4, 7) |
| 220 4, 2, 7) | 302 ANY11(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, 1, 4, 4, 7) |
| 221 RGBANY(I400ToARGBRow_Any_NEON, I400ToARGBRow_NEON, I400ToARGBRow_C, 1, 4, 7) | 303 ANY11(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, 1, 4, 4, 7) |
| 222 RGBANY(YToARGBRow_Any_NEON, YToARGBRow_NEON, YToARGBRow_C, 1, 4, 7) | 304 #endif |
| 223 RGBANY(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, YUY2ToARGBRow_C, 2, 4, 7) | |
| 224 RGBANY(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, UYVYToARGBRow_C, 2, 4, 7) | |
| 225 #endif | |
| 226 #undef RGBANY | |
| 227 | |
| 228 // ARGB to Bayer does multiple of 4 pixels, SSSE3 aligned src, unaligned dst. | |
| 229 #define BAYERANY(NAMEANY, ARGBTORGB_SIMD, ARGBTORGB_C, SBPP, BPP, MASK) \ | |
| 230 void NAMEANY(const uint8* src, uint8* dst, uint32 selector, int width) { \ | |
| 231 int n = width & ~MASK; \ | |
| 232 if (n > 0) { \ | |
| 233 ARGBTORGB_SIMD(src, dst, selector, n); \ | |
| 234 } \ | |
| 235 ARGBTORGB_C(src + n * SBPP, dst + n * BPP, selector, width & MASK); \ | |
| 236 } | |
| 237 | |
| 238 #if defined(HAS_ARGBTOBAYERGGROW_SSE2) | |
| 239 BAYERANY(ARGBToBayerGGRow_Any_SSE2, ARGBToBayerGGRow_SSE2, ARGBToBayerGGRow_C, | |
| 240 4, 1, 7) | |
| 241 #endif | |
| 242 #if defined(HAS_ARGBTOBAYERGGROW_NEON) | |
| 243 BAYERANY(ARGBToBayerGGRow_Any_NEON, ARGBToBayerGGRow_NEON, ARGBToBayerGGRow_C, | |
| 244 4, 1, 7) | |
| 245 #endif | |
| 246 | |
| 247 #undef BAYERANY | |
| 248 | |
| 249 #define YANY(NAMEANY, ARGBTOY_SIMD, ARGBTOY_C, SBPP, BPP, MASK) \ | |
| 250 void NAMEANY(const uint8* src_argb, uint8* dst_y, int width) { \ | |
| 251 int n = width & ~MASK; \ | |
| 252 if (n > 0) { \ | |
| 253 ARGBTOY_SIMD(src_argb, dst_y, n); \ | |
| 254 } \ | |
| 255 ARGBTOY_C(src_argb + n * SBPP, \ | |
| 256 dst_y + n * BPP, width & MASK); \ | |
| 257 } | |
| 258 #ifdef HAS_ARGBTOYROW_AVX2 | 305 #ifdef HAS_ARGBTOYROW_AVX2 |
| 259 YANY(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, ARGBToYRow_C, 4, 1, 31) | 306 ANY11(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, 0, 4, 1, 31) |
| 260 #endif | 307 #endif |
| 261 #ifdef HAS_ARGBTOYJROW_AVX2 | 308 #ifdef HAS_ARGBTOYJROW_AVX2 |
| 262 YANY(ARGBToYJRow_Any_AVX2, ARGBToYJRow_AVX2, ARGBToYJRow_C, 4, 1, 31) | 309 ANY11(ARGBToYJRow_Any_AVX2, ARGBToYJRow_AVX2, 0, 4, 1, 31) |
| 263 #endif | 310 #endif |
| 264 #ifdef HAS_UYVYTOYROW_AVX2 | 311 #ifdef HAS_UYVYTOYROW_AVX2 |
| 265 YANY(UYVYToYRow_Any_AVX2, UYVYToYRow_AVX2, UYVYToYRow_C, 2, 1, 31) | 312 ANY11(UYVYToYRow_Any_AVX2, UYVYToYRow_AVX2, 0, 2, 1, 31) |
| 266 #endif | 313 #endif |
| 267 #ifdef HAS_YUY2TOYROW_AVX2 | 314 #ifdef HAS_YUY2TOYROW_AVX2 |
| 268 YANY(YUY2ToYRow_Any_AVX2, YUY2ToYRow_AVX2, YUY2ToYRow_C, 2, 1, 31) | 315 ANY11(YUY2ToYRow_Any_AVX2, YUY2ToYRow_AVX2, 1, 4, 1, 31) |
| 269 #endif | 316 #endif |
| 270 #ifdef HAS_ARGBTOYROW_SSSE3 | 317 #ifdef HAS_ARGBTOYROW_SSSE3 |
| 271 YANY(ARGBToYRow_Any_SSSE3, ARGBToYRow_SSSE3, ARGBToYRow_C, 4, 1, 15) | 318 ANY11(ARGBToYRow_Any_SSSE3, ARGBToYRow_SSSE3, 0, 4, 1, 15) |
| 272 #endif | 319 #endif |
| 273 #ifdef HAS_BGRATOYROW_SSSE3 | 320 #ifdef HAS_BGRATOYROW_SSSE3 |
| 274 YANY(BGRAToYRow_Any_SSSE3, BGRAToYRow_SSSE3, BGRAToYRow_C, 4, 1, 15) | 321 ANY11(BGRAToYRow_Any_SSSE3, BGRAToYRow_SSSE3, 0, 4, 1, 15) |
| 275 YANY(ABGRToYRow_Any_SSSE3, ABGRToYRow_SSSE3, ABGRToYRow_C, 4, 1, 15) | 322 ANY11(ABGRToYRow_Any_SSSE3, ABGRToYRow_SSSE3, 0, 4, 1, 15) |
| 276 YANY(RGBAToYRow_Any_SSSE3, RGBAToYRow_SSSE3, RGBAToYRow_C, 4, 1, 15) | 323 ANY11(RGBAToYRow_Any_SSSE3, RGBAToYRow_SSSE3, 0, 4, 1, 15) |
| 277 YANY(YUY2ToYRow_Any_SSE2, YUY2ToYRow_SSE2, YUY2ToYRow_C, 2, 1, 15) | 324 ANY11(YUY2ToYRow_Any_SSE2, YUY2ToYRow_SSE2, 1, 4, 1, 15) |
| 278 YANY(UYVYToYRow_Any_SSE2, UYVYToYRow_SSE2, UYVYToYRow_C, 2, 1, 15) | 325 ANY11(UYVYToYRow_Any_SSE2, UYVYToYRow_SSE2, 1, 4, 1, 15) |
| 279 #endif | 326 #endif |
| 280 #ifdef HAS_ARGBTOYJROW_SSSE3 | 327 #ifdef HAS_ARGBTOYJROW_SSSE3 |
| 281 YANY(ARGBToYJRow_Any_SSSE3, ARGBToYJRow_SSSE3, ARGBToYJRow_C, 4, 1, 15) | 328 ANY11(ARGBToYJRow_Any_SSSE3, ARGBToYJRow_SSSE3, 0, 4, 1, 15) |
| 282 #endif | 329 #endif |
| 283 #ifdef HAS_ARGBTOYROW_NEON | 330 #ifdef HAS_ARGBTOYROW_NEON |
| 284 YANY(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, ARGBToYRow_C, 4, 1, 7) | 331 ANY11(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 0, 4, 1, 7) |
| 285 #endif | 332 #endif |
| 286 #ifdef HAS_ARGBTOYJROW_NEON | 333 #ifdef HAS_ARGBTOYJROW_NEON |
| 287 YANY(ARGBToYJRow_Any_NEON, ARGBToYJRow_NEON, ARGBToYJRow_C, 4, 1, 7) | 334 ANY11(ARGBToYJRow_Any_NEON, ARGBToYJRow_NEON, 0, 4, 1, 7) |
| 288 #endif | 335 #endif |
| 289 #ifdef HAS_BGRATOYROW_NEON | 336 #ifdef HAS_BGRATOYROW_NEON |
| 290 YANY(BGRAToYRow_Any_NEON, BGRAToYRow_NEON, BGRAToYRow_C, 4, 1, 7) | 337 ANY11(BGRAToYRow_Any_NEON, BGRAToYRow_NEON, 0, 4, 1, 7) |
| 291 #endif | 338 #endif |
| 292 #ifdef HAS_ABGRTOYROW_NEON | 339 #ifdef HAS_ABGRTOYROW_NEON |
| 293 YANY(ABGRToYRow_Any_NEON, ABGRToYRow_NEON, ABGRToYRow_C, 4, 1, 7) | 340 ANY11(ABGRToYRow_Any_NEON, ABGRToYRow_NEON, 0, 4, 1, 7) |
| 294 #endif | 341 #endif |
| 295 #ifdef HAS_RGBATOYROW_NEON | 342 #ifdef HAS_RGBATOYROW_NEON |
| 296 YANY(RGBAToYRow_Any_NEON, RGBAToYRow_NEON, RGBAToYRow_C, 4, 1, 7) | 343 ANY11(RGBAToYRow_Any_NEON, RGBAToYRow_NEON, 0, 4, 1, 7) |
| 297 #endif | 344 #endif |
| 298 #ifdef HAS_RGB24TOYROW_NEON | 345 #ifdef HAS_RGB24TOYROW_NEON |
| 299 YANY(RGB24ToYRow_Any_NEON, RGB24ToYRow_NEON, RGB24ToYRow_C, 3, 1, 7) | 346 ANY11(RGB24ToYRow_Any_NEON, RGB24ToYRow_NEON, 0, 3, 1, 7) |
| 300 #endif | 347 #endif |
| 301 #ifdef HAS_RAWTOYROW_NEON | 348 #ifdef HAS_RAWTOYROW_NEON |
| 302 YANY(RAWToYRow_Any_NEON, RAWToYRow_NEON, RAWToYRow_C, 3, 1, 7) | 349 ANY11(RAWToYRow_Any_NEON, RAWToYRow_NEON, 0, 3, 1, 7) |
| 303 #endif | 350 #endif |
| 304 #ifdef HAS_RGB565TOYROW_NEON | 351 #ifdef HAS_RGB565TOYROW_NEON |
| 305 YANY(RGB565ToYRow_Any_NEON, RGB565ToYRow_NEON, RGB565ToYRow_C, 2, 1, 7) | 352 ANY11(RGB565ToYRow_Any_NEON, RGB565ToYRow_NEON, 0, 2, 1, 7) |
| 306 #endif | 353 #endif |
| 307 #ifdef HAS_ARGB1555TOYROW_NEON | 354 #ifdef HAS_ARGB1555TOYROW_NEON |
| 308 YANY(ARGB1555ToYRow_Any_NEON, ARGB1555ToYRow_NEON, ARGB1555ToYRow_C, 2, 1, 7) | 355 ANY11(ARGB1555ToYRow_Any_NEON, ARGB1555ToYRow_NEON, 0, 2, 1, 7) |
| 309 #endif | 356 #endif |
| 310 #ifdef HAS_ARGB4444TOYROW_NEON | 357 #ifdef HAS_ARGB4444TOYROW_NEON |
| 311 YANY(ARGB4444ToYRow_Any_NEON, ARGB4444ToYRow_NEON, ARGB4444ToYRow_C, 2, 1, 7) | 358 ANY11(ARGB4444ToYRow_Any_NEON, ARGB4444ToYRow_NEON, 0, 2, 1, 7) |
| 312 #endif | 359 #endif |
| 313 #ifdef HAS_YUY2TOYROW_NEON | 360 #ifdef HAS_YUY2TOYROW_NEON |
| 314 YANY(YUY2ToYRow_Any_NEON, YUY2ToYRow_NEON, YUY2ToYRow_C, 2, 1, 15) | 361 ANY11(YUY2ToYRow_Any_NEON, YUY2ToYRow_NEON, 1, 4, 1, 15) |
| 315 #endif | 362 #endif |
| 316 #ifdef HAS_UYVYTOYROW_NEON | 363 #ifdef HAS_UYVYTOYROW_NEON |
| 317 YANY(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, UYVYToYRow_C, 2, 1, 15) | 364 ANY11(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 0, 2, 1, 15) |
| 318 #endif | 365 #endif |
| 319 #ifdef HAS_RGB24TOARGBROW_NEON | 366 #ifdef HAS_RGB24TOARGBROW_NEON |
| 320 YANY(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, RGB24ToARGBRow_C, 3, 4, 7) | 367 ANY11(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 0, 3, 4, 7) |
| 321 #endif | 368 #endif |
| 322 #ifdef HAS_RAWTOARGBROW_NEON | 369 #ifdef HAS_RAWTOARGBROW_NEON |
| 323 YANY(RAWToARGBRow_Any_NEON, RAWToARGBRow_NEON, RAWToARGBRow_C, 3, 4, 7) | 370 ANY11(RAWToARGBRow_Any_NEON, RAWToARGBRow_NEON, 0, 3, 4, 7) |
| 324 #endif | 371 #endif |
| 325 #ifdef HAS_RGB565TOARGBROW_NEON | 372 #ifdef HAS_RGB565TOARGBROW_NEON |
| 326 YANY(RGB565ToARGBRow_Any_NEON, RGB565ToARGBRow_NEON, RGB565ToARGBRow_C, 2, 4, 7) | 373 ANY11(RGB565ToARGBRow_Any_NEON, RGB565ToARGBRow_NEON, 0, 2, 4, 7) |
| 327 #endif | 374 #endif |
| 328 #ifdef HAS_ARGB1555TOARGBROW_NEON | 375 #ifdef HAS_ARGB1555TOARGBROW_NEON |
| 329 YANY(ARGB1555ToARGBRow_Any_NEON, ARGB1555ToARGBRow_NEON, ARGB1555ToARGBRow_C, | 376 ANY11(ARGB1555ToARGBRow_Any_NEON, ARGB1555ToARGBRow_NEON, 0, 2, 4, 7) |
| 330 2, 4, 7) | |
| 331 #endif | 377 #endif |
| 332 #ifdef HAS_ARGB4444TOARGBROW_NEON | 378 #ifdef HAS_ARGB4444TOARGBROW_NEON |
| 333 YANY(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, ARGB4444ToARGBRow_C, | 379 ANY11(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 0, 2, 4, 7) |
| 334 2, 4, 7) | |
| 335 #endif | 380 #endif |
| 336 #ifdef HAS_ARGBATTENUATEROW_SSSE3 | 381 #ifdef HAS_ARGBATTENUATEROW_SSSE3 |
| 337 YANY(ARGBAttenuateRow_Any_SSSE3, ARGBAttenuateRow_SSSE3, ARGBAttenuateRow_C, | 382 ANY11(ARGBAttenuateRow_Any_SSSE3, ARGBAttenuateRow_SSSE3, 0, 4, 4, 3) |
| 338 4, 4, 3) | |
| 339 #endif | 383 #endif |
| 340 #ifdef HAS_ARGBATTENUATEROW_SSE2 | 384 #ifdef HAS_ARGBATTENUATEROW_SSE2 |
| 341 YANY(ARGBAttenuateRow_Any_SSE2, ARGBAttenuateRow_SSE2, ARGBAttenuateRow_C, | 385 ANY11(ARGBAttenuateRow_Any_SSE2, ARGBAttenuateRow_SSE2, 0, 4, 4, 3) |
| 342 4, 4, 3) | |
| 343 #endif | 386 #endif |
| 344 #ifdef HAS_ARGBUNATTENUATEROW_SSE2 | 387 #ifdef HAS_ARGBUNATTENUATEROW_SSE2 |
| 345 YANY(ARGBUnattenuateRow_Any_SSE2, ARGBUnattenuateRow_SSE2, ARGBUnattenuateRow_C, | 388 ANY11(ARGBUnattenuateRow_Any_SSE2, ARGBUnattenuateRow_SSE2, 0, 4, 4, 3) |
| 346 4, 4, 3) | |
| 347 #endif | 389 #endif |
| 348 #ifdef HAS_ARGBATTENUATEROW_AVX2 | 390 #ifdef HAS_ARGBATTENUATEROW_AVX2 |
| 349 YANY(ARGBAttenuateRow_Any_AVX2, ARGBAttenuateRow_AVX2, ARGBAttenuateRow_C, | 391 ANY11(ARGBAttenuateRow_Any_AVX2, ARGBAttenuateRow_AVX2, 0, 4, 4, 7) |
| 350 4, 4, 7) | |
| 351 #endif | 392 #endif |
| 352 #ifdef HAS_ARGBUNATTENUATEROW_AVX2 | 393 #ifdef HAS_ARGBUNATTENUATEROW_AVX2 |
| 353 YANY(ARGBUnattenuateRow_Any_AVX2, ARGBUnattenuateRow_AVX2, ARGBUnattenuateRow_C, | 394 ANY11(ARGBUnattenuateRow_Any_AVX2, ARGBUnattenuateRow_AVX2, 0, 4, 4, 7) |
| 354 4, 4, 7) | |
| 355 #endif | 395 #endif |
| 356 #ifdef HAS_ARGBATTENUATEROW_NEON | 396 #ifdef HAS_ARGBATTENUATEROW_NEON |
| 357 YANY(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, ARGBAttenuateRow_C, | 397 ANY11(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, 0, 4, 4, 7) |
| 358 4, 4, 7) | 398 #endif |
| 359 #endif | 399 #undef ANY11 |
| 360 #undef YANY | 400 |
| 361 | 401 // Any 1 to 1 with parameter. |
| 362 // RGB/YUV to UV does multiple of 16 with SIMD and remainder with C. | 402 #define ANY11P(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK) \ |
| 363 #define UVANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, BPP, MASK) \ | 403 void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, \ |
| 364 void NAMEANY(const uint8* src_argb, int src_stride_argb, \ | 404 T shuffler, int width) { \ |
| 365 uint8* dst_u, uint8* dst_v, int width) { \ | 405 SIMD_ALIGNED(uint8 temp[64 * 2]); \ |
| 366 int n = width & ~MASK; \ | 406 memset(temp, 0, 64); /* for msan */ \ |
| 367 if (n > 0) { \ | 407 int r = width & MASK; \ |
| 368 ANYTOUV_SIMD(src_argb, src_stride_argb, dst_u, dst_v, n); \ | 408 int n = width & ~MASK; \ |
| 369 } \ | 409 if (n > 0) { \ |
| 370 ANYTOUV_C(src_argb + n * BPP, src_stride_argb, \ | 410 ANY_SIMD(src_ptr, dst_ptr, shuffler, n); \ |
| 371 dst_u + (n >> 1), \ | 411 } \ |
| 372 dst_v + (n >> 1), \ | 412 memcpy(temp, src_ptr + n * SBPP, r * SBPP); \ |
| 373 width & MASK); \ | 413 ANY_SIMD(temp, temp + 64, shuffler, MASK + 1); \ |
| 374 } | 414 memcpy(dst_ptr + n * BPP, temp + 64, r * BPP); \ |
| 375 | 415 } |
| 376 #ifdef HAS_ARGBTOUVROW_AVX2 | 416 |
| 377 UVANY(ARGBToUVRow_Any_AVX2, ARGBToUVRow_AVX2, ARGBToUVRow_C, 4, 31) | 417 #if defined(HAS_ARGBTORGB565DITHERROW_SSE2) |
| 378 #endif | 418 ANY11P(ARGBToRGB565DitherRow_Any_SSE2, ARGBToRGB565DitherRow_SSE2, |
| 379 #ifdef HAS_ARGBTOUVROW_SSSE3 | 419 const uint32, 4, 2, 3) |
| 380 UVANY(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_SSSE3, ARGBToUVRow_C, 4, 15) | 420 #endif |
| 381 UVANY(ARGBToUVJRow_Any_SSSE3, ARGBToUVJRow_SSSE3, ARGBToUVJRow_C, 4, 15) | 421 #if defined(HAS_ARGBTORGB565DITHERROW_AVX2) |
| 382 UVANY(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_SSSE3, BGRAToUVRow_C, 4, 15) | 422 ANY11P(ARGBToRGB565DitherRow_Any_AVX2, ARGBToRGB565DitherRow_AVX2, |
| 383 UVANY(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_SSSE3, ABGRToUVRow_C, 4, 15) | 423 const uint32, 4, 2, 7) |
| 384 UVANY(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_SSSE3, RGBAToUVRow_C, 4, 15) | 424 #endif |
| 385 #endif | 425 #if defined(HAS_ARGBTORGB565DITHERROW_NEON) |
| 386 #ifdef HAS_YUY2TOUVROW_AVX2 | 426 ANY11P(ARGBToRGB565DitherRow_Any_NEON, ARGBToRGB565DitherRow_NEON, |
| 387 UVANY(YUY2ToUVRow_Any_AVX2, YUY2ToUVRow_AVX2, YUY2ToUVRow_C, 2, 31) | 427 const uint32, 4, 2, 7) |
| 388 UVANY(UYVYToUVRow_Any_AVX2, UYVYToUVRow_AVX2, UYVYToUVRow_C, 2, 31) | 428 #endif |
| 389 #endif | |
| 390 #ifdef HAS_YUY2TOUVROW_SSE2 | |
| 391 UVANY(YUY2ToUVRow_Any_SSE2, YUY2ToUVRow_SSE2, YUY2ToUVRow_C, 2, 15) | |
| 392 UVANY(UYVYToUVRow_Any_SSE2, UYVYToUVRow_SSE2, UYVYToUVRow_C, 2, 15) | |
| 393 #endif | |
| 394 #ifdef HAS_ARGBTOUVROW_NEON | |
| 395 UVANY(ARGBToUVRow_Any_NEON, ARGBToUVRow_NEON, ARGBToUVRow_C, 4, 15) | |
| 396 #endif | |
| 397 #ifdef HAS_ARGBTOUVJROW_NEON | |
| 398 UVANY(ARGBToUVJRow_Any_NEON, ARGBToUVJRow_NEON, ARGBToUVJRow_C, 4, 15) | |
| 399 #endif | |
| 400 #ifdef HAS_BGRATOUVROW_NEON | |
| 401 UVANY(BGRAToUVRow_Any_NEON, BGRAToUVRow_NEON, BGRAToUVRow_C, 4, 15) | |
| 402 #endif | |
| 403 #ifdef HAS_ABGRTOUVROW_NEON | |
| 404 UVANY(ABGRToUVRow_Any_NEON, ABGRToUVRow_NEON, ABGRToUVRow_C, 4, 15) | |
| 405 #endif | |
| 406 #ifdef HAS_RGBATOUVROW_NEON | |
| 407 UVANY(RGBAToUVRow_Any_NEON, RGBAToUVRow_NEON, RGBAToUVRow_C, 4, 15) | |
| 408 #endif | |
| 409 #ifdef HAS_RGB24TOUVROW_NEON | |
| 410 UVANY(RGB24ToUVRow_Any_NEON, RGB24ToUVRow_NEON, RGB24ToUVRow_C, 3, 15) | |
| 411 #endif | |
| 412 #ifdef HAS_RAWTOUVROW_NEON | |
| 413 UVANY(RAWToUVRow_Any_NEON, RAWToUVRow_NEON, RAWToUVRow_C, 3, 15) | |
| 414 #endif | |
| 415 #ifdef HAS_RGB565TOUVROW_NEON | |
| 416 UVANY(RGB565ToUVRow_Any_NEON, RGB565ToUVRow_NEON, RGB565ToUVRow_C, 2, 15) | |
| 417 #endif | |
| 418 #ifdef HAS_ARGB1555TOUVROW_NEON | |
| 419 UVANY(ARGB1555ToUVRow_Any_NEON, ARGB1555ToUVRow_NEON, ARGB1555ToUVRow_C, 2, 15) | |
| 420 #endif | |
| 421 #ifdef HAS_ARGB4444TOUVROW_NEON | |
| 422 UVANY(ARGB4444ToUVRow_Any_NEON, ARGB4444ToUVRow_NEON, ARGB4444ToUVRow_C, 2, 15) | |
| 423 #endif | |
| 424 #ifdef HAS_YUY2TOUVROW_NEON | |
| 425 UVANY(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, YUY2ToUVRow_C, 2, 15) | |
| 426 #endif | |
| 427 #ifdef HAS_UYVYTOUVROW_NEON | |
| 428 UVANY(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, UYVYToUVRow_C, 2, 15) | |
| 429 #endif | |
| 430 #undef UVANY | |
| 431 | |
| 432 #define UV422ANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, BPP, SHIFT, MASK) \ | |
| 433 void NAMEANY(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { \ | |
| 434 int n = width & ~MASK; \ | |
| 435 if (n > 0) { \ | |
| 436 ANYTOUV_SIMD(src_uv, dst_u, dst_v, n); \ | |
| 437 } \ | |
| 438 ANYTOUV_C(src_uv + n * BPP, \ | |
| 439 dst_u + (n >> SHIFT), \ | |
| 440 dst_v + (n >> SHIFT), \ | |
| 441 width & MASK); \ | |
| 442 } | |
| 443 | |
| 444 #ifdef HAS_ARGBTOUV444ROW_SSSE3 | |
| 445 UV422ANY(ARGBToUV444Row_Any_SSSE3, ARGBToUV444Row_SSSE3, | |
| 446 ARGBToUV444Row_C, 4, 0, 15) | |
| 447 #endif | |
| 448 #ifdef HAS_YUY2TOUV422ROW_AVX2 | |
| 449 UV422ANY(YUY2ToUV422Row_Any_AVX2, YUY2ToUV422Row_AVX2, | |
| 450 YUY2ToUV422Row_C, 2, 1, 31) | |
| 451 UV422ANY(UYVYToUV422Row_Any_AVX2, UYVYToUV422Row_AVX2, | |
| 452 UYVYToUV422Row_C, 2, 1, 31) | |
| 453 #endif | |
| 454 #ifdef HAS_ARGBTOUV422ROW_SSSE3 | |
| 455 UV422ANY(ARGBToUV422Row_Any_SSSE3, ARGBToUV422Row_SSSE3, | |
| 456 ARGBToUV422Row_C, 4, 1, 15) | |
| 457 #endif | |
| 458 #ifdef HAS_YUY2TOUV422ROW_SSE2 | |
| 459 UV422ANY(YUY2ToUV422Row_Any_SSE2, YUY2ToUV422Row_SSE2, | |
| 460 YUY2ToUV422Row_C, 2, 1, 15) | |
| 461 UV422ANY(UYVYToUV422Row_Any_SSE2, UYVYToUV422Row_SSE2, | |
| 462 UYVYToUV422Row_C, 2, 1, 15) | |
| 463 #endif | |
| 464 #ifdef HAS_YUY2TOUV422ROW_NEON | |
| 465 UV422ANY(ARGBToUV444Row_Any_NEON, ARGBToUV444Row_NEON, | |
| 466 ARGBToUV444Row_C, 4, 0, 7) | |
| 467 UV422ANY(ARGBToUV422Row_Any_NEON, ARGBToUV422Row_NEON, | |
| 468 ARGBToUV422Row_C, 4, 1, 15) | |
| 469 UV422ANY(ARGBToUV411Row_Any_NEON, ARGBToUV411Row_NEON, | |
| 470 ARGBToUV411Row_C, 4, 2, 31) | |
| 471 UV422ANY(YUY2ToUV422Row_Any_NEON, YUY2ToUV422Row_NEON, | |
| 472 YUY2ToUV422Row_C, 2, 1, 15) | |
| 473 UV422ANY(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON, | |
| 474 UYVYToUV422Row_C, 2, 1, 15) | |
| 475 #endif | |
| 476 #undef UV422ANY | |
| 477 | |
| 478 #define SPLITUVROWANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, MASK) \ | |
| 479 void NAMEANY(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { \ | |
| 480 int n = width & ~MASK; \ | |
| 481 if (n > 0) { \ | |
| 482 ANYTOUV_SIMD(src_uv, dst_u, dst_v, n); \ | |
| 483 } \ | |
| 484 ANYTOUV_C(src_uv + n * 2, \ | |
| 485 dst_u + n, \ | |
| 486 dst_v + n, \ | |
| 487 width & MASK); \ | |
| 488 } | |
| 489 | |
| 490 #ifdef HAS_SPLITUVROW_SSE2 | |
| 491 SPLITUVROWANY(SplitUVRow_Any_SSE2, SplitUVRow_SSE2, SplitUVRow_C, 15) | |
| 492 #endif | |
| 493 #ifdef HAS_SPLITUVROW_AVX2 | |
| 494 SPLITUVROWANY(SplitUVRow_Any_AVX2, SplitUVRow_AVX2, SplitUVRow_C, 31) | |
| 495 #endif | |
| 496 #ifdef HAS_SPLITUVROW_NEON | |
| 497 SPLITUVROWANY(SplitUVRow_Any_NEON, SplitUVRow_NEON, SplitUVRow_C, 15) | |
| 498 #endif | |
| 499 #ifdef HAS_SPLITUVROW_MIPS_DSPR2 | |
| 500 SPLITUVROWANY(SplitUVRow_Any_MIPS_DSPR2, SplitUVRow_MIPS_DSPR2, | |
| 501 SplitUVRow_C, 15) | |
| 502 #endif | |
| 503 #undef SPLITUVROWANY | |
| 504 | |
| 505 #define MERGEUVROW_ANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, MASK) \ | |
| 506 void NAMEANY(const uint8* src_u, const uint8* src_v, \ | |
| 507 uint8* dst_uv, int width) { \ | |
| 508 int n = width & ~MASK; \ | |
| 509 if (n > 0) { \ | |
| 510 ANYTOUV_SIMD(src_u, src_v, dst_uv, n); \ | |
| 511 } \ | |
| 512 ANYTOUV_C(src_u + n, \ | |
| 513 src_v + n, \ | |
| 514 dst_uv + n * 2, \ | |
| 515 width & MASK); \ | |
| 516 } | |
| 517 | |
| 518 #ifdef HAS_MERGEUVROW_SSE2 | |
| 519 MERGEUVROW_ANY(MergeUVRow_Any_SSE2, MergeUVRow_SSE2, MergeUVRow_C, 15) | |
| 520 #endif | |
| 521 #ifdef HAS_MERGEUVROW_AVX2 | |
| 522 MERGEUVROW_ANY(MergeUVRow_Any_AVX2, MergeUVRow_AVX2, MergeUVRow_C, 31) | |
| 523 #endif | |
| 524 #ifdef HAS_MERGEUVROW_NEON | |
| 525 MERGEUVROW_ANY(MergeUVRow_Any_NEON, MergeUVRow_NEON, MergeUVRow_C, 15) | |
| 526 #endif | |
| 527 #undef MERGEUVROW_ANY | |
| 528 | |
| 529 #define MATHROW_ANY(NAMEANY, ARGBMATH_SIMD, ARGBMATH_C, MASK) \ | |
| 530 void NAMEANY(const uint8* src_argb0, const uint8* src_argb1, \ | |
| 531 uint8* dst_argb, int width) { \ | |
| 532 int n = width & ~MASK; \ | |
| 533 if (n > 0) { \ | |
| 534 ARGBMATH_SIMD(src_argb0, src_argb1, dst_argb, n); \ | |
| 535 } \ | |
| 536 ARGBMATH_C(src_argb0 + n * 4, \ | |
| 537 src_argb1 + n * 4, \ | |
| 538 dst_argb + n * 4, \ | |
| 539 width & MASK); \ | |
| 540 } | |
| 541 | |
| 542 #ifdef HAS_ARGBMULTIPLYROW_SSE2 | |
| 543 MATHROW_ANY(ARGBMultiplyRow_Any_SSE2, ARGBMultiplyRow_SSE2, ARGBMultiplyRow_C, | |
| 544 3) | |
| 545 #endif | |
| 546 #ifdef HAS_ARGBADDROW_SSE2 | |
| 547 MATHROW_ANY(ARGBAddRow_Any_SSE2, ARGBAddRow_SSE2, ARGBAddRow_C, 3) | |
| 548 #endif | |
| 549 #ifdef HAS_ARGBSUBTRACTROW_SSE2 | |
| 550 MATHROW_ANY(ARGBSubtractRow_Any_SSE2, ARGBSubtractRow_SSE2, ARGBSubtractRow_C, | |
| 551 3) | |
| 552 #endif | |
| 553 #ifdef HAS_ARGBMULTIPLYROW_AVX2 | |
| 554 MATHROW_ANY(ARGBMultiplyRow_Any_AVX2, ARGBMultiplyRow_AVX2, ARGBMultiplyRow_C, | |
| 555 7) | |
| 556 #endif | |
| 557 #ifdef HAS_ARGBADDROW_AVX2 | |
| 558 MATHROW_ANY(ARGBAddRow_Any_AVX2, ARGBAddRow_AVX2, ARGBAddRow_C, 7) | |
| 559 #endif | |
| 560 #ifdef HAS_ARGBSUBTRACTROW_AVX2 | |
| 561 MATHROW_ANY(ARGBSubtractRow_Any_AVX2, ARGBSubtractRow_AVX2, ARGBSubtractRow_C, | |
| 562 7) | |
| 563 #endif | |
| 564 #ifdef HAS_ARGBMULTIPLYROW_NEON | |
| 565 MATHROW_ANY(ARGBMultiplyRow_Any_NEON, ARGBMultiplyRow_NEON, ARGBMultiplyRow_C, | |
| 566 7) | |
| 567 #endif | |
| 568 #ifdef HAS_ARGBADDROW_NEON | |
| 569 MATHROW_ANY(ARGBAddRow_Any_NEON, ARGBAddRow_NEON, ARGBAddRow_C, 7) | |
| 570 #endif | |
| 571 #ifdef HAS_ARGBSUBTRACTROW_NEON | |
| 572 MATHROW_ANY(ARGBSubtractRow_Any_NEON, ARGBSubtractRow_NEON, ARGBSubtractRow_C, | |
| 573 7) | |
| 574 #endif | |
| 575 #undef MATHROW_ANY | |
| 576 | |
| 577 // Shuffle may want to work in place, so last16 method can not be used. | |
| 578 #define YANY(NAMEANY, ARGBTOY_SIMD, ARGBTOY_C, SBPP, BPP, MASK) \ | |
| 579 void NAMEANY(const uint8* src_argb, uint8* dst_argb, \ | |
| 580 const uint8* shuffler, int width) { \ | |
| 581 int n = width & ~MASK; \ | |
| 582 if (n > 0) { \ | |
| 583 ARGBTOY_SIMD(src_argb, dst_argb, shuffler, n); \ | |
| 584 } \ | |
| 585 ARGBTOY_C(src_argb + n * SBPP, \ | |
| 586 dst_argb + n * BPP, shuffler, width & MASK); \ | |
| 587 } | |
| 588 | |
| 589 #ifdef HAS_ARGBSHUFFLEROW_SSE2 | 429 #ifdef HAS_ARGBSHUFFLEROW_SSE2 |
| 590 YANY(ARGBShuffleRow_Any_SSE2, ARGBShuffleRow_SSE2, | 430 ANY11P(ARGBShuffleRow_Any_SSE2, ARGBShuffleRow_SSE2, const uint8*, 4, 4, 3) |
| 591 ARGBShuffleRow_C, 4, 4, 3) | |
| 592 #endif | 431 #endif |
| 593 #ifdef HAS_ARGBSHUFFLEROW_SSSE3 | 432 #ifdef HAS_ARGBSHUFFLEROW_SSSE3 |
| 594 YANY(ARGBShuffleRow_Any_SSSE3, ARGBShuffleRow_SSSE3, | 433 ANY11P(ARGBShuffleRow_Any_SSSE3, ARGBShuffleRow_SSSE3, const uint8*, 4, 4, 7) |
| 595 ARGBShuffleRow_C, 4, 4, 7) | |
| 596 #endif | 434 #endif |
| 597 #ifdef HAS_ARGBSHUFFLEROW_AVX2 | 435 #ifdef HAS_ARGBSHUFFLEROW_AVX2 |
| 598 YANY(ARGBShuffleRow_Any_AVX2, ARGBShuffleRow_AVX2, | 436 ANY11P(ARGBShuffleRow_Any_AVX2, ARGBShuffleRow_AVX2, const uint8*, 4, 4, 15) |
| 599 ARGBShuffleRow_C, 4, 4, 15) | |
| 600 #endif | 437 #endif |
| 601 #ifdef HAS_ARGBSHUFFLEROW_NEON | 438 #ifdef HAS_ARGBSHUFFLEROW_NEON |
| 602 YANY(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON, | 439 ANY11P(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON, const uint8*, 4, 4, 3) |
| 603 ARGBShuffleRow_C, 4, 4, 3) | 440 #endif |
| 604 #endif | 441 #undef ANY11P |
| 605 #undef YANY | 442 |
| 606 | 443 // Any 1 to 1 interpolate. Takes 2 rows of source via stride. |
| 607 // Interpolate may want to work in place, so last16 method can not be used. | 444 #define ANY11T(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \ |
| 608 #define NANY(NAMEANY, TERP_SIMD, TERP_C, SBPP, BPP, MASK) \ | |
| 609 void NAMEANY(uint8* dst_ptr, const uint8* src_ptr, \ | 445 void NAMEANY(uint8* dst_ptr, const uint8* src_ptr, \ |
| 610 ptrdiff_t src_stride_ptr, int width, \ | 446 ptrdiff_t src_stride_ptr, int width, \ |
| 611 int source_y_fraction) { \ | 447 int source_y_fraction) { \ |
| 612 int n = width & ~MASK; \ | 448 SIMD_ALIGNED(uint8 temp[64 * 3]); \ |
| 613 if (n > 0) { \ | 449 memset(temp, 0, 64 * 2); /* for msan */ \ |
| 614 TERP_SIMD(dst_ptr, src_ptr, src_stride_ptr, n, source_y_fraction); \ | 450 int r = width & MASK; \ |
| 615 } \ | 451 int n = width & ~MASK; \ |
| 616 TERP_C(dst_ptr + n * BPP, \ | 452 if (n > 0) { \ |
| 617 src_ptr + n * SBPP, src_stride_ptr, \ | 453 ANY_SIMD(dst_ptr, src_ptr, src_stride_ptr, n, source_y_fraction); \ |
| 618 width & MASK, source_y_fraction); \ | 454 } \ |
| 455 memcpy(temp, src_ptr + n * SBPP, r * SBPP); \ |
| 456 memcpy(temp + 64, src_ptr + src_stride_ptr + n * SBPP, r * SBPP); \ |
| 457 ANY_SIMD(temp + 128, temp, 64, MASK + 1, source_y_fraction); \ |
| 458 memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ |
| 619 } | 459 } |
| 620 | 460 |
| 621 #ifdef HAS_INTERPOLATEROW_AVX2 | 461 #ifdef HAS_INTERPOLATEROW_AVX2 |
| 622 NANY(InterpolateRow_Any_AVX2, InterpolateRow_AVX2, InterpolateRow_C, 1, 1, 31) | 462 ANY11T(InterpolateRow_Any_AVX2, InterpolateRow_AVX2, 1, 1, 31) |
| 623 #endif | 463 #endif |
| 624 #ifdef HAS_INTERPOLATEROW_SSSE3 | 464 #ifdef HAS_INTERPOLATEROW_SSSE3 |
| 625 NANY(InterpolateRow_Any_SSSE3, InterpolateRow_SSSE3, InterpolateRow_C, 1, 1, 15) | 465 ANY11T(InterpolateRow_Any_SSSE3, InterpolateRow_SSSE3, 1, 1, 15) |
| 626 #endif | 466 #endif |
| 627 #ifdef HAS_INTERPOLATEROW_SSE2 | 467 #ifdef HAS_INTERPOLATEROW_SSE2 |
| 628 NANY(InterpolateRow_Any_SSE2, InterpolateRow_SSE2, InterpolateRow_C, 1, 1, 15) | 468 ANY11T(InterpolateRow_Any_SSE2, InterpolateRow_SSE2, 1, 1, 15) |
| 629 #endif | 469 #endif |
| 630 #ifdef HAS_INTERPOLATEROW_NEON | 470 #ifdef HAS_INTERPOLATEROW_NEON |
| 631 NANY(InterpolateRow_Any_NEON, InterpolateRow_NEON, InterpolateRow_C, 1, 1, 15) | 471 ANY11T(InterpolateRow_Any_NEON, InterpolateRow_NEON, 1, 1, 15) |
| 632 #endif | 472 #endif |
| 633 #ifdef HAS_INTERPOLATEROW_MIPS_DSPR2 | 473 #ifdef HAS_INTERPOLATEROW_MIPS_DSPR2 |
| 634 NANY(InterpolateRow_Any_MIPS_DSPR2, InterpolateRow_MIPS_DSPR2, InterpolateRow_C, | 474 ANY11T(InterpolateRow_Any_MIPS_DSPR2, InterpolateRow_MIPS_DSPR2, 1, 1, 3) |
| 635 1, 1, 3) | 475 #endif |
| 636 #endif | 476 #undef ANY11T |
| 637 #undef NANY | 477 |
| 638 | 478 // Any 1 to 1 mirror. |
| 639 #define MANY(NAMEANY, MIRROR_SIMD, MIRROR_C, BPP, MASK) \ | 479 #define ANY11M(NAMEANY, ANY_SIMD, BPP, MASK) \ |
| 640 void NAMEANY(const uint8* src_y, uint8* dst_y, int width) { \ | 480 void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \ |
| 641 int n = width & ~MASK; \ | 481 SIMD_ALIGNED(uint8 temp[64 * 2]); \ |
| 642 int r = width & MASK; \ | 482 memset(temp, 0, 64); /* for msan */ \ |
| 643 if (n > 0) { \ | 483 int r = width & MASK; \ |
| 644 MIRROR_SIMD(src_y, dst_y + r * BPP, n); \ | 484 int n = width & ~MASK; \ |
| 645 } \ | 485 if (n > 0) { \ |
| 646 MIRROR_C(src_y + n * BPP, dst_y, r); \ | 486 ANY_SIMD(src_ptr + r * BPP, dst_ptr, n); \ |
| 487 } \ |
| 488 memcpy(temp, src_ptr, r * BPP); \ |
| 489 ANY_SIMD(temp, temp + 64, MASK + 1); \ |
| 490 memcpy(dst_ptr + n * BPP, temp + 64 + (MASK + 1 - r) * BPP, r * BPP); \ |
| 647 } | 491 } |
| 648 | 492 |
| 649 #ifdef HAS_MIRRORROW_AVX2 | 493 #ifdef HAS_MIRRORROW_AVX2 |
| 650 MANY(MirrorRow_Any_AVX2, MirrorRow_AVX2, MirrorRow_C, 1, 31) | 494 ANY11M(MirrorRow_Any_AVX2, MirrorRow_AVX2, 1, 31) |
| 651 #endif | 495 #endif |
| 652 #ifdef HAS_MIRRORROW_SSSE3 | 496 #ifdef HAS_MIRRORROW_SSSE3 |
| 653 MANY(MirrorRow_Any_SSSE3, MirrorRow_SSSE3, MirrorRow_C, 1, 15) | 497 ANY11M(MirrorRow_Any_SSSE3, MirrorRow_SSSE3, 1, 15) |
| 654 #endif | 498 #endif |
| 655 #ifdef HAS_MIRRORROW_SSE2 | 499 #ifdef HAS_MIRRORROW_SSE2 |
| 656 MANY(MirrorRow_Any_SSE2, MirrorRow_SSE2, MirrorRow_C, 1, 15) | 500 ANY11M(MirrorRow_Any_SSE2, MirrorRow_SSE2, 1, 15) |
| 657 #endif | 501 #endif |
| 658 #ifdef HAS_MIRRORROW_NEON | 502 #ifdef HAS_MIRRORROW_NEON |
| 659 MANY(MirrorRow_Any_NEON, MirrorRow_NEON, MirrorRow_C, 1, 15) | 503 ANY11M(MirrorRow_Any_NEON, MirrorRow_NEON, 1, 15) |
| 660 #endif | 504 #endif |
| 661 #ifdef HAS_ARGBMIRRORROW_AVX2 | 505 #ifdef HAS_ARGBMIRRORROW_AVX2 |
| 662 MANY(ARGBMirrorRow_Any_AVX2, ARGBMirrorRow_AVX2, ARGBMirrorRow_C, 4, 7) | 506 ANY11M(ARGBMirrorRow_Any_AVX2, ARGBMirrorRow_AVX2, 4, 7) |
| 663 #endif | 507 #endif |
| 664 #ifdef HAS_ARGBMIRRORROW_SSE2 | 508 #ifdef HAS_ARGBMIRRORROW_SSE2 |
| 665 MANY(ARGBMirrorRow_Any_SSE2, ARGBMirrorRow_SSE2, ARGBMirrorRow_C, 4, 3) | 509 ANY11M(ARGBMirrorRow_Any_SSE2, ARGBMirrorRow_SSE2, 4, 3) |
| 666 #endif | 510 #endif |
| 667 #ifdef HAS_ARGBMIRRORROW_NEON | 511 #ifdef HAS_ARGBMIRRORROW_NEON |
| 668 MANY(ARGBMirrorRow_Any_NEON, ARGBMirrorRow_NEON, ARGBMirrorRow_C, 4, 3) | 512 ANY11M(ARGBMirrorRow_Any_NEON, ARGBMirrorRow_NEON, 4, 3) |
| 669 #endif | 513 #endif |
| 670 #undef MANY | 514 #undef ANY11M |
| 671 | 515 |
| 672 #define MANY(NAMEANY, COPY_SIMD, COPY_C, BPP, MASK) \ | 516 // Any 1 plane. (memset) |
| 673 void NAMEANY(const uint8* src_y, uint8* dst_y, int width) { \ | 517 #define ANY1(NAMEANY, ANY_SIMD, T, BPP, MASK) \ |
| 674 int n = width & ~MASK; \ | 518 void NAMEANY(uint8* dst_ptr, T v32, int width) { \ |
| 675 int r = width & MASK; \ | 519 SIMD_ALIGNED(uint8 temp[64]); \ |
| 676 if (n > 0) { \ | 520 int r = width & MASK; \ |
| 677 COPY_SIMD(src_y, dst_y, n); \ | 521 int n = width & ~MASK; \ |
| 678 } \ | 522 if (n > 0) { \ |
| 679 COPY_C(src_y + n * BPP, dst_y + n * BPP, r); \ | 523 ANY_SIMD(dst_ptr, v32, n); \ |
| 680 } | 524 } \ |
| 681 | 525 ANY_SIMD(temp, v32, MASK + 1); \ |
| 682 #ifdef HAS_COPYROW_AVX | 526 memcpy(dst_ptr + n * BPP, temp, r * BPP); \ |
| 683 MANY(CopyRow_Any_AVX, CopyRow_AVX, CopyRow_C, 1, 63) | |
| 684 #endif | |
| 685 #ifdef HAS_COPYROW_SSE2 | |
| 686 MANY(CopyRow_Any_SSE2, CopyRow_SSE2, CopyRow_C, 1, 31) | |
| 687 #endif | |
| 688 #ifdef HAS_COPYROW_NEON | |
| 689 MANY(CopyRow_Any_NEON, CopyRow_NEON, CopyRow_C, 1, 31) | |
| 690 #endif | |
| 691 #undef MANY | |
| 692 | |
| 693 #define SETANY(NAMEANY, SET_SIMD, SET_C, T, BPP, MASK) \ | |
| 694 void NAMEANY(uint8* dst_y, T v8, int width) { \ | |
| 695 int n = width & ~MASK; \ | |
| 696 int r = width & MASK; \ | |
| 697 if (n > 0) { \ | |
| 698 SET_SIMD(dst_y, v8, n); \ | |
| 699 } \ | |
| 700 SET_C(dst_y + n * BPP, v8, r); \ | |
| 701 } | 527 } |
| 702 | 528 |
| 703 #ifdef HAS_SETROW_X86 | 529 #ifdef HAS_SETROW_X86 |
| 704 SETANY(SetRow_Any_X86, SetRow_X86, SetRow_ERMS, uint8, 1, 3) | 530 ANY1(SetRow_Any_X86, SetRow_X86, uint8, 1, 3) |
| 705 #endif | 531 #endif |
| 706 #ifdef HAS_SETROW_NEON | 532 #ifdef HAS_SETROW_NEON |
| 707 SETANY(SetRow_Any_NEON, SetRow_NEON, SetRow_C, uint8, 1, 15) | 533 ANY1(SetRow_Any_NEON, SetRow_NEON, uint8, 1, 15) |
| 708 #endif | 534 #endif |
| 709 #ifdef HAS_ARGBSETROW_NEON | 535 #ifdef HAS_ARGBSETROW_NEON |
| 710 SETANY(ARGBSetRow_Any_NEON, ARGBSetRow_NEON, ARGBSetRow_C, uint32, 4, 3) | 536 ANY1(ARGBSetRow_Any_NEON, ARGBSetRow_NEON, uint32, 4, 3) |
| 711 #endif | 537 #endif |
| 712 #undef SETANY | 538 #undef ANY1 |
| 539 |
| 540 // Any 1 to 2. Outputs UV planes. |
| 541 #define ANY12(NAMEANY, ANY_SIMD, UVSHIFT, BPP, DUVSHIFT, MASK) \ |
| 542 void NAMEANY(const uint8* src_ptr, uint8* dst_u, uint8* dst_v, int width) {\ |
| 543 SIMD_ALIGNED(uint8 temp[128 * 3]); \ |
| 544 memset(temp, 0, 128); /* for msan */ \ |
| 545 int r = width & MASK; \ |
| 546 int n = width & ~MASK; \ |
| 547 if (n > 0) { \ |
| 548 ANY_SIMD(src_ptr, dst_u, dst_v, n); \ |
| 549 } \ |
| 550 memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \ |
| 551 if ((width & 1) && BPP == 4) { /* repeat last 4 bytes for subsampler */ \ |
| 552 memcpy(temp + SS(r, UVSHIFT) * BPP, \ |
| 553 temp + SS(r, UVSHIFT) * BPP - BPP, 4); \ |
| 554 } \ |
| 555 ANY_SIMD(temp, temp + 128, temp + 256, MASK + 1); \ |
| 556 memcpy(dst_u + (n >> DUVSHIFT), temp + 128, SS(r, DUVSHIFT)); \ |
| 557 memcpy(dst_v + (n >> DUVSHIFT), temp + 256, SS(r, DUVSHIFT)); \ |
| 558 } |
| 559 |
| 560 #ifdef HAS_SPLITUVROW_SSE2 |
| 561 ANY12(SplitUVRow_Any_SSE2, SplitUVRow_SSE2, 0, 2, 0, 15) |
| 562 #endif |
| 563 #ifdef HAS_SPLITUVROW_AVX2 |
| 564 ANY12(SplitUVRow_Any_AVX2, SplitUVRow_AVX2, 0, 2, 0, 31) |
| 565 #endif |
| 566 #ifdef HAS_SPLITUVROW_NEON |
| 567 ANY12(SplitUVRow_Any_NEON, SplitUVRow_NEON, 0, 2, 0, 15) |
| 568 #endif |
| 569 #ifdef HAS_SPLITUVROW_MIPS_DSPR2 |
| 570 ANY12(SplitUVRow_Any_MIPS_DSPR2, SplitUVRow_MIPS_DSPR2, 0, 2, 0, 15) |
| 571 #endif |
| 572 #ifdef HAS_ARGBTOUV444ROW_SSSE3 |
| 573 ANY12(ARGBToUV444Row_Any_SSSE3, ARGBToUV444Row_SSSE3, 0, 4, 0, 15) |
| 574 #endif |
| 575 #ifdef HAS_YUY2TOUV422ROW_AVX2 |
| 576 ANY12(YUY2ToUV422Row_Any_AVX2, YUY2ToUV422Row_AVX2, 1, 4, 1, 31) |
| 577 ANY12(UYVYToUV422Row_Any_AVX2, UYVYToUV422Row_AVX2, 1, 4, 1, 31) |
| 578 #endif |
| 579 #ifdef HAS_ARGBTOUV422ROW_SSSE3 |
| 580 ANY12(ARGBToUV422Row_Any_SSSE3, ARGBToUV422Row_SSSE3, 0, 4, 1, 15) |
| 581 #endif |
| 582 #ifdef HAS_YUY2TOUV422ROW_SSE2 |
| 583 ANY12(YUY2ToUV422Row_Any_SSE2, YUY2ToUV422Row_SSE2, 1, 4, 1, 15) |
| 584 ANY12(UYVYToUV422Row_Any_SSE2, UYVYToUV422Row_SSE2, 1, 4, 1, 15) |
| 585 #endif |
| 586 #ifdef HAS_YUY2TOUV422ROW_NEON |
| 587 ANY12(ARGBToUV444Row_Any_NEON, ARGBToUV444Row_NEON, 0, 4, 0, 7) |
| 588 ANY12(ARGBToUV422Row_Any_NEON, ARGBToUV422Row_NEON, 0, 4, 1, 15) |
| 589 ANY12(ARGBToUV411Row_Any_NEON, ARGBToUV411Row_NEON, 0, 4, 2, 31) |
| 590 ANY12(YUY2ToUV422Row_Any_NEON, YUY2ToUV422Row_NEON, 1, 4, 1, 15) |
| 591 ANY12(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON, 1, 4, 1, 15) |
| 592 #endif |
| 593 #undef ANY12 |
| 594 |
| 595 // Any 1 to 2 with source stride (2 rows of source). Outputs UV planes. |
| 596 // 128 byte row allows for 32 avx ARGB pixels. |
| 597 #define ANY12S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK) \ |
| 598 void NAMEANY(const uint8* src_ptr, int src_stride_ptr, \ |
| 599 uint8* dst_u, uint8* dst_v, int width) { \ |
| 600 SIMD_ALIGNED(uint8 temp[128 * 4]); \ |
| 601 memset(temp, 0, 128 * 2); /* for msan */ \ |
| 602 int r = width & MASK; \ |
| 603 int n = width & ~MASK; \ |
| 604 if (n > 0) { \ |
| 605 ANY_SIMD(src_ptr, src_stride_ptr, dst_u, dst_v, n); \ |
| 606 } \ |
| 607 memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \ |
| 608 memcpy(temp + 128, src_ptr + src_stride_ptr + (n >> UVSHIFT) * BPP, \ |
| 609 SS(r, UVSHIFT) * BPP); \ |
| 610 if ((width & 1) && BPP == 4) { /* repeat last 4 bytes for subsampler */ \ |
| 611 memcpy(temp + SS(r, UVSHIFT) * BPP, \ |
| 612 temp + SS(r, UVSHIFT) * BPP - BPP, 4); \ |
| 613 memcpy(temp + 128 + SS(r, UVSHIFT) * BPP, \ |
| 614 temp + 128 + SS(r, UVSHIFT) * BPP - BPP, 4); \ |
| 615 } \ |
| 616 ANY_SIMD(temp, 128, temp + 256, temp + 384, MASK + 1); \ |
| 617 memcpy(dst_u + (n >> 1), temp + 256, SS(r, 1)); \ |
| 618 memcpy(dst_v + (n >> 1), temp + 384, SS(r, 1)); \ |
| 619 } |
| 620 |
| 621 #ifdef HAS_ARGBTOUVROW_AVX2 |
| 622 ANY12S(ARGBToUVRow_Any_AVX2, ARGBToUVRow_AVX2, 0, 4, 31) |
| 623 #endif |
| 624 #ifdef HAS_ARGBTOUVROW_SSSE3 |
| 625 ANY12S(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_SSSE3, 0, 4, 15) |
| 626 ANY12S(ARGBToUVJRow_Any_SSSE3, ARGBToUVJRow_SSSE3, 0, 4, 15) |
| 627 ANY12S(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_SSSE3, 0, 4, 15) |
| 628 ANY12S(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_SSSE3, 0, 4, 15) |
| 629 ANY12S(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_SSSE3, 0, 4, 15) |
| 630 #endif |
| 631 #ifdef HAS_YUY2TOUVROW_AVX2 |
| 632 ANY12S(YUY2ToUVRow_Any_AVX2, YUY2ToUVRow_AVX2, 1, 4, 31) |
| 633 ANY12S(UYVYToUVRow_Any_AVX2, UYVYToUVRow_AVX2, 1, 4, 31) |
| 634 #endif |
| 635 #ifdef HAS_YUY2TOUVROW_SSE2 |
| 636 ANY12S(YUY2ToUVRow_Any_SSE2, YUY2ToUVRow_SSE2, 1, 4, 15) |
| 637 ANY12S(UYVYToUVRow_Any_SSE2, UYVYToUVRow_SSE2, 1, 4, 15) |
| 638 #endif |
| 639 #ifdef HAS_ARGBTOUVROW_NEON |
| 640 ANY12S(ARGBToUVRow_Any_NEON, ARGBToUVRow_NEON, 0, 4, 15) |
| 641 #endif |
| 642 #ifdef HAS_ARGBTOUVJROW_NEON |
| 643 ANY12S(ARGBToUVJRow_Any_NEON, ARGBToUVJRow_NEON, 0, 4, 15) |
| 644 #endif |
| 645 #ifdef HAS_BGRATOUVROW_NEON |
| 646 ANY12S(BGRAToUVRow_Any_NEON, BGRAToUVRow_NEON, 0, 4, 15) |
| 647 #endif |
| 648 #ifdef HAS_ABGRTOUVROW_NEON |
| 649 ANY12S(ABGRToUVRow_Any_NEON, ABGRToUVRow_NEON, 0, 4, 15) |
| 650 #endif |
| 651 #ifdef HAS_RGBATOUVROW_NEON |
| 652 ANY12S(RGBAToUVRow_Any_NEON, RGBAToUVRow_NEON, 0, 4, 15) |
| 653 #endif |
| 654 #ifdef HAS_RGB24TOUVROW_NEON |
| 655 ANY12S(RGB24ToUVRow_Any_NEON, RGB24ToUVRow_NEON, 0, 3, 15) |
| 656 #endif |
| 657 #ifdef HAS_RAWTOUVROW_NEON |
| 658 ANY12S(RAWToUVRow_Any_NEON, RAWToUVRow_NEON, 0, 3, 15) |
| 659 #endif |
| 660 #ifdef HAS_RGB565TOUVROW_NEON |
| 661 ANY12S(RGB565ToUVRow_Any_NEON, RGB565ToUVRow_NEON, 0, 2, 15) |
| 662 #endif |
| 663 #ifdef HAS_ARGB1555TOUVROW_NEON |
| 664 ANY12S(ARGB1555ToUVRow_Any_NEON, ARGB1555ToUVRow_NEON, 0, 2, 15) |
| 665 #endif |
| 666 #ifdef HAS_ARGB4444TOUVROW_NEON |
| 667 ANY12S(ARGB4444ToUVRow_Any_NEON, ARGB4444ToUVRow_NEON, 0, 2, 15) |
| 668 #endif |
| 669 #ifdef HAS_YUY2TOUVROW_NEON |
| 670 ANY12S(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, 1, 4, 15) |
| 671 #endif |
| 672 #ifdef HAS_UYVYTOUVROW_NEON |
| 673 ANY12S(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, 1, 4, 15) |
| 674 #endif |
| 675 #undef ANY12S |
| 713 | 676 |
| 714 #ifdef __cplusplus | 677 #ifdef __cplusplus |
| 715 } // extern "C" | 678 } // extern "C" |
| 716 } // namespace libyuv | 679 } // namespace libyuv |
| 717 #endif | 680 #endif |
| OLD | NEW |