OLD | NEW |
1 // VERSION 2 | 1 // VERSION 2 |
2 /* | 2 /* |
3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
4 * | 4 * |
5 * Use of this source code is governed by a BSD-style license | 5 * Use of this source code is governed by a BSD-style license |
6 * that can be found in the LICENSE file in the root of the source | 6 * that can be found in the LICENSE file in the root of the source |
7 * tree. An additional intellectual property rights grant can be found | 7 * tree. An additional intellectual property rights grant can be found |
8 * in the file PATENTS. All contributing project authors may | 8 * in the file PATENTS. All contributing project authors may |
9 * be found in the AUTHORS file in the root of the source tree. | 9 * be found in the AUTHORS file in the root of the source tree. |
10 */ | 10 */ |
(...skipping 1306 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1317 ); | 1317 ); |
1318 } | 1318 } |
1319 | 1319 |
1320 #if defined(HAS_I422TOARGBROW_SSSE3) || defined(HAS_I422TOARGBROW_AVX2) | 1320 #if defined(HAS_I422TOARGBROW_SSSE3) || defined(HAS_I422TOARGBROW_AVX2) |
1321 | 1321 |
1322 // Read 8 UV from 444 | 1322 // Read 8 UV from 444 |
1323 #define READYUV444 \ | 1323 #define READYUV444 \ |
1324 "movq " MEMACCESS([u_buf]) ",%%xmm0 \n" \ | 1324 "movq " MEMACCESS([u_buf]) ",%%xmm0 \n" \ |
1325 MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1) \ | 1325 MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1) \ |
1326 "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \ | 1326 "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \ |
1327 "punpcklbw %%xmm1,%%xmm0 \n" | 1327 "punpcklbw %%xmm1,%%xmm0 \n" \ |
| 1328 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ |
| 1329 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" |
1328 | 1330 |
1329 // Read 4 UV from 422, upsample to 8 UV | 1331 // Read 4 UV from 422, upsample to 8 UV |
1330 #define READYUV422 \ | 1332 #define READYUV422 \ |
1331 "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \ | 1333 "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \ |
1332 MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \ | 1334 MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \ |
1333 "lea " MEMLEA(0x4, [u_buf]) ",%[u_buf] \n" \ | 1335 "lea " MEMLEA(0x4, [u_buf]) ",%[u_buf] \n" \ |
1334 "punpcklbw %%xmm1,%%xmm0 \n" \ | 1336 "punpcklbw %%xmm1,%%xmm0 \n" \ |
1335 "punpcklwd %%xmm0,%%xmm0 \n" | 1337 "punpcklwd %%xmm0,%%xmm0 \n" \ |
| 1338 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ |
| 1339 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" |
1336 | 1340 |
1337 // Read 2 UV from 411, upsample to 8 UV | 1341 // Read 2 UV from 411, upsample to 8 UV |
1338 #define READYUV411 \ | 1342 #define READYUV411 \ |
1339 "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \ | 1343 "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \ |
1340 MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \ | 1344 MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \ |
1341 "lea " MEMLEA(0x2, [u_buf]) ",%[u_buf] \n" \ | 1345 "lea " MEMLEA(0x2, [u_buf]) ",%[u_buf] \n" \ |
1342 "punpcklbw %%xmm1,%%xmm0 \n" \ | 1346 "punpcklbw %%xmm1,%%xmm0 \n" \ |
1343 "punpcklwd %%xmm0,%%xmm0 \n" \ | 1347 "punpcklwd %%xmm0,%%xmm0 \n" \ |
1344 "punpckldq %%xmm0,%%xmm0 \n" | 1348 "punpckldq %%xmm0,%%xmm0 \n" \ |
| 1349 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ |
| 1350 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" |
1345 | 1351 |
1346 // Read 4 UV from NV12, upsample to 8 UV | 1352 // Read 4 UV from NV12, upsample to 8 UV |
1347 #define READNV12 \ | 1353 #define READNV12 \ |
1348 "movq " MEMACCESS([uv_buf]) ",%%xmm0 \n" \ | 1354 "movq " MEMACCESS([uv_buf]) ",%%xmm0 \n" \ |
1349 "lea " MEMLEA(0x8, [uv_buf]) ",%[uv_buf] \n" \ | 1355 "lea " MEMLEA(0x8, [uv_buf]) ",%[uv_buf] \n" \ |
1350 "punpcklwd %%xmm0,%%xmm0 \n" | 1356 "punpcklwd %%xmm0,%%xmm0 \n" \ |
| 1357 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ |
| 1358 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" |
1351 | 1359 |
1352 // Convert 8 pixels: 8 UV and 8 Y | 1360 // Convert 8 pixels: 8 UV and 8 Y |
1353 #define YUVTORGB(yuvconstants) \ | 1361 #define YUVTORGB(yuvconstants) \ |
1354 "movdqa %%xmm0,%%xmm1 \n" \ | 1362 "movdqa %%xmm0,%%xmm1 \n" \ |
1355 "movdqa %%xmm0,%%xmm2 \n" \ | 1363 "movdqa %%xmm0,%%xmm2 \n" \ |
1356 "movdqa %%xmm0,%%xmm3 \n" \ | 1364 "movdqa %%xmm0,%%xmm3 \n" \ |
1357 "movdqa " MEMACCESS2(96, [yuvconstants]) ",%%xmm0 \n" \ | 1365 "movdqa " MEMACCESS2(96, [yuvconstants]) ",%%xmm0 \n" \ |
1358 "pmaddubsw " MEMACCESS([yuvconstants]) ",%%xmm1 \n" \ | 1366 "pmaddubsw " MEMACCESS([yuvconstants]) ",%%xmm1 \n" \ |
1359 "psubw %%xmm1,%%xmm0 \n" \ | 1367 "psubw %%xmm1,%%xmm0 \n" \ |
1360 "movdqa " MEMACCESS2(128, [yuvconstants]) ",%%xmm1 \n" \ | 1368 "movdqa " MEMACCESS2(128, [yuvconstants]) ",%%xmm1 \n" \ |
1361 "pmaddubsw " MEMACCESS2(32, [yuvconstants]) ",%%xmm2 \n" \ | 1369 "pmaddubsw " MEMACCESS2(32, [yuvconstants]) ",%%xmm2 \n" \ |
1362 "psubw %%xmm2,%%xmm1 \n" \ | 1370 "psubw %%xmm2,%%xmm1 \n" \ |
1363 "movdqa " MEMACCESS2(160, [yuvconstants]) ",%%xmm2 \n" \ | 1371 "movdqa " MEMACCESS2(160, [yuvconstants]) ",%%xmm2 \n" \ |
1364 "pmaddubsw " MEMACCESS2(64, [yuvconstants]) ",%%xmm3 \n" \ | 1372 "pmaddubsw " MEMACCESS2(64, [yuvconstants]) ",%%xmm3 \n" \ |
1365 "psubw %%xmm3,%%xmm2 \n" \ | 1373 "psubw %%xmm3,%%xmm2 \n" \ |
1366 "movq " MEMACCESS([y_buf]) ",%%xmm3 \n" \ | 1374 "punpcklbw %%xmm4,%%xmm4 \n" \ |
1367 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" \ | 1375 "pmulhuw " MEMACCESS2(192, [yuvconstants]) ",%%xmm4 \n" \ |
1368 "punpcklbw %%xmm3,%%xmm3 \n" \ | 1376 "paddsw %%xmm4,%%xmm0 \n" \ |
1369 "pmulhuw " MEMACCESS2(192, [yuvconstants]) ",%%xmm3 \n" \ | 1377 "paddsw %%xmm4,%%xmm1 \n" \ |
1370 "paddsw %%xmm3,%%xmm0 \n" \ | 1378 "paddsw %%xmm4,%%xmm2 \n" \ |
1371 "paddsw %%xmm3,%%xmm1 \n" \ | |
1372 "paddsw %%xmm3,%%xmm2 \n" \ | |
1373 "psraw $0x6,%%xmm0 \n" \ | 1379 "psraw $0x6,%%xmm0 \n" \ |
1374 "psraw $0x6,%%xmm1 \n" \ | 1380 "psraw $0x6,%%xmm1 \n" \ |
1375 "psraw $0x6,%%xmm2 \n" \ | 1381 "psraw $0x6,%%xmm2 \n" \ |
1376 "packuswb %%xmm0,%%xmm0 \n" \ | 1382 "packuswb %%xmm0,%%xmm0 \n" \ |
1377 "packuswb %%xmm1,%%xmm1 \n" \ | 1383 "packuswb %%xmm1,%%xmm1 \n" \ |
1378 "packuswb %%xmm2,%%xmm2 \n" | 1384 "packuswb %%xmm2,%%xmm2 \n" |
1379 | 1385 |
1380 // Store 8 ARGB values. Assumes XMM5 is zero. | 1386 // Store 8 ARGB values. Assumes XMM5 is zero. |
1381 #define STOREARGB \ | 1387 #define STOREARGB \ |
1382 "punpcklbw %%xmm1,%%xmm0 \n" \ | 1388 "punpcklbw %%xmm1,%%xmm0 \n" \ |
(...skipping 3818 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5201 ); | 5207 ); |
5202 } | 5208 } |
5203 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 5209 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
5204 | 5210 |
5205 #endif // defined(__x86_64__) || defined(__i386__) | 5211 #endif // defined(__x86_64__) || defined(__i386__) |
5206 | 5212 |
5207 #ifdef __cplusplus | 5213 #ifdef __cplusplus |
5208 } // extern "C" | 5214 } // extern "C" |
5209 } // namespace libyuv | 5215 } // namespace libyuv |
5210 #endif | 5216 #endif |
OLD | NEW |