OLD | NEW |
1 // VERSION 2 | 1 // VERSION 2 |
2 /* | 2 /* |
3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
4 * | 4 * |
5 * Use of this source code is governed by a BSD-style license | 5 * Use of this source code is governed by a BSD-style license |
6 * that can be found in the LICENSE file in the root of the source | 6 * that can be found in the LICENSE file in the root of the source |
7 * tree. An additional intellectual property rights grant can be found | 7 * tree. An additional intellectual property rights grant can be found |
8 * in the file PATENTS. All contributing project authors may | 8 * in the file PATENTS. All contributing project authors may |
9 * be found in the AUTHORS file in the root of the source tree. | 9 * be found in the AUTHORS file in the root of the source tree. |
10 */ | 10 */ |
(...skipping 1308 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1319 | 1319 |
1320 #if defined(HAS_I422TOARGBROW_SSSE3) || defined(HAS_I422TOARGBROW_AVX2) | 1320 #if defined(HAS_I422TOARGBROW_SSSE3) || defined(HAS_I422TOARGBROW_AVX2) |
1321 | 1321 |
1322 // Read 8 UV from 444 | 1322 // Read 8 UV from 444 |
1323 #define READYUV444 \ | 1323 #define READYUV444 \ |
1324 "movq " MEMACCESS([u_buf]) ",%%xmm0 \n" \ | 1324 "movq " MEMACCESS([u_buf]) ",%%xmm0 \n" \ |
1325 MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1) \ | 1325 MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1) \ |
1326 "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \ | 1326 "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \ |
1327 "punpcklbw %%xmm1,%%xmm0 \n" \ | 1327 "punpcklbw %%xmm1,%%xmm0 \n" \ |
1328 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ | 1328 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ |
1329 <<<<<<< HEAD | |
1330 "punpcklbw %%xmm4,%%xmm4 \n" \ | 1329 "punpcklbw %%xmm4,%%xmm4 \n" \ |
1331 ======= | |
1332 >>>>>>> refs/remotes/origin/master | |
1333 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" | 1330 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" |
1334 | 1331 |
1335 // Read 4 UV from 422, upsample to 8 UV | 1332 // Read 4 UV from 422, upsample to 8 UV |
1336 #define READYUV422 \ | 1333 #define READYUV422 \ |
1337 "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \ | 1334 "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \ |
1338 MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \ | 1335 MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \ |
1339 "lea " MEMLEA(0x4, [u_buf]) ",%[u_buf] \n" \ | 1336 "lea " MEMLEA(0x4, [u_buf]) ",%[u_buf] \n" \ |
1340 "punpcklbw %%xmm1,%%xmm0 \n" \ | 1337 "punpcklbw %%xmm1,%%xmm0 \n" \ |
1341 "punpcklwd %%xmm0,%%xmm0 \n" \ | 1338 "punpcklwd %%xmm0,%%xmm0 \n" \ |
1342 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ | 1339 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ |
1343 <<<<<<< HEAD | |
1344 "punpcklbw %%xmm4,%%xmm4 \n" \ | 1340 "punpcklbw %%xmm4,%%xmm4 \n" \ |
1345 ======= | |
1346 >>>>>>> refs/remotes/origin/master | |
1347 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" | 1341 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" |
1348 | 1342 |
1349 // Read 2 UV from 411, upsample to 8 UV | 1343 // Read 2 UV from 411, upsample to 8 UV |
1350 #define READYUV411 \ | 1344 #define READYUV411 \ |
1351 "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \ | 1345 "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \ |
1352 MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \ | 1346 MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \ |
1353 "lea " MEMLEA(0x2, [u_buf]) ",%[u_buf] \n" \ | 1347 "lea " MEMLEA(0x2, [u_buf]) ",%[u_buf] \n" \ |
1354 "punpcklbw %%xmm1,%%xmm0 \n" \ | 1348 "punpcklbw %%xmm1,%%xmm0 \n" \ |
1355 "punpcklwd %%xmm0,%%xmm0 \n" \ | 1349 "punpcklwd %%xmm0,%%xmm0 \n" \ |
1356 "punpckldq %%xmm0,%%xmm0 \n" \ | 1350 "punpckldq %%xmm0,%%xmm0 \n" \ |
1357 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ | 1351 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ |
1358 <<<<<<< HEAD | |
1359 "punpcklbw %%xmm4,%%xmm4 \n" \ | 1352 "punpcklbw %%xmm4,%%xmm4 \n" \ |
1360 ======= | |
1361 >>>>>>> refs/remotes/origin/master | |
1362 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" | 1353 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" |
1363 | 1354 |
1364 // Read 4 UV from NV12, upsample to 8 UV | 1355 // Read 4 UV from NV12, upsample to 8 UV |
1365 #define READNV12 \ | 1356 #define READNV12 \ |
1366 "movq " MEMACCESS([uv_buf]) ",%%xmm0 \n" \ | 1357 "movq " MEMACCESS([uv_buf]) ",%%xmm0 \n" \ |
1367 "lea " MEMLEA(0x8, [uv_buf]) ",%[uv_buf] \n" \ | 1358 "lea " MEMLEA(0x8, [uv_buf]) ",%[uv_buf] \n" \ |
1368 "punpcklwd %%xmm0,%%xmm0 \n" \ | 1359 "punpcklwd %%xmm0,%%xmm0 \n" \ |
1369 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ | 1360 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ |
1370 <<<<<<< HEAD | |
1371 "punpcklbw %%xmm4,%%xmm4 \n" \ | 1361 "punpcklbw %%xmm4,%%xmm4 \n" \ |
1372 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" | 1362 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" |
1373 | 1363 |
1374 // YUY2 shuf 8 Y to 16 Y. | 1364 // YUY2 shuf 8 Y to 16 Y. |
1375 static const vec8 kShuffleYUY2Y = { | 1365 static const vec8 kShuffleYUY2Y = { |
1376 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14 | 1366 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14 |
1377 }; | 1367 }; |
1378 | 1368 |
1379 // YUY2 shuf 4 UV to 8 UV. | 1369 // YUY2 shuf 4 UV to 8 UV. |
1380 static const vec8 kShuffleYUY2UV = { | 1370 static const vec8 kShuffleYUY2UV = { |
(...skipping 18 matching lines...) Expand all Loading... |
1399 0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14 | 1389 0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14 |
1400 }; | 1390 }; |
1401 | 1391 |
1402 // Read 4 UYVY with 8 Y and upsample 4 UV to 8 UV. | 1392 // Read 4 UYVY with 8 Y and upsample 4 UV to 8 UV. |
1403 #define READUYVY \ | 1393 #define READUYVY \ |
1404 "movdqu " MEMACCESS([uyvy_buf]) ",%%xmm4 \n" \ | 1394 "movdqu " MEMACCESS([uyvy_buf]) ",%%xmm4 \n" \ |
1405 "pshufb %[kShuffleUYVYY], %%xmm4 \n" \ | 1395 "pshufb %[kShuffleUYVYY], %%xmm4 \n" \ |
1406 "movdqu " MEMACCESS([uyvy_buf]) ",%%xmm0 \n" \ | 1396 "movdqu " MEMACCESS([uyvy_buf]) ",%%xmm0 \n" \ |
1407 "pshufb %[kShuffleUYVYUV], %%xmm0 \n" \ | 1397 "pshufb %[kShuffleUYVYUV], %%xmm0 \n" \ |
1408 "lea " MEMLEA(0x10, [uyvy_buf]) ",%[uyvy_buf] \n" | 1398 "lea " MEMLEA(0x10, [uyvy_buf]) ",%[uyvy_buf] \n" |
1409 ======= | |
1410 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" | |
1411 >>>>>>> refs/remotes/origin/master | |
1412 | 1399 |
1413 // Convert 8 pixels: 8 UV and 8 Y | 1400 // Convert 8 pixels: 8 UV and 8 Y |
1414 #define YUVTORGB(yuvconstants) \ | 1401 #define YUVTORGB(yuvconstants) \ |
1415 "movdqa %%xmm0,%%xmm1 \n" \ | 1402 "movdqa %%xmm0,%%xmm1 \n" \ |
1416 "movdqa %%xmm0,%%xmm2 \n" \ | 1403 "movdqa %%xmm0,%%xmm2 \n" \ |
1417 "movdqa %%xmm0,%%xmm3 \n" \ | 1404 "movdqa %%xmm0,%%xmm3 \n" \ |
1418 "movdqa " MEMACCESS2(96, [yuvconstants]) ",%%xmm0 \n" \ | 1405 "movdqa " MEMACCESS2(96, [yuvconstants]) ",%%xmm0 \n" \ |
1419 "pmaddubsw " MEMACCESS([yuvconstants]) ",%%xmm1 \n" \ | 1406 "pmaddubsw " MEMACCESS([yuvconstants]) ",%%xmm1 \n" \ |
1420 "psubw %%xmm1,%%xmm0 \n" \ | 1407 "psubw %%xmm1,%%xmm0 \n" \ |
1421 "movdqa " MEMACCESS2(128, [yuvconstants]) ",%%xmm1 \n" \ | 1408 "movdqa " MEMACCESS2(128, [yuvconstants]) ",%%xmm1 \n" \ |
1422 "pmaddubsw " MEMACCESS2(32, [yuvconstants]) ",%%xmm2 \n" \ | 1409 "pmaddubsw " MEMACCESS2(32, [yuvconstants]) ",%%xmm2 \n" \ |
1423 "psubw %%xmm2,%%xmm1 \n" \ | 1410 "psubw %%xmm2,%%xmm1 \n" \ |
1424 "movdqa " MEMACCESS2(160, [yuvconstants]) ",%%xmm2 \n" \ | 1411 "movdqa " MEMACCESS2(160, [yuvconstants]) ",%%xmm2 \n" \ |
1425 "pmaddubsw " MEMACCESS2(64, [yuvconstants]) ",%%xmm3 \n" \ | 1412 "pmaddubsw " MEMACCESS2(64, [yuvconstants]) ",%%xmm3 \n" \ |
1426 "psubw %%xmm3,%%xmm2 \n" \ | 1413 "psubw %%xmm3,%%xmm2 \n" \ |
1427 <<<<<<< HEAD | |
1428 ======= | |
1429 "punpcklbw %%xmm4,%%xmm4 \n" \ | |
1430 >>>>>>> refs/remotes/origin/master | |
1431 "pmulhuw " MEMACCESS2(192, [yuvconstants]) ",%%xmm4 \n" \ | 1414 "pmulhuw " MEMACCESS2(192, [yuvconstants]) ",%%xmm4 \n" \ |
1432 "paddsw %%xmm4,%%xmm0 \n" \ | 1415 "paddsw %%xmm4,%%xmm0 \n" \ |
1433 "paddsw %%xmm4,%%xmm1 \n" \ | 1416 "paddsw %%xmm4,%%xmm1 \n" \ |
1434 "paddsw %%xmm4,%%xmm2 \n" \ | 1417 "paddsw %%xmm4,%%xmm2 \n" \ |
1435 "psraw $0x6,%%xmm0 \n" \ | 1418 "psraw $0x6,%%xmm0 \n" \ |
1436 "psraw $0x6,%%xmm1 \n" \ | 1419 "psraw $0x6,%%xmm1 \n" \ |
1437 "psraw $0x6,%%xmm2 \n" \ | 1420 "psraw $0x6,%%xmm2 \n" \ |
1438 "packuswb %%xmm0,%%xmm0 \n" \ | 1421 "packuswb %%xmm0,%%xmm0 \n" \ |
1439 "packuswb %%xmm1,%%xmm1 \n" \ | 1422 "packuswb %%xmm1,%%xmm1 \n" \ |
1440 "packuswb %%xmm2,%%xmm2 \n" | 1423 "packuswb %%xmm2,%%xmm2 \n" |
(...skipping 3870 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5311 ); | 5294 ); |
5312 } | 5295 } |
5313 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 5296 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
5314 | 5297 |
5315 #endif // defined(__x86_64__) || defined(__i386__) | 5298 #endif // defined(__x86_64__) || defined(__i386__) |
5316 | 5299 |
5317 #ifdef __cplusplus | 5300 #ifdef __cplusplus |
5318 } // extern "C" | 5301 } // extern "C" |
5319 } // namespace libyuv | 5302 } // namespace libyuv |
5320 #endif | 5303 #endif |
OLD | NEW |