| OLD | NEW |
| 1 // VERSION 2 | 1 // VERSION 2 |
| 2 /* | 2 /* |
| 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
| 4 * | 4 * |
| 5 * Use of this source code is governed by a BSD-style license | 5 * Use of this source code is governed by a BSD-style license |
| 6 * that can be found in the LICENSE file in the root of the source | 6 * that can be found in the LICENSE file in the root of the source |
| 7 * tree. An additional intellectual property rights grant can be found | 7 * tree. An additional intellectual property rights grant can be found |
| 8 * in the file PATENTS. All contributing project authors may | 8 * in the file PATENTS. All contributing project authors may |
| 9 * be found in the AUTHORS file in the root of the source tree. | 9 * be found in the AUTHORS file in the root of the source tree. |
| 10 */ | 10 */ |
| (...skipping 1308 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1319 | 1319 |
| 1320 #if defined(HAS_I422TOARGBROW_SSSE3) || defined(HAS_I422TOARGBROW_AVX2) | 1320 #if defined(HAS_I422TOARGBROW_SSSE3) || defined(HAS_I422TOARGBROW_AVX2) |
| 1321 | 1321 |
| 1322 // Read 8 UV from 444 | 1322 // Read 8 UV from 444 |
| 1323 #define READYUV444 \ | 1323 #define READYUV444 \ |
| 1324 "movq " MEMACCESS([u_buf]) ",%%xmm0 \n" \ | 1324 "movq " MEMACCESS([u_buf]) ",%%xmm0 \n" \ |
| 1325 MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1) \ | 1325 MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1) \ |
| 1326 "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \ | 1326 "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \ |
| 1327 "punpcklbw %%xmm1,%%xmm0 \n" \ | 1327 "punpcklbw %%xmm1,%%xmm0 \n" \ |
| 1328 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ | 1328 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ |
| 1329 <<<<<<< HEAD | |
| 1330 "punpcklbw %%xmm4,%%xmm4 \n" \ | 1329 "punpcklbw %%xmm4,%%xmm4 \n" \ |
| 1331 ======= | |
| 1332 >>>>>>> refs/remotes/origin/master | |
| 1333 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" | 1330 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" |
| 1334 | 1331 |
| 1335 // Read 4 UV from 422, upsample to 8 UV | 1332 // Read 4 UV from 422, upsample to 8 UV |
| 1336 #define READYUV422 \ | 1333 #define READYUV422 \ |
| 1337 "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \ | 1334 "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \ |
| 1338 MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \ | 1335 MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \ |
| 1339 "lea " MEMLEA(0x4, [u_buf]) ",%[u_buf] \n" \ | 1336 "lea " MEMLEA(0x4, [u_buf]) ",%[u_buf] \n" \ |
| 1340 "punpcklbw %%xmm1,%%xmm0 \n" \ | 1337 "punpcklbw %%xmm1,%%xmm0 \n" \ |
| 1341 "punpcklwd %%xmm0,%%xmm0 \n" \ | 1338 "punpcklwd %%xmm0,%%xmm0 \n" \ |
| 1342 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ | 1339 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ |
| 1343 <<<<<<< HEAD | |
| 1344 "punpcklbw %%xmm4,%%xmm4 \n" \ | 1340 "punpcklbw %%xmm4,%%xmm4 \n" \ |
| 1345 ======= | |
| 1346 >>>>>>> refs/remotes/origin/master | |
| 1347 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" | 1341 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" |
| 1348 | 1342 |
| 1349 // Read 2 UV from 411, upsample to 8 UV | 1343 // Read 2 UV from 411, upsample to 8 UV |
| 1350 #define READYUV411 \ | 1344 #define READYUV411 \ |
| 1351 "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \ | 1345 "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \ |
| 1352 MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \ | 1346 MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \ |
| 1353 "lea " MEMLEA(0x2, [u_buf]) ",%[u_buf] \n" \ | 1347 "lea " MEMLEA(0x2, [u_buf]) ",%[u_buf] \n" \ |
| 1354 "punpcklbw %%xmm1,%%xmm0 \n" \ | 1348 "punpcklbw %%xmm1,%%xmm0 \n" \ |
| 1355 "punpcklwd %%xmm0,%%xmm0 \n" \ | 1349 "punpcklwd %%xmm0,%%xmm0 \n" \ |
| 1356 "punpckldq %%xmm0,%%xmm0 \n" \ | 1350 "punpckldq %%xmm0,%%xmm0 \n" \ |
| 1357 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ | 1351 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ |
| 1358 <<<<<<< HEAD | |
| 1359 "punpcklbw %%xmm4,%%xmm4 \n" \ | 1352 "punpcklbw %%xmm4,%%xmm4 \n" \ |
| 1360 ======= | |
| 1361 >>>>>>> refs/remotes/origin/master | |
| 1362 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" | 1353 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" |
| 1363 | 1354 |
| 1364 // Read 4 UV from NV12, upsample to 8 UV | 1355 // Read 4 UV from NV12, upsample to 8 UV |
| 1365 #define READNV12 \ | 1356 #define READNV12 \ |
| 1366 "movq " MEMACCESS([uv_buf]) ",%%xmm0 \n" \ | 1357 "movq " MEMACCESS([uv_buf]) ",%%xmm0 \n" \ |
| 1367 "lea " MEMLEA(0x8, [uv_buf]) ",%[uv_buf] \n" \ | 1358 "lea " MEMLEA(0x8, [uv_buf]) ",%[uv_buf] \n" \ |
| 1368 "punpcklwd %%xmm0,%%xmm0 \n" \ | 1359 "punpcklwd %%xmm0,%%xmm0 \n" \ |
| 1369 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ | 1360 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ |
| 1370 <<<<<<< HEAD | |
| 1371 "punpcklbw %%xmm4,%%xmm4 \n" \ | 1361 "punpcklbw %%xmm4,%%xmm4 \n" \ |
| 1372 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" | 1362 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" |
| 1373 | 1363 |
| 1374 // YUY2 shuf 8 Y to 16 Y. | 1364 // YUY2 shuf 8 Y to 16 Y. |
| 1375 static const vec8 kShuffleYUY2Y = { | 1365 static const vec8 kShuffleYUY2Y = { |
| 1376 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14 | 1366 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14 |
| 1377 }; | 1367 }; |
| 1378 | 1368 |
| 1379 // YUY2 shuf 4 UV to 8 UV. | 1369 // YUY2 shuf 4 UV to 8 UV. |
| 1380 static const vec8 kShuffleYUY2UV = { | 1370 static const vec8 kShuffleYUY2UV = { |
| (...skipping 18 matching lines...) Expand all Loading... |
| 1399 0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14 | 1389 0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14 |
| 1400 }; | 1390 }; |
| 1401 | 1391 |
| 1402 // Read 4 UYVY with 8 Y and upsample 4 UV to 8 UV. | 1392 // Read 4 UYVY with 8 Y and upsample 4 UV to 8 UV. |
| 1403 #define READUYVY \ | 1393 #define READUYVY \ |
| 1404 "movdqu " MEMACCESS([uyvy_buf]) ",%%xmm4 \n" \ | 1394 "movdqu " MEMACCESS([uyvy_buf]) ",%%xmm4 \n" \ |
| 1405 "pshufb %[kShuffleUYVYY], %%xmm4 \n" \ | 1395 "pshufb %[kShuffleUYVYY], %%xmm4 \n" \ |
| 1406 "movdqu " MEMACCESS([uyvy_buf]) ",%%xmm0 \n" \ | 1396 "movdqu " MEMACCESS([uyvy_buf]) ",%%xmm0 \n" \ |
| 1407 "pshufb %[kShuffleUYVYUV], %%xmm0 \n" \ | 1397 "pshufb %[kShuffleUYVYUV], %%xmm0 \n" \ |
| 1408 "lea " MEMLEA(0x10, [uyvy_buf]) ",%[uyvy_buf] \n" | 1398 "lea " MEMLEA(0x10, [uyvy_buf]) ",%[uyvy_buf] \n" |
| 1409 ======= | |
| 1410 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" | |
| 1411 >>>>>>> refs/remotes/origin/master | |
| 1412 | 1399 |
| 1413 // Convert 8 pixels: 8 UV and 8 Y | 1400 // Convert 8 pixels: 8 UV and 8 Y |
| 1414 #define YUVTORGB(yuvconstants) \ | 1401 #define YUVTORGB(yuvconstants) \ |
| 1415 "movdqa %%xmm0,%%xmm1 \n" \ | 1402 "movdqa %%xmm0,%%xmm1 \n" \ |
| 1416 "movdqa %%xmm0,%%xmm2 \n" \ | 1403 "movdqa %%xmm0,%%xmm2 \n" \ |
| 1417 "movdqa %%xmm0,%%xmm3 \n" \ | 1404 "movdqa %%xmm0,%%xmm3 \n" \ |
| 1418 "movdqa " MEMACCESS2(96, [yuvconstants]) ",%%xmm0 \n" \ | 1405 "movdqa " MEMACCESS2(96, [yuvconstants]) ",%%xmm0 \n" \ |
| 1419 "pmaddubsw " MEMACCESS([yuvconstants]) ",%%xmm1 \n" \ | 1406 "pmaddubsw " MEMACCESS([yuvconstants]) ",%%xmm1 \n" \ |
| 1420 "psubw %%xmm1,%%xmm0 \n" \ | 1407 "psubw %%xmm1,%%xmm0 \n" \ |
| 1421 "movdqa " MEMACCESS2(128, [yuvconstants]) ",%%xmm1 \n" \ | 1408 "movdqa " MEMACCESS2(128, [yuvconstants]) ",%%xmm1 \n" \ |
| 1422 "pmaddubsw " MEMACCESS2(32, [yuvconstants]) ",%%xmm2 \n" \ | 1409 "pmaddubsw " MEMACCESS2(32, [yuvconstants]) ",%%xmm2 \n" \ |
| 1423 "psubw %%xmm2,%%xmm1 \n" \ | 1410 "psubw %%xmm2,%%xmm1 \n" \ |
| 1424 "movdqa " MEMACCESS2(160, [yuvconstants]) ",%%xmm2 \n" \ | 1411 "movdqa " MEMACCESS2(160, [yuvconstants]) ",%%xmm2 \n" \ |
| 1425 "pmaddubsw " MEMACCESS2(64, [yuvconstants]) ",%%xmm3 \n" \ | 1412 "pmaddubsw " MEMACCESS2(64, [yuvconstants]) ",%%xmm3 \n" \ |
| 1426 "psubw %%xmm3,%%xmm2 \n" \ | 1413 "psubw %%xmm3,%%xmm2 \n" \ |
| 1427 <<<<<<< HEAD | |
| 1428 ======= | |
| 1429 "punpcklbw %%xmm4,%%xmm4 \n" \ | |
| 1430 >>>>>>> refs/remotes/origin/master | |
| 1431 "pmulhuw " MEMACCESS2(192, [yuvconstants]) ",%%xmm4 \n" \ | 1414 "pmulhuw " MEMACCESS2(192, [yuvconstants]) ",%%xmm4 \n" \ |
| 1432 "paddsw %%xmm4,%%xmm0 \n" \ | 1415 "paddsw %%xmm4,%%xmm0 \n" \ |
| 1433 "paddsw %%xmm4,%%xmm1 \n" \ | 1416 "paddsw %%xmm4,%%xmm1 \n" \ |
| 1434 "paddsw %%xmm4,%%xmm2 \n" \ | 1417 "paddsw %%xmm4,%%xmm2 \n" \ |
| 1435 "psraw $0x6,%%xmm0 \n" \ | 1418 "psraw $0x6,%%xmm0 \n" \ |
| 1436 "psraw $0x6,%%xmm1 \n" \ | 1419 "psraw $0x6,%%xmm1 \n" \ |
| 1437 "psraw $0x6,%%xmm2 \n" \ | 1420 "psraw $0x6,%%xmm2 \n" \ |
| 1438 "packuswb %%xmm0,%%xmm0 \n" \ | 1421 "packuswb %%xmm0,%%xmm0 \n" \ |
| 1439 "packuswb %%xmm1,%%xmm1 \n" \ | 1422 "packuswb %%xmm1,%%xmm1 \n" \ |
| 1440 "packuswb %%xmm2,%%xmm2 \n" | 1423 "packuswb %%xmm2,%%xmm2 \n" |
| (...skipping 3870 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5311 ); | 5294 ); |
| 5312 } | 5295 } |
| 5313 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 5296 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
| 5314 | 5297 |
| 5315 #endif // defined(__x86_64__) || defined(__i386__) | 5298 #endif // defined(__x86_64__) || defined(__i386__) |
| 5316 | 5299 |
| 5317 #ifdef __cplusplus | 5300 #ifdef __cplusplus |
| 5318 } // extern "C" | 5301 } // extern "C" |
| 5319 } // namespace libyuv | 5302 } // namespace libyuv |
| 5320 #endif | 5303 #endif |
| OLD | NEW |