| OLD | NEW |
| 1 // VERSION 2 | 1 // VERSION 2 |
| 2 /* | 2 /* |
| 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
| 4 * | 4 * |
| 5 * Use of this source code is governed by a BSD-style license | 5 * Use of this source code is governed by a BSD-style license |
| 6 * that can be found in the LICENSE file in the root of the source | 6 * that can be found in the LICENSE file in the root of the source |
| 7 * tree. An additional intellectual property rights grant can be found | 7 * tree. An additional intellectual property rights grant can be found |
| 8 * in the file PATENTS. All contributing project authors may | 8 * in the file PATENTS. All contributing project authors may |
| 9 * be found in the AUTHORS file in the root of the source tree. | 9 * be found in the AUTHORS file in the root of the source tree. |
| 10 */ | 10 */ |
| (...skipping 1306 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1317 ); | 1317 ); |
| 1318 } | 1318 } |
| 1319 | 1319 |
| 1320 #if defined(HAS_I422TOARGBROW_SSSE3) || defined(HAS_I422TOARGBROW_AVX2) | 1320 #if defined(HAS_I422TOARGBROW_SSSE3) || defined(HAS_I422TOARGBROW_AVX2) |
| 1321 | 1321 |
| 1322 // Read 8 UV from 444 | 1322 // Read 8 UV from 444 |
| 1323 #define READYUV444 \ | 1323 #define READYUV444 \ |
| 1324 "movq " MEMACCESS([u_buf]) ",%%xmm0 \n" \ | 1324 "movq " MEMACCESS([u_buf]) ",%%xmm0 \n" \ |
| 1325 MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1) \ | 1325 MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1) \ |
| 1326 "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \ | 1326 "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \ |
| 1327 "punpcklbw %%xmm1,%%xmm0 \n" | 1327 "punpcklbw %%xmm1,%%xmm0 \n" \ |
| 1328 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ |
| 1329 "punpcklbw %%xmm4,%%xmm4 \n" \ |
| 1330 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" |
| 1328 | 1331 |
| 1329 // Read 4 UV from 422, upsample to 8 UV | 1332 // Read 4 UV from 422, upsample to 8 UV |
| 1330 #define READYUV422 \ | 1333 #define READYUV422 \ |
| 1331 "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \ | 1334 "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \ |
| 1332 MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \ | 1335 MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \ |
| 1333 "lea " MEMLEA(0x4, [u_buf]) ",%[u_buf] \n" \ | 1336 "lea " MEMLEA(0x4, [u_buf]) ",%[u_buf] \n" \ |
| 1334 "punpcklbw %%xmm1,%%xmm0 \n" \ | 1337 "punpcklbw %%xmm1,%%xmm0 \n" \ |
| 1335 "punpcklwd %%xmm0,%%xmm0 \n" | 1338 "punpcklwd %%xmm0,%%xmm0 \n" \ |
| 1339 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ |
| 1340 "punpcklbw %%xmm4,%%xmm4 \n" \ |
| 1341 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" |
| 1336 | 1342 |
| 1337 // Read 2 UV from 411, upsample to 8 UV | 1343 // Read 2 UV from 411, upsample to 8 UV |
| 1338 #define READYUV411 \ | 1344 #define READYUV411 \ |
| 1339 "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \ | 1345 "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \ |
| 1340 MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \ | 1346 MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \ |
| 1341 "lea " MEMLEA(0x2, [u_buf]) ",%[u_buf] \n" \ | 1347 "lea " MEMLEA(0x2, [u_buf]) ",%[u_buf] \n" \ |
| 1342 "punpcklbw %%xmm1,%%xmm0 \n" \ | 1348 "punpcklbw %%xmm1,%%xmm0 \n" \ |
| 1343 "punpcklwd %%xmm0,%%xmm0 \n" \ | 1349 "punpcklwd %%xmm0,%%xmm0 \n" \ |
| 1344 "punpckldq %%xmm0,%%xmm0 \n" | 1350 "punpckldq %%xmm0,%%xmm0 \n" \ |
| 1351 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ |
| 1352 "punpcklbw %%xmm4,%%xmm4 \n" \ |
| 1353 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" |
| 1345 | 1354 |
| 1346 // Read 4 UV from NV12, upsample to 8 UV | 1355 // Read 4 UV from NV12, upsample to 8 UV |
| 1347 #define READNV12 \ | 1356 #define READNV12 \ |
| 1348 "movq " MEMACCESS([uv_buf]) ",%%xmm0 \n" \ | 1357 "movq " MEMACCESS([uv_buf]) ",%%xmm0 \n" \ |
| 1349 "lea " MEMLEA(0x8, [uv_buf]) ",%[uv_buf] \n" \ | 1358 "lea " MEMLEA(0x8, [uv_buf]) ",%[uv_buf] \n" \ |
| 1350 "punpcklwd %%xmm0,%%xmm0 \n" | 1359 "punpcklwd %%xmm0,%%xmm0 \n" \ |
| 1360 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ |
| 1361 "punpcklbw %%xmm4,%%xmm4 \n" \ |
| 1362 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" |
| 1363 |
| 1364 // YUY2 shuf 8 Y to 16 Y. |
| 1365 static const vec8 kShuffleYUY2Y = { |
| 1366 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14 |
| 1367 }; |
| 1368 |
| 1369 // YUY2 shuf 4 UV to 8 UV. |
| 1370 static const vec8 kShuffleYUY2UV = { |
| 1371 1, 3, 1, 3, 5, 7, 5, 7, 9, 11, 9, 11, 13, 15, 13, 15 |
| 1372 }; |
| 1373 |
| 1374 // Read 4 YUY2 with 8 Y and upsample 4 UV to 8 UV. |
| 1375 #define READYUY2 \ |
| 1376 "movdqu " MEMACCESS([yuy2_buf]) ",%%xmm4 \n" \ |
| 1377 "pshufb %[kShuffleYUY2Y], %%xmm4 \n" \ |
| 1378 "movdqu " MEMACCESS([yuy2_buf]) ",%%xmm0 \n" \ |
| 1379 "pshufb %[kShuffleYUY2UV], %%xmm0 \n" \ |
| 1380 "lea " MEMLEA(0x10, [yuy2_buf]) ",%[yuy2_buf] \n" |
| 1381 |
| 1382 // UYVY shuf 8 Y to 16 Y. |
| 1383 static const vec8 kShuffleUYVYY = { |
| 1384 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15 |
| 1385 }; |
| 1386 |
| 1387 // UYVY shuf 4 UV to 8 UV. |
| 1388 static const vec8 kShuffleUYVYUV = { |
| 1389 0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14 |
| 1390 }; |
| 1391 |
| 1392 // Read 4 UYVY with 8 Y and upsample 4 UV to 8 UV. |
| 1393 #define READUYVY \ |
| 1394 "movdqu " MEMACCESS([uyvy_buf]) ",%%xmm4 \n" \ |
| 1395 "pshufb %[kShuffleUYVYY], %%xmm4 \n" \ |
| 1396 "movdqu " MEMACCESS([uyvy_buf]) ",%%xmm0 \n" \ |
| 1397 "pshufb %[kShuffleUYVYUV], %%xmm0 \n" \ |
| 1398 "lea " MEMLEA(0x10, [uyvy_buf]) ",%[uyvy_buf] \n" |
| 1351 | 1399 |
| 1352 // Convert 8 pixels: 8 UV and 8 Y | 1400 // Convert 8 pixels: 8 UV and 8 Y |
| 1353 #define YUVTORGB(yuvconstants) \ | 1401 #define YUVTORGB(yuvconstants) \ |
| 1354 "movdqa %%xmm0,%%xmm1 \n" \ | 1402 "movdqa %%xmm0,%%xmm1 \n" \ |
| 1355 "movdqa %%xmm0,%%xmm2 \n" \ | 1403 "movdqa %%xmm0,%%xmm2 \n" \ |
| 1356 "movdqa %%xmm0,%%xmm3 \n" \ | 1404 "movdqa %%xmm0,%%xmm3 \n" \ |
| 1357 "movdqa " MEMACCESS2(96, [yuvconstants]) ",%%xmm0 \n" \ | 1405 "movdqa " MEMACCESS2(96, [yuvconstants]) ",%%xmm0 \n" \ |
| 1358 "pmaddubsw " MEMACCESS([yuvconstants]) ",%%xmm1 \n" \ | 1406 "pmaddubsw " MEMACCESS([yuvconstants]) ",%%xmm1 \n" \ |
| 1359 "psubw %%xmm1,%%xmm0 \n" \ | 1407 "psubw %%xmm1,%%xmm0 \n" \ |
| 1360 "movdqa " MEMACCESS2(128, [yuvconstants]) ",%%xmm1 \n" \ | 1408 "movdqa " MEMACCESS2(128, [yuvconstants]) ",%%xmm1 \n" \ |
| 1361 "pmaddubsw " MEMACCESS2(32, [yuvconstants]) ",%%xmm2 \n" \ | 1409 "pmaddubsw " MEMACCESS2(32, [yuvconstants]) ",%%xmm2 \n" \ |
| 1362 "psubw %%xmm2,%%xmm1 \n" \ | 1410 "psubw %%xmm2,%%xmm1 \n" \ |
| 1363 "movdqa " MEMACCESS2(160, [yuvconstants]) ",%%xmm2 \n" \ | 1411 "movdqa " MEMACCESS2(160, [yuvconstants]) ",%%xmm2 \n" \ |
| 1364 "pmaddubsw " MEMACCESS2(64, [yuvconstants]) ",%%xmm3 \n" \ | 1412 "pmaddubsw " MEMACCESS2(64, [yuvconstants]) ",%%xmm3 \n" \ |
| 1365 "psubw %%xmm3,%%xmm2 \n" \ | 1413 "psubw %%xmm3,%%xmm2 \n" \ |
| 1366 "movq " MEMACCESS([y_buf]) ",%%xmm3 \n" \ | 1414 "pmulhuw " MEMACCESS2(192, [yuvconstants]) ",%%xmm4 \n" \ |
| 1367 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" \ | 1415 "paddsw %%xmm4,%%xmm0 \n" \ |
| 1368 "punpcklbw %%xmm3,%%xmm3 \n" \ | 1416 "paddsw %%xmm4,%%xmm1 \n" \ |
| 1369 "pmulhuw " MEMACCESS2(192, [yuvconstants]) ",%%xmm3 \n" \ | 1417 "paddsw %%xmm4,%%xmm2 \n" \ |
| 1370 "paddsw %%xmm3,%%xmm0 \n" \ | |
| 1371 "paddsw %%xmm3,%%xmm1 \n" \ | |
| 1372 "paddsw %%xmm3,%%xmm2 \n" \ | |
| 1373 "psraw $0x6,%%xmm0 \n" \ | 1418 "psraw $0x6,%%xmm0 \n" \ |
| 1374 "psraw $0x6,%%xmm1 \n" \ | 1419 "psraw $0x6,%%xmm1 \n" \ |
| 1375 "psraw $0x6,%%xmm2 \n" \ | 1420 "psraw $0x6,%%xmm2 \n" \ |
| 1376 "packuswb %%xmm0,%%xmm0 \n" \ | 1421 "packuswb %%xmm0,%%xmm0 \n" \ |
| 1377 "packuswb %%xmm1,%%xmm1 \n" \ | 1422 "packuswb %%xmm1,%%xmm1 \n" \ |
| 1378 "packuswb %%xmm2,%%xmm2 \n" | 1423 "packuswb %%xmm2,%%xmm2 \n" |
| 1379 | 1424 |
| 1380 // Store 8 ARGB values. Assumes XMM5 is all ones (callers set it with pcmpeqb). | 1425 // Store 8 ARGB values. Assumes XMM5 is all ones (callers set it with pcmpeqb). |
| 1381 #define STOREARGB \ | 1426 #define STOREARGB \ |
| 1382 "punpcklbw %%xmm1,%%xmm0 \n" \ | 1427 "punpcklbw %%xmm1,%%xmm0 \n" \ |
| (...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1439 STOREARGB | 1484 STOREARGB |
| 1440 "sub $0x8,%[width] \n" | 1485 "sub $0x8,%[width] \n" |
| 1441 "jg 1b \n" | 1486 "jg 1b \n" |
| 1442 : [y_buf]"+r"(y_buf), // %[y_buf] | 1487 : [y_buf]"+r"(y_buf), // %[y_buf] |
| 1443 [u_buf]"+r"(u_buf), // %[u_buf] | 1488 [u_buf]"+r"(u_buf), // %[u_buf] |
| 1444 [v_buf]"+r"(v_buf), // %[v_buf] | 1489 [v_buf]"+r"(v_buf), // %[v_buf] |
| 1445 [dst_argb]"+r"(dst_argb), // %[dst_argb] | 1490 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
| 1446 [width]"+rm"(width) // %[width] | 1491 [width]"+rm"(width) // %[width] |
| 1447 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 1492 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
| 1448 : "memory", "cc", NACL_R14 | 1493 : "memory", "cc", NACL_R14 |
| 1449 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" | 1494 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
| 1450 ); | 1495 ); |
| 1451 } | 1496 } |
| 1452 | 1497 |
| 1453 void OMITFP I444ToABGRRow_SSSE3(const uint8* y_buf, | 1498 void OMITFP I444ToABGRRow_SSSE3(const uint8* y_buf, |
| 1454 const uint8* u_buf, | 1499 const uint8* u_buf, |
| 1455 const uint8* v_buf, | 1500 const uint8* v_buf, |
| 1456 uint8* dst_abgr, | 1501 uint8* dst_abgr, |
| 1457 struct YuvConstants* yuvconstants, | 1502 struct YuvConstants* yuvconstants, |
| 1458 int width) { | 1503 int width) { |
| 1459 asm volatile ( | 1504 asm volatile ( |
| 1460 "sub %[u_buf],%[v_buf] \n" | 1505 "sub %[u_buf],%[v_buf] \n" |
| 1461 "pcmpeqb %%xmm5,%%xmm5 \n" | 1506 "pcmpeqb %%xmm5,%%xmm5 \n" |
| 1462 LABELALIGN | 1507 LABELALIGN |
| 1463 "1: \n" | 1508 "1: \n" |
| 1464 READYUV444 | 1509 READYUV444 |
| 1465 YUVTORGB(yuvconstants) | 1510 YUVTORGB(yuvconstants) |
| 1466 STOREABGR | 1511 STOREABGR |
| 1467 "sub $0x8,%[width] \n" | 1512 "sub $0x8,%[width] \n" |
| 1468 "jg 1b \n" | 1513 "jg 1b \n" |
| 1469 : [y_buf]"+r"(y_buf), // %[y_buf] | 1514 : [y_buf]"+r"(y_buf), // %[y_buf] |
| 1470 [u_buf]"+r"(u_buf), // %[u_buf] | 1515 [u_buf]"+r"(u_buf), // %[u_buf] |
| 1471 [v_buf]"+r"(v_buf), // %[v_buf] | 1516 [v_buf]"+r"(v_buf), // %[v_buf] |
| 1472 [dst_abgr]"+r"(dst_abgr), // %[dst_abgr] | 1517 [dst_abgr]"+r"(dst_abgr), // %[dst_abgr] |
| 1473 [width]"+rm"(width) // %[width] | 1518 [width]"+rm"(width) // %[width] |
| 1474 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 1519 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
| 1475 : "memory", "cc", NACL_R14 | 1520 : "memory", "cc", NACL_R14 |
| 1476 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" | 1521 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
| 1477 ); | 1522 ); |
| 1478 } | 1523 } |
| 1479 | 1524 |
| 1480 // TODO(fbarchard): Consider putting masks into constants. | 1525 // TODO(fbarchard): Consider putting masks into constants. |
| 1481 void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf, | 1526 void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf, |
| 1482 const uint8* u_buf, | 1527 const uint8* u_buf, |
| 1483 const uint8* v_buf, | 1528 const uint8* v_buf, |
| 1484 uint8* dst_rgb24, | 1529 uint8* dst_rgb24, |
| 1485 struct YuvConstants* yuvconstants, | 1530 struct YuvConstants* yuvconstants, |
| 1486 int width) { | 1531 int width) { |
| (...skipping 25 matching lines...) Expand all Loading... |
| 1512 // TODO(fbarchard): Make width a register for 32 bit. | 1557 // TODO(fbarchard): Make width a register for 32 bit. |
| 1513 #if defined(__i386__) && defined(__pic__) | 1558 #if defined(__i386__) && defined(__pic__) |
| 1514 [width]"+m"(width) // %[width] | 1559 [width]"+m"(width) // %[width] |
| 1515 #else | 1560 #else |
| 1516 [width]"+rm"(width) // %[width] | 1561 [width]"+rm"(width) // %[width] |
| 1517 #endif | 1562 #endif |
| 1518 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] | 1563 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] |
| 1519 [kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0), | 1564 [kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0), |
| 1520 [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24) | 1565 [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24) |
| 1521 : "memory", "cc", NACL_R14 | 1566 : "memory", "cc", NACL_R14 |
| 1522 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5", "xmm6" | 1567 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" |
| 1523 ); | 1568 ); |
| 1524 } | 1569 } |
| 1525 | 1570 |
| 1526 void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf, | 1571 void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf, |
| 1527 const uint8* u_buf, | 1572 const uint8* u_buf, |
| 1528 const uint8* v_buf, | 1573 const uint8* v_buf, |
| 1529 uint8* dst_raw, | 1574 uint8* dst_raw, |
| 1530 struct YuvConstants* yuvconstants, | 1575 struct YuvConstants* yuvconstants, |
| 1531 int width) { | 1576 int width) { |
| 1532 asm volatile ( | 1577 asm volatile ( |
| (...skipping 24 matching lines...) Expand all Loading... |
| 1557 // TODO(fbarchard): Make width a register for 32 bit. | 1602 // TODO(fbarchard): Make width a register for 32 bit. |
| 1558 #if defined(__i386__) && defined(__pic__) | 1603 #if defined(__i386__) && defined(__pic__) |
| 1559 [width]"+m"(width) // %[width] | 1604 [width]"+m"(width) // %[width] |
| 1560 #else | 1605 #else |
| 1561 [width]"+rm"(width) // %[width] | 1606 [width]"+rm"(width) // %[width] |
| 1562 #endif | 1607 #endif |
| 1563 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] | 1608 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] |
| 1564 [kShuffleMaskARGBToRAW_0]"m"(kShuffleMaskARGBToRAW_0), | 1609 [kShuffleMaskARGBToRAW_0]"m"(kShuffleMaskARGBToRAW_0), |
| 1565 [kShuffleMaskARGBToRAW]"m"(kShuffleMaskARGBToRAW) | 1610 [kShuffleMaskARGBToRAW]"m"(kShuffleMaskARGBToRAW) |
| 1566 : "memory", "cc", NACL_R14 | 1611 : "memory", "cc", NACL_R14 |
| 1567 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5", "xmm6" | 1612 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" |
| 1568 ); | 1613 ); |
| 1569 } | 1614 } |
| 1570 | 1615 |
| 1571 void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf, | 1616 void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf, |
| 1572 const uint8* u_buf, | 1617 const uint8* u_buf, |
| 1573 const uint8* v_buf, | 1618 const uint8* v_buf, |
| 1574 uint8* dst_argb, | 1619 uint8* dst_argb, |
| 1575 struct YuvConstants* yuvconstants, | 1620 struct YuvConstants* yuvconstants, |
| 1576 int width) { | 1621 int width) { |
| 1577 asm volatile ( | 1622 asm volatile ( |
| 1578 "sub %[u_buf],%[v_buf] \n" | 1623 "sub %[u_buf],%[v_buf] \n" |
| 1579 "pcmpeqb %%xmm5,%%xmm5 \n" | 1624 "pcmpeqb %%xmm5,%%xmm5 \n" |
| 1580 LABELALIGN | 1625 LABELALIGN |
| 1581 "1: \n" | 1626 "1: \n" |
| 1582 READYUV422 | 1627 READYUV422 |
| 1583 YUVTORGB(yuvconstants) | 1628 YUVTORGB(yuvconstants) |
| 1584 STOREARGB | 1629 STOREARGB |
| 1585 "sub $0x8,%[width] \n" | 1630 "sub $0x8,%[width] \n" |
| 1586 "jg 1b \n" | 1631 "jg 1b \n" |
| 1587 : [y_buf]"+r"(y_buf), // %[y_buf] | 1632 : [y_buf]"+r"(y_buf), // %[y_buf] |
| 1588 [u_buf]"+r"(u_buf), // %[u_buf] | 1633 [u_buf]"+r"(u_buf), // %[u_buf] |
| 1589 [v_buf]"+r"(v_buf), // %[v_buf] | 1634 [v_buf]"+r"(v_buf), // %[v_buf] |
| 1590 [dst_argb]"+r"(dst_argb), // %[dst_argb] | 1635 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
| 1591 [width]"+rm"(width) // %[width] | 1636 [width]"+rm"(width) // %[width] |
| 1592 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 1637 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
| 1593 : "memory", "cc", NACL_R14 | 1638 : "memory", "cc", NACL_R14 |
| 1594 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" | 1639 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
| 1595 ); | 1640 ); |
| 1596 } | 1641 } |
| 1597 | 1642 |
| 1598 void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf, | 1643 void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf, |
| 1599 const uint8* u_buf, | 1644 const uint8* u_buf, |
| 1600 const uint8* v_buf, | 1645 const uint8* v_buf, |
| 1601 uint8* dst_argb, | 1646 uint8* dst_argb, |
| 1602 struct YuvConstants* yuvconstants, | 1647 struct YuvConstants* yuvconstants, |
| 1603 int width) { | 1648 int width) { |
| 1604 asm volatile ( | 1649 asm volatile ( |
| 1605 "sub %[u_buf],%[v_buf] \n" | 1650 "sub %[u_buf],%[v_buf] \n" |
| 1606 "pcmpeqb %%xmm5,%%xmm5 \n" | 1651 "pcmpeqb %%xmm5,%%xmm5 \n" |
| 1607 LABELALIGN | 1652 LABELALIGN |
| 1608 "1: \n" | 1653 "1: \n" |
| 1609 READYUV411 | 1654 READYUV411 |
| 1610 YUVTORGB(yuvconstants) | 1655 YUVTORGB(yuvconstants) |
| 1611 STOREARGB | 1656 STOREARGB |
| 1612 "sub $0x8,%[width] \n" | 1657 "sub $0x8,%[width] \n" |
| 1613 "jg 1b \n" | 1658 "jg 1b \n" |
| 1614 : [y_buf]"+r"(y_buf), // %[y_buf] | 1659 : [y_buf]"+r"(y_buf), // %[y_buf] |
| 1615 [u_buf]"+r"(u_buf), // %[u_buf] | 1660 [u_buf]"+r"(u_buf), // %[u_buf] |
| 1616 [v_buf]"+r"(v_buf), // %[v_buf] | 1661 [v_buf]"+r"(v_buf), // %[v_buf] |
| 1617 [dst_argb]"+r"(dst_argb), // %[dst_argb] | 1662 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
| 1618 [width]"+rm"(width) // %[width] | 1663 [width]"+rm"(width) // %[width] |
| 1619 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 1664 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
| 1620 : "memory", "cc", NACL_R14 | 1665 : "memory", "cc", NACL_R14 |
| 1621 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" | 1666 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
| 1622 ); | 1667 ); |
| 1623 } | 1668 } |
| 1624 | 1669 |
| 1625 void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf, | 1670 void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf, |
| 1626 const uint8* uv_buf, | 1671 const uint8* uv_buf, |
| 1627 uint8* dst_argb, | 1672 uint8* dst_argb, |
| 1628 struct YuvConstants* yuvconstants, | 1673 struct YuvConstants* yuvconstants, |
| 1629 int width) { | 1674 int width) { |
| 1630 asm volatile ( | 1675 asm volatile ( |
| 1631 "pcmpeqb %%xmm5,%%xmm5 \n" | 1676 "pcmpeqb %%xmm5,%%xmm5 \n" |
| 1632 LABELALIGN | 1677 LABELALIGN |
| 1633 "1: \n" | 1678 "1: \n" |
| 1634 READNV12 | 1679 READNV12 |
| 1635 YUVTORGB(yuvconstants) | 1680 YUVTORGB(yuvconstants) |
| 1636 STOREARGB | 1681 STOREARGB |
| 1637 "sub $0x8,%[width] \n" | 1682 "sub $0x8,%[width] \n" |
| 1638 "jg 1b \n" | 1683 "jg 1b \n" |
| 1639 : [y_buf]"+r"(y_buf), // %[y_buf] | 1684 : [y_buf]"+r"(y_buf), // %[y_buf] |
| 1640 [uv_buf]"+r"(uv_buf), // %[uv_buf] | 1685 [uv_buf]"+r"(uv_buf), // %[uv_buf] |
| 1641 [dst_argb]"+r"(dst_argb), // %[dst_argb] | 1686 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
| 1642 [width]"+rm"(width) // %[width] | 1687 [width]"+rm"(width) // %[width] |
| 1643 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 1688 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
| 1644 // Does not use r14. | 1689 // Does not use r14. |
| 1645 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" | 1690 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
| 1646 ); | 1691 ); |
| 1647 } | 1692 } |
| 1648 | 1693 |
| 1694 void OMITFP YUY2ToARGBRow_SSSE3(const uint8* yuy2_buf, |
| 1695 uint8* dst_argb, |
| 1696 struct YuvConstants* yuvconstants, |
| 1697 int width) { |
| 1698 asm volatile ( |
| 1699 "pcmpeqb %%xmm5,%%xmm5 \n" |
| 1700 LABELALIGN |
| 1701 "1: \n" |
| 1702 READYUY2 |
| 1703 YUVTORGB(yuvconstants) |
| 1704 STOREARGB |
| 1705 "sub $0x8,%[width] \n" |
| 1706 "jg 1b \n" |
| 1707 : [yuy2_buf]"+r"(yuy2_buf), // %[yuy2_buf] |
| 1708 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
| 1709 [width]"+rm"(width) // %[width] |
| 1710 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] |
| 1711 [kShuffleYUY2Y]"m"(kShuffleYUY2Y), |
| 1712 [kShuffleYUY2UV]"m"(kShuffleYUY2UV) |
| 1713 // Does not use r14. |
| 1714 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
| 1715 ); |
| 1716 } |
| 1717 |
| 1718 void OMITFP UYVYToARGBRow_SSSE3(const uint8* uyvy_buf, |
| 1719 uint8* dst_argb, |
| 1720 struct YuvConstants* yuvconstants, |
| 1721 int width) { |
| 1722 asm volatile ( |
| 1723 "pcmpeqb %%xmm5,%%xmm5 \n" |
| 1724 LABELALIGN |
| 1725 "1: \n" |
| 1726 READUYVY |
| 1727 YUVTORGB(yuvconstants) |
| 1728 STOREARGB |
| 1729 "sub $0x8,%[width] \n" |
| 1730 "jg 1b \n" |
| 1731 : [uyvy_buf]"+r"(uyvy_buf), // %[uyvy_buf] |
| 1732 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
| 1733 [width]"+rm"(width) // %[width] |
| 1734 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] |
| 1735 [kShuffleUYVYY]"m"(kShuffleUYVYY), |
| 1736 [kShuffleUYVYUV]"m"(kShuffleUYVYUV) |
| 1737 // Does not use r14. |
| 1738 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
| 1739 ); |
| 1740 } |
| 1741 |
| 1649 void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf, | 1742 void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf, |
| 1650 const uint8* u_buf, | 1743 const uint8* u_buf, |
| 1651 const uint8* v_buf, | 1744 const uint8* v_buf, |
| 1652 uint8* dst_bgra, | 1745 uint8* dst_bgra, |
| 1653 struct YuvConstants* yuvconstants, | 1746 struct YuvConstants* yuvconstants, |
| 1654 int width) { | 1747 int width) { |
| 1655 asm volatile ( | 1748 asm volatile ( |
| 1656 "sub %[u_buf],%[v_buf] \n" | 1749 "sub %[u_buf],%[v_buf] \n" |
| 1657 "pcmpeqb %%xmm5,%%xmm5 \n" | 1750 "pcmpeqb %%xmm5,%%xmm5 \n" |
| 1658 LABELALIGN | 1751 LABELALIGN |
| 1659 "1: \n" | 1752 "1: \n" |
| 1660 READYUV422 | 1753 READYUV422 |
| 1661 YUVTORGB(yuvconstants) | 1754 YUVTORGB(yuvconstants) |
| 1662 STOREBGRA | 1755 STOREBGRA |
| 1663 "sub $0x8,%[width] \n" | 1756 "sub $0x8,%[width] \n" |
| 1664 "jg 1b \n" | 1757 "jg 1b \n" |
| 1665 : [y_buf]"+r"(y_buf), // %[y_buf] | 1758 : [y_buf]"+r"(y_buf), // %[y_buf] |
| 1666 [u_buf]"+r"(u_buf), // %[u_buf] | 1759 [u_buf]"+r"(u_buf), // %[u_buf] |
| 1667 [v_buf]"+r"(v_buf), // %[v_buf] | 1760 [v_buf]"+r"(v_buf), // %[v_buf] |
| 1668 [dst_bgra]"+r"(dst_bgra), // %[dst_bgra] | 1761 [dst_bgra]"+r"(dst_bgra), // %[dst_bgra] |
| 1669 [width]"+rm"(width) // %[width] | 1762 [width]"+rm"(width) // %[width] |
| 1670 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 1763 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
| 1671 : "memory", "cc", NACL_R14 | 1764 : "memory", "cc", NACL_R14 |
| 1672 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" | 1765 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
| 1673 ); | 1766 ); |
| 1674 } | 1767 } |
| 1675 | 1768 |
| 1676 void OMITFP I422ToABGRRow_SSSE3(const uint8* y_buf, | 1769 void OMITFP I422ToABGRRow_SSSE3(const uint8* y_buf, |
| 1677 const uint8* u_buf, | 1770 const uint8* u_buf, |
| 1678 const uint8* v_buf, | 1771 const uint8* v_buf, |
| 1679 uint8* dst_abgr, | 1772 uint8* dst_abgr, |
| 1680 struct YuvConstants* yuvconstants, | 1773 struct YuvConstants* yuvconstants, |
| 1681 int width) { | 1774 int width) { |
| 1682 asm volatile ( | 1775 asm volatile ( |
| 1683 "sub %[u_buf],%[v_buf] \n" | 1776 "sub %[u_buf],%[v_buf] \n" |
| 1684 "pcmpeqb %%xmm5,%%xmm5 \n" | 1777 "pcmpeqb %%xmm5,%%xmm5 \n" |
| 1685 LABELALIGN | 1778 LABELALIGN |
| 1686 "1: \n" | 1779 "1: \n" |
| 1687 READYUV422 | 1780 READYUV422 |
| 1688 YUVTORGB(yuvconstants) | 1781 YUVTORGB(yuvconstants) |
| 1689 STOREABGR | 1782 STOREABGR |
| 1690 "sub $0x8,%[width] \n" | 1783 "sub $0x8,%[width] \n" |
| 1691 "jg 1b \n" | 1784 "jg 1b \n" |
| 1692 : [y_buf]"+r"(y_buf), // %[y_buf] | 1785 : [y_buf]"+r"(y_buf), // %[y_buf] |
| 1693 [u_buf]"+r"(u_buf), // %[u_buf] | 1786 [u_buf]"+r"(u_buf), // %[u_buf] |
| 1694 [v_buf]"+r"(v_buf), // %[v_buf] | 1787 [v_buf]"+r"(v_buf), // %[v_buf] |
| 1695 [dst_abgr]"+r"(dst_abgr), // %[dst_abgr] | 1788 [dst_abgr]"+r"(dst_abgr), // %[dst_abgr] |
| 1696 [width]"+rm"(width) // %[width] | 1789 [width]"+rm"(width) // %[width] |
| 1697 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 1790 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
| 1698 : "memory", "cc", NACL_R14 | 1791 : "memory", "cc", NACL_R14 |
| 1699 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" | 1792 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
| 1700 ); | 1793 ); |
| 1701 } | 1794 } |
| 1702 | 1795 |
| 1703 void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, | 1796 void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, |
| 1704 const uint8* u_buf, | 1797 const uint8* u_buf, |
| 1705 const uint8* v_buf, | 1798 const uint8* v_buf, |
| 1706 uint8* dst_rgba, | 1799 uint8* dst_rgba, |
| 1707 struct YuvConstants* yuvconstants, | 1800 struct YuvConstants* yuvconstants, |
| 1708 int width) { | 1801 int width) { |
| 1709 asm volatile ( | 1802 asm volatile ( |
| 1710 "sub %[u_buf],%[v_buf] \n" | 1803 "sub %[u_buf],%[v_buf] \n" |
| 1711 "pcmpeqb %%xmm5,%%xmm5 \n" | 1804 "pcmpeqb %%xmm5,%%xmm5 \n" |
| 1712 LABELALIGN | 1805 LABELALIGN |
| 1713 "1: \n" | 1806 "1: \n" |
| 1714 READYUV422 | 1807 READYUV422 |
| 1715 YUVTORGB(yuvconstants) | 1808 YUVTORGB(yuvconstants) |
| 1716 STORERGBA | 1809 STORERGBA |
| 1717 "sub $0x8,%[width] \n" | 1810 "sub $0x8,%[width] \n" |
| 1718 "jg 1b \n" | 1811 "jg 1b \n" |
| 1719 : [y_buf]"+r"(y_buf), // %[y_buf] | 1812 : [y_buf]"+r"(y_buf), // %[y_buf] |
| 1720 [u_buf]"+r"(u_buf), // %[u_buf] | 1813 [u_buf]"+r"(u_buf), // %[u_buf] |
| 1721 [v_buf]"+r"(v_buf), // %[v_buf] | 1814 [v_buf]"+r"(v_buf), // %[v_buf] |
| 1722 [dst_rgba]"+r"(dst_rgba), // %[dst_rgba] | 1815 [dst_rgba]"+r"(dst_rgba), // %[dst_rgba] |
| 1723 [width]"+rm"(width) // %[width] | 1816 [width]"+rm"(width) // %[width] |
| 1724 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 1817 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
| 1725 : "memory", "cc", NACL_R14 | 1818 : "memory", "cc", NACL_R14 |
| 1726 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" | 1819 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
| 1727 ); | 1820 ); |
| 1728 } | 1821 } |
| 1729 | 1822 |
| 1730 #endif // defined(HAS_I422TOARGBROW_SSSE3) || defined(HAS_I422TOARGBROW_AVX2) | 1823 #endif // defined(HAS_I422TOARGBROW_SSSE3) || defined(HAS_I422TOARGBROW_AVX2) |
| 1731 | 1824 |
| 1732 // Read 8 UV from 422, upsample to 16 UV. | 1825 // Read 8 UV from 422, upsample to 16 UV. |
| 1733 #define READYUV422_AVX2 \ | 1826 #define READYUV422_AVX2 \ |
| 1734 "vmovq " MEMACCESS([u_buf]) ",%%xmm0 \n" \ | 1827 "vmovq " MEMACCESS([u_buf]) ",%%xmm0 \n" \ |
| 1735 MEMOPREG(vmovq, 0x00, [u_buf], [v_buf], 1, xmm1) \ | 1828 MEMOPREG(vmovq, 0x00, [u_buf], [v_buf], 1, xmm1) \ |
| 1736 "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \ | 1829 "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \ |
| (...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1795 "sub $0x10,%[width] \n" | 1888 "sub $0x10,%[width] \n" |
| 1796 "jg 1b \n" | 1889 "jg 1b \n" |
| 1797 "vzeroupper \n" | 1890 "vzeroupper \n" |
| 1798 : [y_buf]"+r"(y_buf), // %[y_buf] | 1891 : [y_buf]"+r"(y_buf), // %[y_buf] |
| 1799 [u_buf]"+r"(u_buf), // %[u_buf] | 1892 [u_buf]"+r"(u_buf), // %[u_buf] |
| 1800 [v_buf]"+r"(v_buf), // %[v_buf] | 1893 [v_buf]"+r"(v_buf), // %[v_buf] |
| 1801 [dst_bgra]"+r"(dst_bgra), // %[dst_bgra] | 1894 [dst_bgra]"+r"(dst_bgra), // %[dst_bgra] |
| 1802 [width]"+rm"(width) // %[width] | 1895 [width]"+rm"(width) // %[width] |
| 1803 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 1896 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
| 1804 : "memory", "cc", NACL_R14 | 1897 : "memory", "cc", NACL_R14 |
| 1805 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" | 1898 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
| 1806 ); | 1899 ); |
| 1807 } | 1900 } |
| 1808 #endif // HAS_I422TOBGRAROW_AVX2 | 1901 #endif // HAS_I422TOBGRAROW_AVX2 |
| 1809 | 1902 |
| 1810 #if defined(HAS_I422TOARGBROW_AVX2) | 1903 #if defined(HAS_I422TOARGBROW_AVX2) |
| 1811 // 16 pixels | 1904 // 16 pixels |
| 1812 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). | 1905 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). |
| 1813 void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf, | 1906 void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf, |
| 1814 const uint8* u_buf, | 1907 const uint8* u_buf, |
| 1815 const uint8* v_buf, | 1908 const uint8* v_buf, |
| (...skipping 22 matching lines...) Expand all Loading... |
| 1838 "sub $0x10,%[width] \n" | 1931 "sub $0x10,%[width] \n" |
| 1839 "jg 1b \n" | 1932 "jg 1b \n" |
| 1840 "vzeroupper \n" | 1933 "vzeroupper \n" |
| 1841 : [y_buf]"+r"(y_buf), // %[y_buf] | 1934 : [y_buf]"+r"(y_buf), // %[y_buf] |
| 1842 [u_buf]"+r"(u_buf), // %[u_buf] | 1935 [u_buf]"+r"(u_buf), // %[u_buf] |
| 1843 [v_buf]"+r"(v_buf), // %[v_buf] | 1936 [v_buf]"+r"(v_buf), // %[v_buf] |
| 1844 [dst_argb]"+r"(dst_argb), // %[dst_argb] | 1937 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
| 1845 [width]"+rm"(width) // %[width] | 1938 [width]"+rm"(width) // %[width] |
| 1846 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 1939 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
| 1847 : "memory", "cc", NACL_R14 | 1940 : "memory", "cc", NACL_R14 |
| 1848 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" | 1941 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
| 1849 ); | 1942 ); |
| 1850 } | 1943 } |
| 1851 #endif // HAS_I422TOARGBROW_AVX2 | 1944 #endif // HAS_I422TOARGBROW_AVX2 |
| 1852 | 1945 |
| 1853 #if defined(HAS_I422TOABGRROW_AVX2) | 1946 #if defined(HAS_I422TOABGRROW_AVX2) |
| 1854 // 16 pixels | 1947 // 16 pixels |
| 1855 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes). | 1948 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes). |
| 1856 void OMITFP I422ToABGRRow_AVX2(const uint8* y_buf, | 1949 void OMITFP I422ToABGRRow_AVX2(const uint8* y_buf, |
| 1857 const uint8* u_buf, | 1950 const uint8* u_buf, |
| 1858 const uint8* v_buf, | 1951 const uint8* v_buf, |
| (...skipping 21 matching lines...) Expand all Loading... |
| 1880 "sub $0x10,%[width] \n" | 1973 "sub $0x10,%[width] \n" |
| 1881 "jg 1b \n" | 1974 "jg 1b \n" |
| 1882 "vzeroupper \n" | 1975 "vzeroupper \n" |
| 1883 : [y_buf]"+r"(y_buf), // %[y_buf] | 1976 : [y_buf]"+r"(y_buf), // %[y_buf] |
| 1884 [u_buf]"+r"(u_buf), // %[u_buf] | 1977 [u_buf]"+r"(u_buf), // %[u_buf] |
| 1885 [v_buf]"+r"(v_buf), // %[v_buf] | 1978 [v_buf]"+r"(v_buf), // %[v_buf] |
| 1886 [dst_argb]"+r"(dst_argb), // %[dst_argb] | 1979 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
| 1887 [width]"+rm"(width) // %[width] | 1980 [width]"+rm"(width) // %[width] |
| 1888 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 1981 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
| 1889 : "memory", "cc", NACL_R14 | 1982 : "memory", "cc", NACL_R14 |
| 1890 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" | 1983 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
| 1891 ); | 1984 ); |
| 1892 } | 1985 } |
| 1893 #endif // HAS_I422TOABGRROW_AVX2 | 1986 #endif // HAS_I422TOABGRROW_AVX2 |
| 1894 | 1987 |
| 1895 #if defined(HAS_I422TORGBAROW_AVX2) | 1988 #if defined(HAS_I422TORGBAROW_AVX2) |
| 1896 // 16 pixels | 1989 // 16 pixels |
| 1897 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes). | 1990 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes). |
| 1898 void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf, | 1991 void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf, |
| 1899 const uint8* u_buf, | 1992 const uint8* u_buf, |
| 1900 const uint8* v_buf, | 1993 const uint8* v_buf, |
| (...skipping 21 matching lines...) Expand all Loading... |
| 1922 "sub $0x10,%[width] \n" | 2015 "sub $0x10,%[width] \n" |
| 1923 "jg 1b \n" | 2016 "jg 1b \n" |
| 1924 "vzeroupper \n" | 2017 "vzeroupper \n" |
| 1925 : [y_buf]"+r"(y_buf), // %[y_buf] | 2018 : [y_buf]"+r"(y_buf), // %[y_buf] |
| 1926 [u_buf]"+r"(u_buf), // %[u_buf] | 2019 [u_buf]"+r"(u_buf), // %[u_buf] |
| 1927 [v_buf]"+r"(v_buf), // %[v_buf] | 2020 [v_buf]"+r"(v_buf), // %[v_buf] |
| 1928 [dst_argb]"+r"(dst_argb), // %[dst_argb] | 2021 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
| 1929 [width]"+rm"(width) // %[width] | 2022 [width]"+rm"(width) // %[width] |
| 1930 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 2023 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
| 1931 : "memory", "cc", NACL_R14 | 2024 : "memory", "cc", NACL_R14 |
| 1932 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" | 2025 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
| 1933 ); | 2026 ); |
| 1934 } | 2027 } |
| 1935 #endif // HAS_I422TORGBAROW_AVX2 | 2028 #endif // HAS_I422TORGBAROW_AVX2 |
| 1936 | 2029 |
| 1937 #ifdef HAS_I400TOARGBROW_SSE2 | 2030 #ifdef HAS_I400TOARGBROW_SSE2 |
| 1938 void I400ToARGBRow_SSE2(const uint8* y_buf, uint8* dst_argb, int width) { | 2031 void I400ToARGBRow_SSE2(const uint8* y_buf, uint8* dst_argb, int width) { |
| 1939 asm volatile ( | 2032 asm volatile ( |
| 1940 "mov $0x4a354a35,%%eax \n" // 4a35 = 18997 = 1.164 | 2033 "mov $0x4a354a35,%%eax \n" // 4a35 = 18997 = 1.164 |
| 1941 "movd %%eax,%%xmm2 \n" | 2034 "movd %%eax,%%xmm2 \n" |
| 1942 "pshufd $0x0,%%xmm2,%%xmm2 \n" | 2035 "pshufd $0x0,%%xmm2,%%xmm2 \n" |
| (...skipping 3258 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5201 ); | 5294 ); |
| 5202 } | 5295 } |
| 5203 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 5296 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
| 5204 | 5297 |
| 5205 #endif // defined(__x86_64__) || defined(__i386__) | 5298 #endif // defined(__x86_64__) || defined(__i386__) |
| 5206 | 5299 |
| 5207 #ifdef __cplusplus | 5300 #ifdef __cplusplus |
| 5208 } // extern "C" | 5301 } // extern "C" |
| 5209 } // namespace libyuv | 5302 } // namespace libyuv |
| 5210 #endif | 5303 #endif |
| OLD | NEW |