OLD | NEW |
1 // VERSION 2 | 1 // VERSION 2 |
2 /* | 2 /* |
3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
4 * | 4 * |
5 * Use of this source code is governed by a BSD-style license | 5 * Use of this source code is governed by a BSD-style license |
6 * that can be found in the LICENSE file in the root of the source | 6 * that can be found in the LICENSE file in the root of the source |
7 * tree. An additional intellectual property rights grant can be found | 7 * tree. An additional intellectual property rights grant can be found |
8 * in the file PATENTS. All contributing project authors may | 8 * in the file PATENTS. All contributing project authors may |
9 * be found in the AUTHORS file in the root of the source tree. | 9 * be found in the AUTHORS file in the root of the source tree. |
10 */ | 10 */ |
(...skipping 1306 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1317 ); | 1317 ); |
1318 } | 1318 } |
1319 | 1319 |
1320 #if defined(HAS_I422TOARGBROW_SSSE3) || defined(HAS_I422TOARGBROW_AVX2) | 1320 #if defined(HAS_I422TOARGBROW_SSSE3) || defined(HAS_I422TOARGBROW_AVX2) |
1321 | 1321 |
1322 // Read 8 UV from 444 | 1322 // Read 8 UV from 444 |
1323 #define READYUV444 \ | 1323 #define READYUV444 \ |
1324 "movq " MEMACCESS([u_buf]) ",%%xmm0 \n" \ | 1324 "movq " MEMACCESS([u_buf]) ",%%xmm0 \n" \ |
1325 MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1) \ | 1325 MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1) \ |
1326 "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \ | 1326 "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \ |
1327 "punpcklbw %%xmm1,%%xmm0 \n" | 1327 "punpcklbw %%xmm1,%%xmm0 \n" \ |
| 1328 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ |
| 1329 "punpcklbw %%xmm4,%%xmm4 \n" \ |
| 1330 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" |
1328 | 1331 |
1329 // Read 4 UV from 422, upsample to 8 UV | 1332 // Read 4 UV from 422, upsample to 8 UV |
1330 #define READYUV422 \ | 1333 #define READYUV422 \ |
1331 "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \ | 1334 "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \ |
1332 MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \ | 1335 MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \ |
1333 "lea " MEMLEA(0x4, [u_buf]) ",%[u_buf] \n" \ | 1336 "lea " MEMLEA(0x4, [u_buf]) ",%[u_buf] \n" \ |
1334 "punpcklbw %%xmm1,%%xmm0 \n" \ | 1337 "punpcklbw %%xmm1,%%xmm0 \n" \ |
1335 "punpcklwd %%xmm0,%%xmm0 \n" | 1338 "punpcklwd %%xmm0,%%xmm0 \n" \ |
| 1339 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ |
| 1340 "punpcklbw %%xmm4,%%xmm4 \n" \ |
| 1341 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" |
1336 | 1342 |
1337 // Read 2 UV from 411, upsample to 8 UV | 1343 // Read 2 UV from 411, upsample to 8 UV |
1338 #define READYUV411 \ | 1344 #define READYUV411 \ |
1339 "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \ | 1345 "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \ |
1340 MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \ | 1346 MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \ |
1341 "lea " MEMLEA(0x2, [u_buf]) ",%[u_buf] \n" \ | 1347 "lea " MEMLEA(0x2, [u_buf]) ",%[u_buf] \n" \ |
1342 "punpcklbw %%xmm1,%%xmm0 \n" \ | 1348 "punpcklbw %%xmm1,%%xmm0 \n" \ |
1343 "punpcklwd %%xmm0,%%xmm0 \n" \ | 1349 "punpcklwd %%xmm0,%%xmm0 \n" \ |
1344 "punpckldq %%xmm0,%%xmm0 \n" | 1350 "punpckldq %%xmm0,%%xmm0 \n" \ |
| 1351 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ |
| 1352 "punpcklbw %%xmm4,%%xmm4 \n" \ |
| 1353 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" |
1345 | 1354 |
1346 // Read 4 UV from NV12, upsample to 8 UV | 1355 // Read 4 UV from NV12, upsample to 8 UV |
1347 #define READNV12 \ | 1356 #define READNV12 \ |
1348 "movq " MEMACCESS([uv_buf]) ",%%xmm0 \n" \ | 1357 "movq " MEMACCESS([uv_buf]) ",%%xmm0 \n" \ |
1349 "lea " MEMLEA(0x8, [uv_buf]) ",%[uv_buf] \n" \ | 1358 "lea " MEMLEA(0x8, [uv_buf]) ",%[uv_buf] \n" \ |
1350 "punpcklwd %%xmm0,%%xmm0 \n" | 1359 "punpcklwd %%xmm0,%%xmm0 \n" \ |
| 1360 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ |
| 1361 "punpcklbw %%xmm4,%%xmm4 \n" \ |
| 1362 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" |
| 1363 |
| 1364 // YUY2 shuf 8 Y to 16 Y. |
| 1365 static const vec8 kShuffleYUY2Y = { |
| 1366 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14 |
| 1367 }; |
| 1368 |
| 1369 // YUY2 shuf 4 UV to 8 UV. |
| 1370 static const vec8 kShuffleYUY2UV = { |
| 1371 1, 3, 1, 3, 5, 7, 5, 7, 9, 11, 9, 11, 13, 15, 13, 15 |
| 1372 }; |
| 1373 |
| 1374 // Read 4 YUY2 with 8 Y and upsample 4 UV to 8 UV. |
| 1375 #define READYUY2 \ |
| 1376 "movdqu " MEMACCESS([yuy2_buf]) ",%%xmm4 \n" \ |
| 1377 "pshufb %[kShuffleYUY2Y], %%xmm4 \n" \ |
| 1378 "movdqu " MEMACCESS([yuy2_buf]) ",%%xmm0 \n" \ |
| 1379 "pshufb %[kShuffleYUY2UV], %%xmm0 \n" \ |
| 1380 "lea " MEMLEA(0x10, [yuy2_buf]) ",%[yuy2_buf] \n" |
| 1381 |
| 1382 // UYVY shuf 8 Y to 16 Y. |
| 1383 static const vec8 kShuffleUYVYY = { |
| 1384 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15 |
| 1385 }; |
| 1386 |
| 1387 // UYVY shuf 4 UV to 8 UV. |
| 1388 static const vec8 kShuffleUYVYUV = { |
| 1389 0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14 |
| 1390 }; |
| 1391 |
| 1392 // Read 4 UYVY with 8 Y and upsample 4 UV to 8 UV. |
| 1393 #define READUYVY \ |
| 1394 "movdqu " MEMACCESS([uyvy_buf]) ",%%xmm4 \n" \ |
| 1395 "pshufb %[kShuffleUYVYY], %%xmm4 \n" \ |
| 1396 "movdqu " MEMACCESS([uyvy_buf]) ",%%xmm0 \n" \ |
| 1397 "pshufb %[kShuffleUYVYUV], %%xmm0 \n" \ |
| 1398 "lea " MEMLEA(0x10, [uyvy_buf]) ",%[uyvy_buf] \n" |
1351 | 1399 |
1352 // Convert 8 pixels: 8 UV and 8 Y | 1400 // Convert 8 pixels: 8 UV and 8 Y |
1353 #define YUVTORGB(yuvconstants) \ | 1401 #define YUVTORGB(yuvconstants) \ |
1354 "movdqa %%xmm0,%%xmm1 \n" \ | 1402 "movdqa %%xmm0,%%xmm1 \n" \ |
1355 "movdqa %%xmm0,%%xmm2 \n" \ | 1403 "movdqa %%xmm0,%%xmm2 \n" \ |
1356 "movdqa %%xmm0,%%xmm3 \n" \ | 1404 "movdqa %%xmm0,%%xmm3 \n" \ |
1357 "movdqa " MEMACCESS2(96, [yuvconstants]) ",%%xmm0 \n" \ | 1405 "movdqa " MEMACCESS2(96, [yuvconstants]) ",%%xmm0 \n" \ |
1358 "pmaddubsw " MEMACCESS([yuvconstants]) ",%%xmm1 \n" \ | 1406 "pmaddubsw " MEMACCESS([yuvconstants]) ",%%xmm1 \n" \ |
1359 "psubw %%xmm1,%%xmm0 \n" \ | 1407 "psubw %%xmm1,%%xmm0 \n" \ |
1360 "movdqa " MEMACCESS2(128, [yuvconstants]) ",%%xmm1 \n" \ | 1408 "movdqa " MEMACCESS2(128, [yuvconstants]) ",%%xmm1 \n" \ |
1361 "pmaddubsw " MEMACCESS2(32, [yuvconstants]) ",%%xmm2 \n" \ | 1409 "pmaddubsw " MEMACCESS2(32, [yuvconstants]) ",%%xmm2 \n" \ |
1362 "psubw %%xmm2,%%xmm1 \n" \ | 1410 "psubw %%xmm2,%%xmm1 \n" \ |
1363 "movdqa " MEMACCESS2(160, [yuvconstants]) ",%%xmm2 \n" \ | 1411 "movdqa " MEMACCESS2(160, [yuvconstants]) ",%%xmm2 \n" \ |
1364 "pmaddubsw " MEMACCESS2(64, [yuvconstants]) ",%%xmm3 \n" \ | 1412 "pmaddubsw " MEMACCESS2(64, [yuvconstants]) ",%%xmm3 \n" \ |
1365 "psubw %%xmm3,%%xmm2 \n" \ | 1413 "psubw %%xmm3,%%xmm2 \n" \ |
1366 "movq " MEMACCESS([y_buf]) ",%%xmm3 \n" \ | 1414 "pmulhuw " MEMACCESS2(192, [yuvconstants]) ",%%xmm4 \n" \ |
1367 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" \ | 1415 "paddsw %%xmm4,%%xmm0 \n" \ |
1368 "punpcklbw %%xmm3,%%xmm3 \n" \ | 1416 "paddsw %%xmm4,%%xmm1 \n" \ |
1369 "pmulhuw " MEMACCESS2(192, [yuvconstants]) ",%%xmm3 \n" \ | 1417 "paddsw %%xmm4,%%xmm2 \n" \ |
1370 "paddsw %%xmm3,%%xmm0 \n" \ | |
1371 "paddsw %%xmm3,%%xmm1 \n" \ | |
1372 "paddsw %%xmm3,%%xmm2 \n" \ | |
1373 "psraw $0x6,%%xmm0 \n" \ | 1418 "psraw $0x6,%%xmm0 \n" \ |
1374 "psraw $0x6,%%xmm1 \n" \ | 1419 "psraw $0x6,%%xmm1 \n" \ |
1375 "psraw $0x6,%%xmm2 \n" \ | 1420 "psraw $0x6,%%xmm2 \n" \ |
1376 "packuswb %%xmm0,%%xmm0 \n" \ | 1421 "packuswb %%xmm0,%%xmm0 \n" \ |
1377 "packuswb %%xmm1,%%xmm1 \n" \ | 1422 "packuswb %%xmm1,%%xmm1 \n" \ |
1378 "packuswb %%xmm2,%%xmm2 \n" | 1423 "packuswb %%xmm2,%%xmm2 \n" |
1379 | 1424 |
1380 // Store 8 ARGB values. Assumes XMM5 is all ones (set via pcmpeqb). | 1425 // Store 8 ARGB values. Assumes XMM5 is all ones (set via pcmpeqb). |
1381 #define STOREARGB \ | 1426 #define STOREARGB \ |
1382 "punpcklbw %%xmm1,%%xmm0 \n" \ | 1427 "punpcklbw %%xmm1,%%xmm0 \n" \ |
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1439 STOREARGB | 1484 STOREARGB |
1440 "sub $0x8,%[width] \n" | 1485 "sub $0x8,%[width] \n" |
1441 "jg 1b \n" | 1486 "jg 1b \n" |
1442 : [y_buf]"+r"(y_buf), // %[y_buf] | 1487 : [y_buf]"+r"(y_buf), // %[y_buf] |
1443 [u_buf]"+r"(u_buf), // %[u_buf] | 1488 [u_buf]"+r"(u_buf), // %[u_buf] |
1444 [v_buf]"+r"(v_buf), // %[v_buf] | 1489 [v_buf]"+r"(v_buf), // %[v_buf] |
1445 [dst_argb]"+r"(dst_argb), // %[dst_argb] | 1490 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
1446 [width]"+rm"(width) // %[width] | 1491 [width]"+rm"(width) // %[width] |
1447 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 1492 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
1448 : "memory", "cc", NACL_R14 | 1493 : "memory", "cc", NACL_R14 |
1449 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" | 1494 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
1450 ); | 1495 ); |
1451 } | 1496 } |
1452 | 1497 |
1453 void OMITFP I444ToABGRRow_SSSE3(const uint8* y_buf, | 1498 void OMITFP I444ToABGRRow_SSSE3(const uint8* y_buf, |
1454 const uint8* u_buf, | 1499 const uint8* u_buf, |
1455 const uint8* v_buf, | 1500 const uint8* v_buf, |
1456 uint8* dst_abgr, | 1501 uint8* dst_abgr, |
1457 struct YuvConstants* yuvconstants, | 1502 struct YuvConstants* yuvconstants, |
1458 int width) { | 1503 int width) { |
1459 asm volatile ( | 1504 asm volatile ( |
1460 "sub %[u_buf],%[v_buf] \n" | 1505 "sub %[u_buf],%[v_buf] \n" |
1461 "pcmpeqb %%xmm5,%%xmm5 \n" | 1506 "pcmpeqb %%xmm5,%%xmm5 \n" |
1462 LABELALIGN | 1507 LABELALIGN |
1463 "1: \n" | 1508 "1: \n" |
1464 READYUV444 | 1509 READYUV444 |
1465 YUVTORGB(yuvconstants) | 1510 YUVTORGB(yuvconstants) |
1466 STOREABGR | 1511 STOREABGR |
1467 "sub $0x8,%[width] \n" | 1512 "sub $0x8,%[width] \n" |
1468 "jg 1b \n" | 1513 "jg 1b \n" |
1469 : [y_buf]"+r"(y_buf), // %[y_buf] | 1514 : [y_buf]"+r"(y_buf), // %[y_buf] |
1470 [u_buf]"+r"(u_buf), // %[u_buf] | 1515 [u_buf]"+r"(u_buf), // %[u_buf] |
1471 [v_buf]"+r"(v_buf), // %[v_buf] | 1516 [v_buf]"+r"(v_buf), // %[v_buf] |
1472 [dst_abgr]"+r"(dst_abgr), // %[dst_abgr] | 1517 [dst_abgr]"+r"(dst_abgr), // %[dst_abgr] |
1473 [width]"+rm"(width) // %[width] | 1518 [width]"+rm"(width) // %[width] |
1474 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 1519 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
1475 : "memory", "cc", NACL_R14 | 1520 : "memory", "cc", NACL_R14 |
1476 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" | 1521 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
1477 ); | 1522 ); |
1478 } | 1523 } |
1479 | 1524 |
1480 // TODO(fbarchard): Consider putting masks into constants. | 1525 // TODO(fbarchard): Consider putting masks into constants. |
1481 void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf, | 1526 void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf, |
1482 const uint8* u_buf, | 1527 const uint8* u_buf, |
1483 const uint8* v_buf, | 1528 const uint8* v_buf, |
1484 uint8* dst_rgb24, | 1529 uint8* dst_rgb24, |
1485 struct YuvConstants* yuvconstants, | 1530 struct YuvConstants* yuvconstants, |
1486 int width) { | 1531 int width) { |
(...skipping 25 matching lines...) Expand all Loading... |
1512 // TODO(fbarchard): Make width a register for 32 bit. | 1557 // TODO(fbarchard): Make width a register for 32 bit. |
1513 #if defined(__i386__) && defined(__pic__) | 1558 #if defined(__i386__) && defined(__pic__) |
1514 [width]"+m"(width) // %[width] | 1559 [width]"+m"(width) // %[width] |
1515 #else | 1560 #else |
1516 [width]"+rm"(width) // %[width] | 1561 [width]"+rm"(width) // %[width] |
1517 #endif | 1562 #endif |
1518 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] | 1563 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] |
1519 [kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0), | 1564 [kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0), |
1520 [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24) | 1565 [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24) |
1521 : "memory", "cc", NACL_R14 | 1566 : "memory", "cc", NACL_R14 |
1522 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5", "xmm6" | 1567 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" |
1523 ); | 1568 ); |
1524 } | 1569 } |
1525 | 1570 |
1526 void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf, | 1571 void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf, |
1527 const uint8* u_buf, | 1572 const uint8* u_buf, |
1528 const uint8* v_buf, | 1573 const uint8* v_buf, |
1529 uint8* dst_raw, | 1574 uint8* dst_raw, |
1530 struct YuvConstants* yuvconstants, | 1575 struct YuvConstants* yuvconstants, |
1531 int width) { | 1576 int width) { |
1532 asm volatile ( | 1577 asm volatile ( |
(...skipping 24 matching lines...) Expand all Loading... |
1557 // TODO(fbarchard): Make width a register for 32 bit. | 1602 // TODO(fbarchard): Make width a register for 32 bit. |
1558 #if defined(__i386__) && defined(__pic__) | 1603 #if defined(__i386__) && defined(__pic__) |
1559 [width]"+m"(width) // %[width] | 1604 [width]"+m"(width) // %[width] |
1560 #else | 1605 #else |
1561 [width]"+rm"(width) // %[width] | 1606 [width]"+rm"(width) // %[width] |
1562 #endif | 1607 #endif |
1563 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] | 1608 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] |
1564 [kShuffleMaskARGBToRAW_0]"m"(kShuffleMaskARGBToRAW_0), | 1609 [kShuffleMaskARGBToRAW_0]"m"(kShuffleMaskARGBToRAW_0), |
1565 [kShuffleMaskARGBToRAW]"m"(kShuffleMaskARGBToRAW) | 1610 [kShuffleMaskARGBToRAW]"m"(kShuffleMaskARGBToRAW) |
1566 : "memory", "cc", NACL_R14 | 1611 : "memory", "cc", NACL_R14 |
1567 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5", "xmm6" | 1612 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" |
1568 ); | 1613 ); |
1569 } | 1614 } |
1570 | 1615 |
1571 void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf, | 1616 void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf, |
1572 const uint8* u_buf, | 1617 const uint8* u_buf, |
1573 const uint8* v_buf, | 1618 const uint8* v_buf, |
1574 uint8* dst_argb, | 1619 uint8* dst_argb, |
1575 struct YuvConstants* yuvconstants, | 1620 struct YuvConstants* yuvconstants, |
1576 int width) { | 1621 int width) { |
1577 asm volatile ( | 1622 asm volatile ( |
1578 "sub %[u_buf],%[v_buf] \n" | 1623 "sub %[u_buf],%[v_buf] \n" |
1579 "pcmpeqb %%xmm5,%%xmm5 \n" | 1624 "pcmpeqb %%xmm5,%%xmm5 \n" |
1580 LABELALIGN | 1625 LABELALIGN |
1581 "1: \n" | 1626 "1: \n" |
1582 READYUV422 | 1627 READYUV422 |
1583 YUVTORGB(yuvconstants) | 1628 YUVTORGB(yuvconstants) |
1584 STOREARGB | 1629 STOREARGB |
1585 "sub $0x8,%[width] \n" | 1630 "sub $0x8,%[width] \n" |
1586 "jg 1b \n" | 1631 "jg 1b \n" |
1587 : [y_buf]"+r"(y_buf), // %[y_buf] | 1632 : [y_buf]"+r"(y_buf), // %[y_buf] |
1588 [u_buf]"+r"(u_buf), // %[u_buf] | 1633 [u_buf]"+r"(u_buf), // %[u_buf] |
1589 [v_buf]"+r"(v_buf), // %[v_buf] | 1634 [v_buf]"+r"(v_buf), // %[v_buf] |
1590 [dst_argb]"+r"(dst_argb), // %[dst_argb] | 1635 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
1591 [width]"+rm"(width) // %[width] | 1636 [width]"+rm"(width) // %[width] |
1592 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 1637 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
1593 : "memory", "cc", NACL_R14 | 1638 : "memory", "cc", NACL_R14 |
1594 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" | 1639 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
1595 ); | 1640 ); |
1596 } | 1641 } |
1597 | 1642 |
1598 void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf, | 1643 void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf, |
1599 const uint8* u_buf, | 1644 const uint8* u_buf, |
1600 const uint8* v_buf, | 1645 const uint8* v_buf, |
1601 uint8* dst_argb, | 1646 uint8* dst_argb, |
1602 struct YuvConstants* yuvconstants, | 1647 struct YuvConstants* yuvconstants, |
1603 int width) { | 1648 int width) { |
1604 asm volatile ( | 1649 asm volatile ( |
1605 "sub %[u_buf],%[v_buf] \n" | 1650 "sub %[u_buf],%[v_buf] \n" |
1606 "pcmpeqb %%xmm5,%%xmm5 \n" | 1651 "pcmpeqb %%xmm5,%%xmm5 \n" |
1607 LABELALIGN | 1652 LABELALIGN |
1608 "1: \n" | 1653 "1: \n" |
1609 READYUV411 | 1654 READYUV411 |
1610 YUVTORGB(yuvconstants) | 1655 YUVTORGB(yuvconstants) |
1611 STOREARGB | 1656 STOREARGB |
1612 "sub $0x8,%[width] \n" | 1657 "sub $0x8,%[width] \n" |
1613 "jg 1b \n" | 1658 "jg 1b \n" |
1614 : [y_buf]"+r"(y_buf), // %[y_buf] | 1659 : [y_buf]"+r"(y_buf), // %[y_buf] |
1615 [u_buf]"+r"(u_buf), // %[u_buf] | 1660 [u_buf]"+r"(u_buf), // %[u_buf] |
1616 [v_buf]"+r"(v_buf), // %[v_buf] | 1661 [v_buf]"+r"(v_buf), // %[v_buf] |
1617 [dst_argb]"+r"(dst_argb), // %[dst_argb] | 1662 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
1618 [width]"+rm"(width) // %[width] | 1663 [width]"+rm"(width) // %[width] |
1619 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 1664 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
1620 : "memory", "cc", NACL_R14 | 1665 : "memory", "cc", NACL_R14 |
1621 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" | 1666 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
1622 ); | 1667 ); |
1623 } | 1668 } |
1624 | 1669 |
1625 void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf, | 1670 void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf, |
1626 const uint8* uv_buf, | 1671 const uint8* uv_buf, |
1627 uint8* dst_argb, | 1672 uint8* dst_argb, |
1628 struct YuvConstants* yuvconstants, | 1673 struct YuvConstants* yuvconstants, |
1629 int width) { | 1674 int width) { |
1630 asm volatile ( | 1675 asm volatile ( |
1631 "pcmpeqb %%xmm5,%%xmm5 \n" | 1676 "pcmpeqb %%xmm5,%%xmm5 \n" |
1632 LABELALIGN | 1677 LABELALIGN |
1633 "1: \n" | 1678 "1: \n" |
1634 READNV12 | 1679 READNV12 |
1635 YUVTORGB(yuvconstants) | 1680 YUVTORGB(yuvconstants) |
1636 STOREARGB | 1681 STOREARGB |
1637 "sub $0x8,%[width] \n" | 1682 "sub $0x8,%[width] \n" |
1638 "jg 1b \n" | 1683 "jg 1b \n" |
1639 : [y_buf]"+r"(y_buf), // %[y_buf] | 1684 : [y_buf]"+r"(y_buf), // %[y_buf] |
1640 [uv_buf]"+r"(uv_buf), // %[uv_buf] | 1685 [uv_buf]"+r"(uv_buf), // %[uv_buf] |
1641 [dst_argb]"+r"(dst_argb), // %[dst_argb] | 1686 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
1642 [width]"+rm"(width) // %[width] | 1687 [width]"+rm"(width) // %[width] |
1643 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 1688 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
1644 // Does not use r14. | 1689 // Does not use r14. |
1645 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" | 1690 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
1646 ); | 1691 ); |
1647 } | 1692 } |
1648 | 1693 |
| 1694 void OMITFP YUY2ToARGBRow_SSSE3(const uint8* yuy2_buf, |
| 1695 uint8* dst_argb, |
| 1696 struct YuvConstants* yuvconstants, |
| 1697 int width) { |
| 1698 asm volatile ( |
| 1699 "pcmpeqb %%xmm5,%%xmm5 \n" |
| 1700 LABELALIGN |
| 1701 "1: \n" |
| 1702 READYUY2 |
| 1703 YUVTORGB(yuvconstants) |
| 1704 STOREARGB |
| 1705 "sub $0x8,%[width] \n" |
| 1706 "jg 1b \n" |
| 1707 : [yuy2_buf]"+r"(yuy2_buf), // %[yuy2_buf] |
| 1708 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
| 1709 [width]"+rm"(width) // %[width] |
| 1710 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] |
| 1711 [kShuffleYUY2Y]"m"(kShuffleYUY2Y), |
| 1712 [kShuffleYUY2UV]"m"(kShuffleYUY2UV) |
| 1713 // Does not use r14. |
| 1714 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
| 1715 ); |
| 1716 } |
| 1717 |
| 1718 void OMITFP UYVYToARGBRow_SSSE3(const uint8* uyvy_buf, |
| 1719 uint8* dst_argb, |
| 1720 struct YuvConstants* yuvconstants, |
| 1721 int width) { |
| 1722 asm volatile ( |
| 1723 "pcmpeqb %%xmm5,%%xmm5 \n" |
| 1724 LABELALIGN |
| 1725 "1: \n" |
| 1726 READUYVY |
| 1727 YUVTORGB(yuvconstants) |
| 1728 STOREARGB |
| 1729 "sub $0x8,%[width] \n" |
| 1730 "jg 1b \n" |
| 1731 : [uyvy_buf]"+r"(uyvy_buf), // %[uyvy_buf] |
| 1732 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
| 1733 [width]"+rm"(width) // %[width] |
| 1734 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] |
| 1735 [kShuffleUYVYY]"m"(kShuffleUYVYY), |
| 1736 [kShuffleUYVYUV]"m"(kShuffleUYVYUV) |
| 1737 // Does not use r14. |
| 1738 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
| 1739 ); |
| 1740 } |
| 1741 |
1649 void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf, | 1742 void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf, |
1650 const uint8* u_buf, | 1743 const uint8* u_buf, |
1651 const uint8* v_buf, | 1744 const uint8* v_buf, |
1652 uint8* dst_bgra, | 1745 uint8* dst_bgra, |
1653 struct YuvConstants* yuvconstants, | 1746 struct YuvConstants* yuvconstants, |
1654 int width) { | 1747 int width) { |
1655 asm volatile ( | 1748 asm volatile ( |
1656 "sub %[u_buf],%[v_buf] \n" | 1749 "sub %[u_buf],%[v_buf] \n" |
1657 "pcmpeqb %%xmm5,%%xmm5 \n" | 1750 "pcmpeqb %%xmm5,%%xmm5 \n" |
1658 LABELALIGN | 1751 LABELALIGN |
1659 "1: \n" | 1752 "1: \n" |
1660 READYUV422 | 1753 READYUV422 |
1661 YUVTORGB(yuvconstants) | 1754 YUVTORGB(yuvconstants) |
1662 STOREBGRA | 1755 STOREBGRA |
1663 "sub $0x8,%[width] \n" | 1756 "sub $0x8,%[width] \n" |
1664 "jg 1b \n" | 1757 "jg 1b \n" |
1665 : [y_buf]"+r"(y_buf), // %[y_buf] | 1758 : [y_buf]"+r"(y_buf), // %[y_buf] |
1666 [u_buf]"+r"(u_buf), // %[u_buf] | 1759 [u_buf]"+r"(u_buf), // %[u_buf] |
1667 [v_buf]"+r"(v_buf), // %[v_buf] | 1760 [v_buf]"+r"(v_buf), // %[v_buf] |
1668 [dst_bgra]"+r"(dst_bgra), // %[dst_bgra] | 1761 [dst_bgra]"+r"(dst_bgra), // %[dst_bgra] |
1669 [width]"+rm"(width) // %[width] | 1762 [width]"+rm"(width) // %[width] |
1670 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 1763 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
1671 : "memory", "cc", NACL_R14 | 1764 : "memory", "cc", NACL_R14 |
1672 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" | 1765 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
1673 ); | 1766 ); |
1674 } | 1767 } |
1675 | 1768 |
1676 void OMITFP I422ToABGRRow_SSSE3(const uint8* y_buf, | 1769 void OMITFP I422ToABGRRow_SSSE3(const uint8* y_buf, |
1677 const uint8* u_buf, | 1770 const uint8* u_buf, |
1678 const uint8* v_buf, | 1771 const uint8* v_buf, |
1679 uint8* dst_abgr, | 1772 uint8* dst_abgr, |
1680 struct YuvConstants* yuvconstants, | 1773 struct YuvConstants* yuvconstants, |
1681 int width) { | 1774 int width) { |
1682 asm volatile ( | 1775 asm volatile ( |
1683 "sub %[u_buf],%[v_buf] \n" | 1776 "sub %[u_buf],%[v_buf] \n" |
1684 "pcmpeqb %%xmm5,%%xmm5 \n" | 1777 "pcmpeqb %%xmm5,%%xmm5 \n" |
1685 LABELALIGN | 1778 LABELALIGN |
1686 "1: \n" | 1779 "1: \n" |
1687 READYUV422 | 1780 READYUV422 |
1688 YUVTORGB(yuvconstants) | 1781 YUVTORGB(yuvconstants) |
1689 STOREABGR | 1782 STOREABGR |
1690 "sub $0x8,%[width] \n" | 1783 "sub $0x8,%[width] \n" |
1691 "jg 1b \n" | 1784 "jg 1b \n" |
1692 : [y_buf]"+r"(y_buf), // %[y_buf] | 1785 : [y_buf]"+r"(y_buf), // %[y_buf] |
1693 [u_buf]"+r"(u_buf), // %[u_buf] | 1786 [u_buf]"+r"(u_buf), // %[u_buf] |
1694 [v_buf]"+r"(v_buf), // %[v_buf] | 1787 [v_buf]"+r"(v_buf), // %[v_buf] |
1695 [dst_abgr]"+r"(dst_abgr), // %[dst_abgr] | 1788 [dst_abgr]"+r"(dst_abgr), // %[dst_abgr] |
1696 [width]"+rm"(width) // %[width] | 1789 [width]"+rm"(width) // %[width] |
1697 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 1790 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
1698 : "memory", "cc", NACL_R14 | 1791 : "memory", "cc", NACL_R14 |
1699 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" | 1792 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
1700 ); | 1793 ); |
1701 } | 1794 } |
1702 | 1795 |
1703 void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, | 1796 void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, |
1704 const uint8* u_buf, | 1797 const uint8* u_buf, |
1705 const uint8* v_buf, | 1798 const uint8* v_buf, |
1706 uint8* dst_rgba, | 1799 uint8* dst_rgba, |
1707 struct YuvConstants* yuvconstants, | 1800 struct YuvConstants* yuvconstants, |
1708 int width) { | 1801 int width) { |
1709 asm volatile ( | 1802 asm volatile ( |
1710 "sub %[u_buf],%[v_buf] \n" | 1803 "sub %[u_buf],%[v_buf] \n" |
1711 "pcmpeqb %%xmm5,%%xmm5 \n" | 1804 "pcmpeqb %%xmm5,%%xmm5 \n" |
1712 LABELALIGN | 1805 LABELALIGN |
1713 "1: \n" | 1806 "1: \n" |
1714 READYUV422 | 1807 READYUV422 |
1715 YUVTORGB(yuvconstants) | 1808 YUVTORGB(yuvconstants) |
1716 STORERGBA | 1809 STORERGBA |
1717 "sub $0x8,%[width] \n" | 1810 "sub $0x8,%[width] \n" |
1718 "jg 1b \n" | 1811 "jg 1b \n" |
1719 : [y_buf]"+r"(y_buf), // %[y_buf] | 1812 : [y_buf]"+r"(y_buf), // %[y_buf] |
1720 [u_buf]"+r"(u_buf), // %[u_buf] | 1813 [u_buf]"+r"(u_buf), // %[u_buf] |
1721 [v_buf]"+r"(v_buf), // %[v_buf] | 1814 [v_buf]"+r"(v_buf), // %[v_buf] |
1722 [dst_rgba]"+r"(dst_rgba), // %[dst_rgba] | 1815 [dst_rgba]"+r"(dst_rgba), // %[dst_rgba] |
1723 [width]"+rm"(width) // %[width] | 1816 [width]"+rm"(width) // %[width] |
1724 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 1817 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
1725 : "memory", "cc", NACL_R14 | 1818 : "memory", "cc", NACL_R14 |
1726 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" | 1819 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
1727 ); | 1820 ); |
1728 } | 1821 } |
1729 | 1822 |
1730 #endif // HAS_I422TOARGBROW_SSSE3 | 1823 #endif // HAS_I422TOARGBROW_SSSE3 |
1731 | 1824 |
1732 // Read 8 UV from 422, upsample to 16 UV. | 1825 // Read 8 UV from 422, upsample to 16 UV. |
1733 #define READYUV422_AVX2 \ | 1826 #define READYUV422_AVX2 \ |
1734 "vmovq " MEMACCESS([u_buf]) ",%%xmm0 \n" \ | 1827 "vmovq " MEMACCESS([u_buf]) ",%%xmm0 \n" \ |
1735 MEMOPREG(vmovq, 0x00, [u_buf], [v_buf], 1, xmm1) \ | 1828 MEMOPREG(vmovq, 0x00, [u_buf], [v_buf], 1, xmm1) \ |
1736 "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \ | 1829 "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \ |
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1795 "sub $0x10,%[width] \n" | 1888 "sub $0x10,%[width] \n" |
1796 "jg 1b \n" | 1889 "jg 1b \n" |
1797 "vzeroupper \n" | 1890 "vzeroupper \n" |
1798 : [y_buf]"+r"(y_buf), // %[y_buf] | 1891 : [y_buf]"+r"(y_buf), // %[y_buf] |
1799 [u_buf]"+r"(u_buf), // %[u_buf] | 1892 [u_buf]"+r"(u_buf), // %[u_buf] |
1800 [v_buf]"+r"(v_buf), // %[v_buf] | 1893 [v_buf]"+r"(v_buf), // %[v_buf] |
1801 [dst_bgra]"+r"(dst_bgra), // %[dst_bgra] | 1894 [dst_bgra]"+r"(dst_bgra), // %[dst_bgra] |
1802 [width]"+rm"(width) // %[width] | 1895 [width]"+rm"(width) // %[width] |
1803 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 1896 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
1804 : "memory", "cc", NACL_R14 | 1897 : "memory", "cc", NACL_R14 |
1805 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" | 1898 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
1806 ); | 1899 ); |
1807 } | 1900 } |
1808 #endif // HAS_I422TOBGRAROW_AVX2 | 1901 #endif // HAS_I422TOBGRAROW_AVX2 |
1809 | 1902 |
1810 #if defined(HAS_I422TOARGBROW_AVX2) | 1903 #if defined(HAS_I422TOARGBROW_AVX2) |
1811 // 16 pixels | 1904 // 16 pixels |
1812 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). | 1905 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). |
1813 void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf, | 1906 void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf, |
1814 const uint8* u_buf, | 1907 const uint8* u_buf, |
1815 const uint8* v_buf, | 1908 const uint8* v_buf, |
(...skipping 22 matching lines...) Expand all Loading... |
1838 "sub $0x10,%[width] \n" | 1931 "sub $0x10,%[width] \n" |
1839 "jg 1b \n" | 1932 "jg 1b \n" |
1840 "vzeroupper \n" | 1933 "vzeroupper \n" |
1841 : [y_buf]"+r"(y_buf), // %[y_buf] | 1934 : [y_buf]"+r"(y_buf), // %[y_buf] |
1842 [u_buf]"+r"(u_buf), // %[u_buf] | 1935 [u_buf]"+r"(u_buf), // %[u_buf] |
1843 [v_buf]"+r"(v_buf), // %[v_buf] | 1936 [v_buf]"+r"(v_buf), // %[v_buf] |
1844 [dst_argb]"+r"(dst_argb), // %[dst_argb] | 1937 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
1845 [width]"+rm"(width) // %[width] | 1938 [width]"+rm"(width) // %[width] |
1846 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 1939 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
1847 : "memory", "cc", NACL_R14 | 1940 : "memory", "cc", NACL_R14 |
1848 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" | 1941 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
1849 ); | 1942 ); |
1850 } | 1943 } |
1851 #endif // HAS_I422TOARGBROW_AVX2 | 1944 #endif // HAS_I422TOARGBROW_AVX2 |
1852 | 1945 |
1853 #if defined(HAS_I422TOABGRROW_AVX2) | 1946 #if defined(HAS_I422TOABGRROW_AVX2) |
1854 // 16 pixels | 1947 // 16 pixels |
1855 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes). | 1948 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes). |
1856 void OMITFP I422ToABGRRow_AVX2(const uint8* y_buf, | 1949 void OMITFP I422ToABGRRow_AVX2(const uint8* y_buf, |
1857 const uint8* u_buf, | 1950 const uint8* u_buf, |
1858 const uint8* v_buf, | 1951 const uint8* v_buf, |
(...skipping 21 matching lines...) Expand all Loading... |
1880 "sub $0x10,%[width] \n" | 1973 "sub $0x10,%[width] \n" |
1881 "jg 1b \n" | 1974 "jg 1b \n" |
1882 "vzeroupper \n" | 1975 "vzeroupper \n" |
1883 : [y_buf]"+r"(y_buf), // %[y_buf] | 1976 : [y_buf]"+r"(y_buf), // %[y_buf] |
1884 [u_buf]"+r"(u_buf), // %[u_buf] | 1977 [u_buf]"+r"(u_buf), // %[u_buf] |
1885 [v_buf]"+r"(v_buf), // %[v_buf] | 1978 [v_buf]"+r"(v_buf), // %[v_buf] |
1886 [dst_argb]"+r"(dst_argb), // %[dst_argb] | 1979 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
1887 [width]"+rm"(width) // %[width] | 1980 [width]"+rm"(width) // %[width] |
1888 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 1981 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
1889 : "memory", "cc", NACL_R14 | 1982 : "memory", "cc", NACL_R14 |
1890 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" | 1983 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
1891 ); | 1984 ); |
1892 } | 1985 } |
1893 #endif // HAS_I422TOABGRROW_AVX2 | 1986 #endif // HAS_I422TOABGRROW_AVX2 |
1894 | 1987 |
1895 #if defined(HAS_I422TORGBAROW_AVX2) | 1988 #if defined(HAS_I422TORGBAROW_AVX2) |
1896 // 16 pixels | 1989 // 16 pixels |
1897 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes). | 1990 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes). |
1898 void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf, | 1991 void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf, |
1899 const uint8* u_buf, | 1992 const uint8* u_buf, |
1900 const uint8* v_buf, | 1993 const uint8* v_buf, |
(...skipping 21 matching lines...) Expand all Loading... |
1922 "sub $0x10,%[width] \n" | 2015 "sub $0x10,%[width] \n" |
1923 "jg 1b \n" | 2016 "jg 1b \n" |
1924 "vzeroupper \n" | 2017 "vzeroupper \n" |
1925 : [y_buf]"+r"(y_buf), // %[y_buf] | 2018 : [y_buf]"+r"(y_buf), // %[y_buf] |
1926 [u_buf]"+r"(u_buf), // %[u_buf] | 2019 [u_buf]"+r"(u_buf), // %[u_buf] |
1927 [v_buf]"+r"(v_buf), // %[v_buf] | 2020 [v_buf]"+r"(v_buf), // %[v_buf] |
1928 [dst_argb]"+r"(dst_argb), // %[dst_argb] | 2021 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
1929 [width]"+rm"(width) // %[width] | 2022 [width]"+rm"(width) // %[width] |
1930 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 2023 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
1931 : "memory", "cc", NACL_R14 | 2024 : "memory", "cc", NACL_R14 |
1932 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" | 2025 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
1933 ); | 2026 ); |
1934 } | 2027 } |
1935 #endif // HAS_I422TORGBAROW_AVX2 | 2028 #endif // HAS_I422TORGBAROW_AVX2 |
1936 | 2029 |
1937 #ifdef HAS_I400TOARGBROW_SSE2 | 2030 #ifdef HAS_I400TOARGBROW_SSE2 |
1938 void I400ToARGBRow_SSE2(const uint8* y_buf, uint8* dst_argb, int width) { | 2031 void I400ToARGBRow_SSE2(const uint8* y_buf, uint8* dst_argb, int width) { |
1939 asm volatile ( | 2032 asm volatile ( |
1940 "mov $0x4a354a35,%%eax \n" // 4a35 = 18997 = 1.164 | 2033 "mov $0x4a354a35,%%eax \n" // 4a35 = 18997 = 1.164 |
1941 "movd %%eax,%%xmm2 \n" | 2034 "movd %%eax,%%xmm2 \n" |
1942 "pshufd $0x0,%%xmm2,%%xmm2 \n" | 2035 "pshufd $0x0,%%xmm2,%%xmm2 \n" |
(...skipping 3258 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5201 ); | 5294 ); |
5202 } | 5295 } |
5203 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 5296 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
5204 | 5297 |
5205 #endif // defined(__x86_64__) || defined(__i386__) | 5298 #endif // defined(__x86_64__) || defined(__i386__) |
5206 | 5299 |
5207 #ifdef __cplusplus | 5300 #ifdef __cplusplus |
5208 } // extern "C" | 5301 } // extern "C" |
5209 } // namespace libyuv | 5302 } // namespace libyuv |
5210 #endif | 5303 #endif |
OLD | NEW |