Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(15)

Side by Side Diff: source/row_gcc.cc

Issue 1363503002: yuvconstants for all YUV to RGB conversion functions. (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: mips dspr2 add constants parameter Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/row_common.cc ('k') | source/row_mips.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // VERSION 2 1 // VERSION 2
2 /* 2 /*
3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
4 * 4 *
5 * Use of this source code is governed by a BSD-style license 5 * Use of this source code is governed by a BSD-style license
6 * that can be found in the LICENSE file in the root of the source 6 * that can be found in the LICENSE file in the root of the source
7 * tree. An additional intellectual property rights grant can be found 7 * tree. An additional intellectual property rights grant can be found
8 * in the file PATENTS. All contributing project authors may 8 * in the file PATENTS. All contributing project authors may
9 * be found in the AUTHORS file in the root of the source tree. 9 * be found in the AUTHORS file in the root of the source tree.
10 */ 10 */
(...skipping 1332 matching lines...) Expand 10 before | Expand all | Expand 10 after
1343 "punpcklwd %%xmm0,%%xmm0 \n" \ 1343 "punpcklwd %%xmm0,%%xmm0 \n" \
1344 "punpckldq %%xmm0,%%xmm0 \n" 1344 "punpckldq %%xmm0,%%xmm0 \n"
1345 1345
1346 // Read 4 UV from NV12, upsample to 8 UV 1346 // Read 4 UV from NV12, upsample to 8 UV
1347 #define READNV12 \ 1347 #define READNV12 \
1348 "movq " MEMACCESS([uv_buf]) ",%%xmm0 \n" \ 1348 "movq " MEMACCESS([uv_buf]) ",%%xmm0 \n" \
1349 "lea " MEMLEA(0x8, [uv_buf]) ",%[uv_buf] \n" \ 1349 "lea " MEMLEA(0x8, [uv_buf]) ",%[uv_buf] \n" \
1350 "punpcklwd %%xmm0,%%xmm0 \n" 1350 "punpcklwd %%xmm0,%%xmm0 \n"
1351 1351
1352 // Convert 8 pixels: 8 UV and 8 Y 1352 // Convert 8 pixels: 8 UV and 8 Y
1353 #define YUVTORGB(YuvConstants) \ 1353 #define YUVTORGB(yuvconstants) \
1354 "movdqa %%xmm0,%%xmm1 \n" \ 1354 "movdqa %%xmm0,%%xmm1 \n" \
1355 "movdqa %%xmm0,%%xmm2 \n" \ 1355 "movdqa %%xmm0,%%xmm2 \n" \
1356 "movdqa %%xmm0,%%xmm3 \n" \ 1356 "movdqa %%xmm0,%%xmm3 \n" \
1357 "movdqa " MEMACCESS2(96, [YuvConstants]) ",%%xmm0 \n" \ 1357 "movdqa " MEMACCESS2(96, [yuvconstants]) ",%%xmm0 \n" \
1358 "pmaddubsw " MEMACCESS([YuvConstants]) ",%%xmm1 \n" \ 1358 "pmaddubsw " MEMACCESS([yuvconstants]) ",%%xmm1 \n" \
1359 "psubw %%xmm1,%%xmm0 \n" \ 1359 "psubw %%xmm1,%%xmm0 \n" \
1360 "movdqa " MEMACCESS2(128, [YuvConstants]) ",%%xmm1 \n" \ 1360 "movdqa " MEMACCESS2(128, [yuvconstants]) ",%%xmm1 \n" \
1361 "pmaddubsw " MEMACCESS2(32, [YuvConstants]) ",%%xmm2 \n" \ 1361 "pmaddubsw " MEMACCESS2(32, [yuvconstants]) ",%%xmm2 \n" \
1362 "psubw %%xmm2,%%xmm1 \n" \ 1362 "psubw %%xmm2,%%xmm1 \n" \
1363 "movdqa " MEMACCESS2(160, [YuvConstants]) ",%%xmm2 \n" \ 1363 "movdqa " MEMACCESS2(160, [yuvconstants]) ",%%xmm2 \n" \
1364 "pmaddubsw " MEMACCESS2(64, [YuvConstants]) ",%%xmm3 \n" \ 1364 "pmaddubsw " MEMACCESS2(64, [yuvconstants]) ",%%xmm3 \n" \
1365 "psubw %%xmm3,%%xmm2 \n" \ 1365 "psubw %%xmm3,%%xmm2 \n" \
1366 "movq " MEMACCESS([y_buf]) ",%%xmm3 \n" \ 1366 "movq " MEMACCESS([y_buf]) ",%%xmm3 \n" \
1367 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" \ 1367 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" \
1368 "punpcklbw %%xmm3,%%xmm3 \n" \ 1368 "punpcklbw %%xmm3,%%xmm3 \n" \
1369 "pmulhuw " MEMACCESS2(192, [YuvConstants]) ",%%xmm3 \n" \ 1369 "pmulhuw " MEMACCESS2(192, [yuvconstants]) ",%%xmm3 \n" \
1370 "paddsw %%xmm3,%%xmm0 \n" \ 1370 "paddsw %%xmm3,%%xmm0 \n" \
1371 "paddsw %%xmm3,%%xmm1 \n" \ 1371 "paddsw %%xmm3,%%xmm1 \n" \
1372 "paddsw %%xmm3,%%xmm2 \n" \ 1372 "paddsw %%xmm3,%%xmm2 \n" \
1373 "psraw $0x6,%%xmm0 \n" \ 1373 "psraw $0x6,%%xmm0 \n" \
1374 "psraw $0x6,%%xmm1 \n" \ 1374 "psraw $0x6,%%xmm1 \n" \
1375 "psraw $0x6,%%xmm2 \n" \ 1375 "psraw $0x6,%%xmm2 \n" \
1376 "packuswb %%xmm0,%%xmm0 \n" \ 1376 "packuswb %%xmm0,%%xmm0 \n" \
1377 "packuswb %%xmm1,%%xmm1 \n" \ 1377 "packuswb %%xmm1,%%xmm1 \n" \
1378 "packuswb %%xmm2,%%xmm2 \n" 1378 "packuswb %%xmm2,%%xmm2 \n"
1379 1379
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
1416 "pcmpeqb %%xmm5,%%xmm5 \n" \ 1416 "pcmpeqb %%xmm5,%%xmm5 \n" \
1417 "punpcklbw %%xmm2,%%xmm1 \n" \ 1417 "punpcklbw %%xmm2,%%xmm1 \n" \
1418 "punpcklbw %%xmm0,%%xmm5 \n" \ 1418 "punpcklbw %%xmm0,%%xmm5 \n" \
1419 "movdqa %%xmm5,%%xmm0 \n" \ 1419 "movdqa %%xmm5,%%xmm0 \n" \
1420 "punpcklwd %%xmm1,%%xmm5 \n" \ 1420 "punpcklwd %%xmm1,%%xmm5 \n" \
1421 "punpckhwd %%xmm1,%%xmm0 \n" \ 1421 "punpckhwd %%xmm1,%%xmm0 \n" \
1422 "movdqu %%xmm5," MEMACCESS([dst_rgba]) " \n" \ 1422 "movdqu %%xmm5," MEMACCESS([dst_rgba]) " \n" \
1423 "movdqu %%xmm0," MEMACCESS2(0x10, [dst_rgba]) " \n" \ 1423 "movdqu %%xmm0," MEMACCESS2(0x10, [dst_rgba]) " \n" \
1424 "lea " MEMLEA(0x20, [dst_rgba]) ",%[dst_rgba] \n" 1424 "lea " MEMLEA(0x20, [dst_rgba]) ",%[dst_rgba] \n"
1425 1425
1426 void OMITFP I444ToARGBMatrixRow_SSSE3(const uint8* y_buf, 1426 void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf,
1427 const uint8* u_buf, 1427 const uint8* u_buf,
1428 const uint8* v_buf, 1428 const uint8* v_buf,
1429 uint8* dst_argb, 1429 uint8* dst_argb,
1430 struct YuvConstants* YuvConstants, 1430 struct YuvConstants* yuvconstants,
1431 int width) { 1431 int width) {
1432 asm volatile ( 1432 asm volatile (
1433 "sub %[u_buf],%[v_buf] \n" 1433 "sub %[u_buf],%[v_buf] \n"
1434 "pcmpeqb %%xmm5,%%xmm5 \n" 1434 "pcmpeqb %%xmm5,%%xmm5 \n"
1435 LABELALIGN 1435 LABELALIGN
1436 "1: \n" 1436 "1: \n"
1437 READYUV444 1437 READYUV444
1438 YUVTORGB(YuvConstants) 1438 YUVTORGB(yuvconstants)
1439 STOREARGB 1439 STOREARGB
1440 "sub $0x8,%[width] \n" 1440 "sub $0x8,%[width] \n"
1441 "jg 1b \n" 1441 "jg 1b \n"
1442 : [y_buf]"+r"(y_buf), // %[y_buf] 1442 : [y_buf]"+r"(y_buf), // %[y_buf]
1443 [u_buf]"+r"(u_buf), // %[u_buf] 1443 [u_buf]"+r"(u_buf), // %[u_buf]
1444 [v_buf]"+r"(v_buf), // %[v_buf] 1444 [v_buf]"+r"(v_buf), // %[v_buf]
1445 [dst_argb]"+r"(dst_argb), // %[dst_argb] 1445 [dst_argb]"+r"(dst_argb), // %[dst_argb]
1446 [width]"+rm"(width) // %[width] 1446 [width]"+rm"(width) // %[width]
1447 : [YuvConstants]"r"(YuvConstants) // %[kYuvConstants] 1447 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
1448 : "memory", "cc", NACL_R14 1448 : "memory", "cc", NACL_R14
1449 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" 1449 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
1450 ); 1450 );
1451 } 1451 }
1452 1452
1453 void OMITFP I444ToABGRMatrixRow_SSSE3(const uint8* y_buf, 1453 void OMITFP I444ToABGRRow_SSSE3(const uint8* y_buf,
1454 const uint8* u_buf, 1454 const uint8* u_buf,
1455 const uint8* v_buf, 1455 const uint8* v_buf,
1456 uint8* dst_abgr, 1456 uint8* dst_abgr,
1457 struct YuvConstants* YuvConstants, 1457 struct YuvConstants* yuvconstants,
1458 int width) { 1458 int width) {
1459 asm volatile ( 1459 asm volatile (
1460 "sub %[u_buf],%[v_buf] \n" 1460 "sub %[u_buf],%[v_buf] \n"
1461 "pcmpeqb %%xmm5,%%xmm5 \n" 1461 "pcmpeqb %%xmm5,%%xmm5 \n"
1462 LABELALIGN 1462 LABELALIGN
1463 "1: \n" 1463 "1: \n"
1464 READYUV444 1464 READYUV444
1465 YUVTORGB(YuvConstants) 1465 YUVTORGB(yuvconstants)
1466 STOREABGR 1466 STOREABGR
1467 "sub $0x8,%[width] \n" 1467 "sub $0x8,%[width] \n"
1468 "jg 1b \n" 1468 "jg 1b \n"
1469 : [y_buf]"+r"(y_buf), // %[y_buf] 1469 : [y_buf]"+r"(y_buf), // %[y_buf]
1470 [u_buf]"+r"(u_buf), // %[u_buf] 1470 [u_buf]"+r"(u_buf), // %[u_buf]
1471 [v_buf]"+r"(v_buf), // %[v_buf] 1471 [v_buf]"+r"(v_buf), // %[v_buf]
1472 [dst_abgr]"+r"(dst_abgr), // %[dst_abgr] 1472 [dst_abgr]"+r"(dst_abgr), // %[dst_abgr]
1473 [width]"+rm"(width) // %[width] 1473 [width]"+rm"(width) // %[width]
1474 : [YuvConstants]"r"(YuvConstants) // %[kYuvConstants] 1474 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
1475 : "memory", "cc", NACL_R14 1475 : "memory", "cc", NACL_R14
1476 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" 1476 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
1477 ); 1477 );
1478 } 1478 }
1479 1479
1480 // TODO(fbarchard): Consider putting masks into constants. 1480 // TODO(fbarchard): Consider putting masks into constants.
1481 void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf, 1481 void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf,
1482 const uint8* u_buf, 1482 const uint8* u_buf,
1483 const uint8* v_buf, 1483 const uint8* v_buf,
1484 uint8* dst_rgb24, 1484 uint8* dst_rgb24,
1485 struct YuvConstants* yuvconstants,
1485 int width) { 1486 int width) {
1486 asm volatile ( 1487 asm volatile (
1487 "movdqa %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n" 1488 "movdqa %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n"
1488 "movdqa %[kShuffleMaskARGBToRGB24],%%xmm6 \n" 1489 "movdqa %[kShuffleMaskARGBToRGB24],%%xmm6 \n"
1489 "sub %[u_buf],%[v_buf] \n" 1490 "sub %[u_buf],%[v_buf] \n"
1490 LABELALIGN 1491 LABELALIGN
1491 "1: \n" 1492 "1: \n"
1492 READYUV422 1493 READYUV422
1493 YUVTORGB(kYuvConstants) 1494 YUVTORGB(yuvconstants)
1494 "punpcklbw %%xmm1,%%xmm0 \n" 1495 "punpcklbw %%xmm1,%%xmm0 \n"
1495 "punpcklbw %%xmm2,%%xmm2 \n" 1496 "punpcklbw %%xmm2,%%xmm2 \n"
1496 "movdqa %%xmm0,%%xmm1 \n" 1497 "movdqa %%xmm0,%%xmm1 \n"
1497 "punpcklwd %%xmm2,%%xmm0 \n" 1498 "punpcklwd %%xmm2,%%xmm0 \n"
1498 "punpckhwd %%xmm2,%%xmm1 \n" 1499 "punpckhwd %%xmm2,%%xmm1 \n"
1499 "pshufb %%xmm5,%%xmm0 \n" 1500 "pshufb %%xmm5,%%xmm0 \n"
1500 "pshufb %%xmm6,%%xmm1 \n" 1501 "pshufb %%xmm6,%%xmm1 \n"
1501 "palignr $0xc,%%xmm0,%%xmm1 \n" 1502 "palignr $0xc,%%xmm0,%%xmm1 \n"
1502 "movq %%xmm0," MEMACCESS([dst_rgb24]) "\n" 1503 "movq %%xmm0," MEMACCESS([dst_rgb24]) "\n"
1503 "movdqu %%xmm1," MEMACCESS2(0x8,[dst_rgb24]) "\n" 1504 "movdqu %%xmm1," MEMACCESS2(0x8,[dst_rgb24]) "\n"
1504 "lea " MEMLEA(0x18,[dst_rgb24]) ",%[dst_rgb24] \n" 1505 "lea " MEMLEA(0x18,[dst_rgb24]) ",%[dst_rgb24] \n"
1505 "subl $0x8,%[width] \n" 1506 "subl $0x8,%[width] \n"
1506 "jg 1b \n" 1507 "jg 1b \n"
1507 : [y_buf]"+r"(y_buf), // %[y_buf] 1508 : [y_buf]"+r"(y_buf), // %[y_buf]
1508 [u_buf]"+r"(u_buf), // %[u_buf] 1509 [u_buf]"+r"(u_buf), // %[u_buf]
1509 [v_buf]"+r"(v_buf), // %[v_buf] 1510 [v_buf]"+r"(v_buf), // %[v_buf]
1510 [dst_rgb24]"+r"(dst_rgb24), // %[dst_rgb24] 1511 [dst_rgb24]"+r"(dst_rgb24), // %[dst_rgb24]
1511 // TODO(fbarchard): Make width a register for 32 bit. 1512 // TODO(fbarchard): Make width a register for 32 bit.
1512 #if defined(__i386__) && defined(__pic__) 1513 #if defined(__i386__) && defined(__pic__)
1513 [width]"+m"(width) // %[width] 1514 [width]"+m"(width) // %[width]
1514 #else 1515 #else
1515 [width]"+rm"(width) // %[width] 1516 [width]"+rm"(width) // %[width]
1516 #endif 1517 #endif
1517 : [kYuvConstants]"r"(&kYuvConstants.kUVToB), 1518 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
1518 [kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0), 1519 [kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0),
1519 [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24) 1520 [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24)
1520 : "memory", "cc", NACL_R14 1521 : "memory", "cc", NACL_R14
1521 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5", "xmm6" 1522 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5", "xmm6"
1522 ); 1523 );
1523 } 1524 }
1524 1525
1525 void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf, 1526 void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf,
1526 const uint8* u_buf, 1527 const uint8* u_buf,
1527 const uint8* v_buf, 1528 const uint8* v_buf,
1528 uint8* dst_raw, 1529 uint8* dst_raw,
1530 struct YuvConstants* yuvconstants,
1529 int width) { 1531 int width) {
1530 asm volatile ( 1532 asm volatile (
1531 "movdqa %[kShuffleMaskARGBToRAW_0],%%xmm5 \n" 1533 "movdqa %[kShuffleMaskARGBToRAW_0],%%xmm5 \n"
1532 "movdqa %[kShuffleMaskARGBToRAW],%%xmm6 \n" 1534 "movdqa %[kShuffleMaskARGBToRAW],%%xmm6 \n"
1533 "sub %[u_buf],%[v_buf] \n" 1535 "sub %[u_buf],%[v_buf] \n"
1534 LABELALIGN 1536 LABELALIGN
1535 "1: \n" 1537 "1: \n"
1536 READYUV422 1538 READYUV422
1537 YUVTORGB(kYuvConstants) 1539 YUVTORGB(yuvconstants)
1538 "punpcklbw %%xmm1,%%xmm0 \n" 1540 "punpcklbw %%xmm1,%%xmm0 \n"
1539 "punpcklbw %%xmm2,%%xmm2 \n" 1541 "punpcklbw %%xmm2,%%xmm2 \n"
1540 "movdqa %%xmm0,%%xmm1 \n" 1542 "movdqa %%xmm0,%%xmm1 \n"
1541 "punpcklwd %%xmm2,%%xmm0 \n" 1543 "punpcklwd %%xmm2,%%xmm0 \n"
1542 "punpckhwd %%xmm2,%%xmm1 \n" 1544 "punpckhwd %%xmm2,%%xmm1 \n"
1543 "pshufb %%xmm5,%%xmm0 \n" 1545 "pshufb %%xmm5,%%xmm0 \n"
1544 "pshufb %%xmm6,%%xmm1 \n" 1546 "pshufb %%xmm6,%%xmm1 \n"
1545 "palignr $0xc,%%xmm0,%%xmm1 \n" 1547 "palignr $0xc,%%xmm0,%%xmm1 \n"
1546 "movq %%xmm0," MEMACCESS([dst_raw]) " \n" 1548 "movq %%xmm0," MEMACCESS([dst_raw]) " \n"
1547 "movdqu %%xmm1," MEMACCESS2(0x8,[dst_raw]) "\n" 1549 "movdqu %%xmm1," MEMACCESS2(0x8,[dst_raw]) "\n"
1548 "lea " MEMLEA(0x18,[dst_raw]) ",%[dst_raw] \n" 1550 "lea " MEMLEA(0x18,[dst_raw]) ",%[dst_raw] \n"
1549 "subl $0x8,%[width] \n" 1551 "subl $0x8,%[width] \n"
1550 "jg 1b \n" 1552 "jg 1b \n"
1551 : [y_buf]"+r"(y_buf), // %[y_buf] 1553 : [y_buf]"+r"(y_buf), // %[y_buf]
1552 [u_buf]"+r"(u_buf), // %[u_buf] 1554 [u_buf]"+r"(u_buf), // %[u_buf]
1553 [v_buf]"+r"(v_buf), // %[v_buf] 1555 [v_buf]"+r"(v_buf), // %[v_buf]
1554 [dst_raw]"+r"(dst_raw), // %[dst_raw] 1556 [dst_raw]"+r"(dst_raw), // %[dst_raw]
1555 // TODO(fbarchard): Make width a register for 32 bit. 1557 // TODO(fbarchard): Make width a register for 32 bit.
1556 #if defined(__i386__) && defined(__pic__) 1558 #if defined(__i386__) && defined(__pic__)
1557 [width]"+m"(width) // %[width] 1559 [width]"+m"(width) // %[width]
1558 #else 1560 #else
1559 [width]"+rm"(width) // %[width] 1561 [width]"+rm"(width) // %[width]
1560 #endif 1562 #endif
1561 : [kYuvConstants]"r"(&kYuvConstants.kUVToB), 1563 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
1562 [kShuffleMaskARGBToRAW_0]"m"(kShuffleMaskARGBToRAW_0), 1564 [kShuffleMaskARGBToRAW_0]"m"(kShuffleMaskARGBToRAW_0),
1563 [kShuffleMaskARGBToRAW]"m"(kShuffleMaskARGBToRAW) 1565 [kShuffleMaskARGBToRAW]"m"(kShuffleMaskARGBToRAW)
1564 : "memory", "cc", NACL_R14 1566 : "memory", "cc", NACL_R14
1565 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5", "xmm6" 1567 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5", "xmm6"
1566 ); 1568 );
1567 } 1569 }
1568 1570
1569 void OMITFP I422ToARGBMatrixRow_SSSE3(const uint8* y_buf, 1571 void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf,
1570 const uint8* u_buf, 1572 const uint8* u_buf,
1571 const uint8* v_buf, 1573 const uint8* v_buf,
1572 uint8* dst_argb, 1574 uint8* dst_argb,
1573 struct YuvConstants* YuvConstants, 1575 struct YuvConstants* yuvconstants,
1574 int width) { 1576 int width) {
1575 asm volatile ( 1577 asm volatile (
1576 "sub %[u_buf],%[v_buf] \n" 1578 "sub %[u_buf],%[v_buf] \n"
1577 "pcmpeqb %%xmm5,%%xmm5 \n" 1579 "pcmpeqb %%xmm5,%%xmm5 \n"
1578 LABELALIGN 1580 LABELALIGN
1579 "1: \n" 1581 "1: \n"
1580 READYUV422 1582 READYUV422
1581 YUVTORGB(YuvConstants) 1583 YUVTORGB(yuvconstants)
1582 STOREARGB 1584 STOREARGB
1583 "sub $0x8,%[width] \n" 1585 "sub $0x8,%[width] \n"
1584 "jg 1b \n" 1586 "jg 1b \n"
1585 : [y_buf]"+r"(y_buf), // %[y_buf] 1587 : [y_buf]"+r"(y_buf), // %[y_buf]
1586 [u_buf]"+r"(u_buf), // %[u_buf] 1588 [u_buf]"+r"(u_buf), // %[u_buf]
1587 [v_buf]"+r"(v_buf), // %[v_buf] 1589 [v_buf]"+r"(v_buf), // %[v_buf]
1588 [dst_argb]"+r"(dst_argb), // %[dst_argb] 1590 [dst_argb]"+r"(dst_argb), // %[dst_argb]
1589 [width]"+rm"(width) // %[width] 1591 [width]"+rm"(width) // %[width]
1590 : [YuvConstants]"r"(YuvConstants) // %[kYuvConstants] 1592 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
1591 : "memory", "cc", NACL_R14 1593 : "memory", "cc", NACL_R14
1592 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" 1594 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
1593 ); 1595 );
1594 } 1596 }
1595 1597
1596 void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf, 1598 void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf,
1597 const uint8* u_buf, 1599 const uint8* u_buf,
1598 const uint8* v_buf, 1600 const uint8* v_buf,
1599 uint8* dst_argb, 1601 uint8* dst_argb,
1602 struct YuvConstants* yuvconstants,
1600 int width) { 1603 int width) {
1601 asm volatile ( 1604 asm volatile (
1602 "sub %[u_buf],%[v_buf] \n" 1605 "sub %[u_buf],%[v_buf] \n"
1603 "pcmpeqb %%xmm5,%%xmm5 \n" 1606 "pcmpeqb %%xmm5,%%xmm5 \n"
1604 LABELALIGN 1607 LABELALIGN
1605 "1: \n" 1608 "1: \n"
1606 READYUV411 1609 READYUV411
1607 YUVTORGB(kYuvConstants) 1610 YUVTORGB(yuvconstants)
1608 STOREARGB 1611 STOREARGB
1609 "sub $0x8,%[width] \n" 1612 "sub $0x8,%[width] \n"
1610 "jg 1b \n" 1613 "jg 1b \n"
1611 : [y_buf]"+r"(y_buf), // %[y_buf] 1614 : [y_buf]"+r"(y_buf), // %[y_buf]
1612 [u_buf]"+r"(u_buf), // %[u_buf] 1615 [u_buf]"+r"(u_buf), // %[u_buf]
1613 [v_buf]"+r"(v_buf), // %[v_buf] 1616 [v_buf]"+r"(v_buf), // %[v_buf]
1614 [dst_argb]"+r"(dst_argb), // %[dst_argb] 1617 [dst_argb]"+r"(dst_argb), // %[dst_argb]
1615 [width]"+rm"(width) // %[width] 1618 [width]"+rm"(width) // %[width]
1616 : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] 1619 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
1617 : "memory", "cc", NACL_R14 1620 : "memory", "cc", NACL_R14
1618 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" 1621 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
1619 ); 1622 );
1620 } 1623 }
1621 1624
1622 void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf, 1625 void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf,
1623 const uint8* uv_buf, 1626 const uint8* uv_buf,
1624 uint8* dst_argb, 1627 uint8* dst_argb,
1628 struct YuvConstants* yuvconstants,
1625 int width) { 1629 int width) {
1626 asm volatile ( 1630 asm volatile (
1627 "pcmpeqb %%xmm5,%%xmm5 \n" 1631 "pcmpeqb %%xmm5,%%xmm5 \n"
1628 LABELALIGN 1632 LABELALIGN
1629 "1: \n" 1633 "1: \n"
1630 READNV12 1634 READNV12
1631 YUVTORGB(kYuvConstants) 1635 YUVTORGB(yuvconstants)
1632 STOREARGB 1636 STOREARGB
1633 "sub $0x8,%[width] \n" 1637 "sub $0x8,%[width] \n"
1634 "jg 1b \n" 1638 "jg 1b \n"
1635 : [y_buf]"+r"(y_buf), // %[y_buf]
1636 [uv_buf]"+r"(uv_buf), // %[uv_buf]
1637 [dst_argb]"+r"(dst_argb), // %[dst_argb]
1638 [width]"+rm"(width) // %[width]
1639 : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
1640 // Does not use r14.
1641 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
1642 );
1643 }
1644
1645 void OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf,
1646 const uint8* uv_buf,
1647 uint8* dst_argb,
1648 int width) {
1649 asm volatile (
1650 "pcmpeqb %%xmm5,%%xmm5 \n"
1651 LABELALIGN
1652 "1: \n"
1653 READNV12
1654 YUVTORGB(kYuvConstants)
1655 STOREARGB
1656 "sub $0x8,%[width] \n"
1657 "jg 1b \n"
1658 : [y_buf]"+r"(y_buf), // %[y_buf] 1639 : [y_buf]"+r"(y_buf), // %[y_buf]
1659 [uv_buf]"+r"(uv_buf), // %[uv_buf] 1640 [uv_buf]"+r"(uv_buf), // %[uv_buf]
1660 [dst_argb]"+r"(dst_argb), // %[dst_argb] 1641 [dst_argb]"+r"(dst_argb), // %[dst_argb]
1661 [width]"+rm"(width) // %[width] 1642 [width]"+rm"(width) // %[width]
1662 : [kYuvConstants]"r"(&kYvuConstants.kUVToB) // %[kYuvConstants] 1643 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
1663 // Does not use r14. 1644 // Does not use r14.
1664 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" 1645 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
1665 ); 1646 );
1666 } 1647 }
1667 1648
1668 void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf, 1649 void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf,
1669 const uint8* u_buf, 1650 const uint8* u_buf,
1670 const uint8* v_buf, 1651 const uint8* v_buf,
1671 uint8* dst_bgra, 1652 uint8* dst_bgra,
1653 struct YuvConstants* yuvconstants,
1672 int width) { 1654 int width) {
1673 asm volatile ( 1655 asm volatile (
1674 "sub %[u_buf],%[v_buf] \n" 1656 "sub %[u_buf],%[v_buf] \n"
1675 "pcmpeqb %%xmm5,%%xmm5 \n" 1657 "pcmpeqb %%xmm5,%%xmm5 \n"
1676 LABELALIGN 1658 LABELALIGN
1677 "1: \n" 1659 "1: \n"
1678 READYUV422 1660 READYUV422
1679 YUVTORGB(kYuvConstants) 1661 YUVTORGB(yuvconstants)
1680 STOREBGRA 1662 STOREBGRA
1681 "sub $0x8,%[width] \n" 1663 "sub $0x8,%[width] \n"
1682 "jg 1b \n" 1664 "jg 1b \n"
1683 : [y_buf]"+r"(y_buf), // %[y_buf] 1665 : [y_buf]"+r"(y_buf), // %[y_buf]
1684 [u_buf]"+r"(u_buf), // %[u_buf] 1666 [u_buf]"+r"(u_buf), // %[u_buf]
1685 [v_buf]"+r"(v_buf), // %[v_buf] 1667 [v_buf]"+r"(v_buf), // %[v_buf]
1686 [dst_bgra]"+r"(dst_bgra), // %[dst_bgra] 1668 [dst_bgra]"+r"(dst_bgra), // %[dst_bgra]
1687 [width]"+rm"(width) // %[width] 1669 [width]"+rm"(width) // %[width]
1688 : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] 1670 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
1689 : "memory", "cc", NACL_R14 1671 : "memory", "cc", NACL_R14
1690 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" 1672 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
1691 ); 1673 );
1692 } 1674 }
1693 1675
1694 void OMITFP I422ToABGRMatrixRow_SSSE3(const uint8* y_buf, 1676 void OMITFP I422ToABGRRow_SSSE3(const uint8* y_buf,
1695 const uint8* u_buf, 1677 const uint8* u_buf,
1696 const uint8* v_buf, 1678 const uint8* v_buf,
1697 uint8* dst_abgr, 1679 uint8* dst_abgr,
1698 struct YuvConstants* YuvConstants, 1680 struct YuvConstants* yuvconstants,
1699 int width) { 1681 int width) {
1700 asm volatile ( 1682 asm volatile (
1701 "sub %[u_buf],%[v_buf] \n" 1683 "sub %[u_buf],%[v_buf] \n"
1702 "pcmpeqb %%xmm5,%%xmm5 \n" 1684 "pcmpeqb %%xmm5,%%xmm5 \n"
1703 LABELALIGN 1685 LABELALIGN
1704 "1: \n" 1686 "1: \n"
1705 READYUV422 1687 READYUV422
1706 YUVTORGB(kYuvConstants) 1688 YUVTORGB(yuvconstants)
1707 STOREABGR 1689 STOREABGR
1708 "sub $0x8,%[width] \n" 1690 "sub $0x8,%[width] \n"
1709 "jg 1b \n" 1691 "jg 1b \n"
1710 : [y_buf]"+r"(y_buf), // %[y_buf] 1692 : [y_buf]"+r"(y_buf), // %[y_buf]
1711 [u_buf]"+r"(u_buf), // %[u_buf] 1693 [u_buf]"+r"(u_buf), // %[u_buf]
1712 [v_buf]"+r"(v_buf), // %[v_buf] 1694 [v_buf]"+r"(v_buf), // %[v_buf]
1713 [dst_abgr]"+r"(dst_abgr), // %[dst_abgr] 1695 [dst_abgr]"+r"(dst_abgr), // %[dst_abgr]
1714 [width]"+rm"(width) // %[width] 1696 [width]"+rm"(width) // %[width]
1715 : [kYuvConstants]"r"(YuvConstants) // %[kYuvConstants] 1697 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
1716 : "memory", "cc", NACL_R14 1698 : "memory", "cc", NACL_R14
1717 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" 1699 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
1718 ); 1700 );
1719 } 1701 }
1720 1702
1721 void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, 1703 void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
1722 const uint8* u_buf, 1704 const uint8* u_buf,
1723 const uint8* v_buf, 1705 const uint8* v_buf,
1724 uint8* dst_rgba, 1706 uint8* dst_rgba,
1707 struct YuvConstants* yuvconstants,
1725 int width) { 1708 int width) {
1726 asm volatile ( 1709 asm volatile (
1727 "sub %[u_buf],%[v_buf] \n" 1710 "sub %[u_buf],%[v_buf] \n"
1728 "pcmpeqb %%xmm5,%%xmm5 \n" 1711 "pcmpeqb %%xmm5,%%xmm5 \n"
1729 LABELALIGN 1712 LABELALIGN
1730 "1: \n" 1713 "1: \n"
1731 READYUV422 1714 READYUV422
1732 YUVTORGB(kYuvConstants) 1715 YUVTORGB(yuvconstants)
1733 STORERGBA 1716 STORERGBA
1734 "sub $0x8,%[width] \n" 1717 "sub $0x8,%[width] \n"
1735 "jg 1b \n" 1718 "jg 1b \n"
1736 : [y_buf]"+r"(y_buf), // %[y_buf] 1719 : [y_buf]"+r"(y_buf), // %[y_buf]
1737 [u_buf]"+r"(u_buf), // %[u_buf] 1720 [u_buf]"+r"(u_buf), // %[u_buf]
1738 [v_buf]"+r"(v_buf), // %[v_buf] 1721 [v_buf]"+r"(v_buf), // %[v_buf]
1739 [dst_rgba]"+r"(dst_rgba), // %[dst_rgba] 1722 [dst_rgba]"+r"(dst_rgba), // %[dst_rgba]
1740 [width]"+rm"(width) // %[width] 1723 [width]"+rm"(width) // %[width]
1741 : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] 1724 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
1742 : "memory", "cc", NACL_R14 1725 : "memory", "cc", NACL_R14
1743 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" 1726 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
1744 ); 1727 );
1745 } 1728 }
1746 1729
1747 #endif // HAS_I422TOARGBROW_SSSE3 1730 #endif // HAS_I422TOARGBROW_SSSE3
1748 1731
1749 // Read 8 UV from 422, upsample to 16 UV. 1732 // Read 8 UV from 422, upsample to 16 UV.
1750 #define READYUV422_AVX2 \ 1733 #define READYUV422_AVX2 \
1751 "vmovq " MEMACCESS([u_buf]) ",%%xmm0 \n" \ 1734 "vmovq " MEMACCESS([u_buf]) ",%%xmm0 \n" \
(...skipping 29 matching lines...) Expand all
1781 "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n" \ 1764 "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n" \
1782 "vpackuswb %%ymm2,%%ymm2,%%ymm2 \n" 1765 "vpackuswb %%ymm2,%%ymm2,%%ymm2 \n"
1783 1766
1784 #if defined(HAS_I422TOBGRAROW_AVX2) 1767 #if defined(HAS_I422TOBGRAROW_AVX2)
1785 // 16 pixels 1768 // 16 pixels
1786 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 BGRA (64 bytes). 1769 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 BGRA (64 bytes).
1787 void OMITFP I422ToBGRARow_AVX2(const uint8* y_buf, 1770 void OMITFP I422ToBGRARow_AVX2(const uint8* y_buf,
1788 const uint8* u_buf, 1771 const uint8* u_buf,
1789 const uint8* v_buf, 1772 const uint8* v_buf,
1790 uint8* dst_bgra, 1773 uint8* dst_bgra,
1774 struct YuvConstants* yuvconstants,
1791 int width) { 1775 int width) {
1792 asm volatile ( 1776 asm volatile (
1793 "sub %[u_buf],%[v_buf] \n" 1777 "sub %[u_buf],%[v_buf] \n"
1794 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" 1778 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
1795 LABELALIGN 1779 LABELALIGN
1796 "1: \n" 1780 "1: \n"
1797 READYUV422_AVX2 1781 READYUV422_AVX2
1798 YUVTORGB_AVX2(kYuvConstants) 1782 YUVTORGB_AVX2(yuvconstants)
1799 1783
1800 // Step 3: Weave into BGRA 1784 // Step 3: Weave into BGRA
1801 "vpunpcklbw %%ymm0,%%ymm1,%%ymm1 \n" // GB 1785 "vpunpcklbw %%ymm0,%%ymm1,%%ymm1 \n" // GB
1802 "vpermq $0xd8,%%ymm1,%%ymm1 \n" 1786 "vpermq $0xd8,%%ymm1,%%ymm1 \n"
1803 "vpunpcklbw %%ymm2,%%ymm5,%%ymm2 \n" // AR 1787 "vpunpcklbw %%ymm2,%%ymm5,%%ymm2 \n" // AR
1804 "vpermq $0xd8,%%ymm2,%%ymm2 \n" 1788 "vpermq $0xd8,%%ymm2,%%ymm2 \n"
1805 "vpunpcklwd %%ymm1,%%ymm2,%%ymm0 \n" // ARGB first 8 pixels 1789 "vpunpcklwd %%ymm1,%%ymm2,%%ymm0 \n" // ARGB first 8 pixels
1806 "vpunpckhwd %%ymm1,%%ymm2,%%ymm2 \n" // ARGB next 8 pixels 1790 "vpunpckhwd %%ymm1,%%ymm2,%%ymm2 \n" // ARGB next 8 pixels
1807 1791
1808 "vmovdqu %%ymm0," MEMACCESS([dst_bgra]) "\n" 1792 "vmovdqu %%ymm0," MEMACCESS([dst_bgra]) "\n"
1809 "vmovdqu %%ymm2," MEMACCESS2(0x20,[dst_bgra]) "\n" 1793 "vmovdqu %%ymm2," MEMACCESS2(0x20,[dst_bgra]) "\n"
1810 "lea " MEMLEA(0x40,[dst_bgra]) ",%[dst_bgra] \n" 1794 "lea " MEMLEA(0x40,[dst_bgra]) ",%[dst_bgra] \n"
1811 "sub $0x10,%[width] \n" 1795 "sub $0x10,%[width] \n"
1812 "jg 1b \n" 1796 "jg 1b \n"
1813 "vzeroupper \n" 1797 "vzeroupper \n"
1814 : [y_buf]"+r"(y_buf), // %[y_buf] 1798 : [y_buf]"+r"(y_buf), // %[y_buf]
1815 [u_buf]"+r"(u_buf), // %[u_buf] 1799 [u_buf]"+r"(u_buf), // %[u_buf]
1816 [v_buf]"+r"(v_buf), // %[v_buf] 1800 [v_buf]"+r"(v_buf), // %[v_buf]
1817 [dst_bgra]"+r"(dst_bgra), // %[dst_bgra] 1801 [dst_bgra]"+r"(dst_bgra), // %[dst_bgra]
1818 [width]"+rm"(width) // %[width] 1802 [width]"+rm"(width) // %[width]
1819 : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] 1803 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
1820 : "memory", "cc", NACL_R14 1804 : "memory", "cc", NACL_R14
1821 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" 1805 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
1822 ); 1806 );
1823 } 1807 }
1824 #endif // HAS_I422TOBGRAROW_AVX2 1808 #endif // HAS_I422TOBGRAROW_AVX2
1825 1809
1826 #if defined(HAS_I422TOARGBMATRIXROW_AVX2) 1810 #if defined(HAS_I422TOARGBROW_AVX2)
1827 // 16 pixels 1811 // 16 pixels
1828 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). 1812 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
1829 void OMITFP I422ToARGBMatrixRow_AVX2(const uint8* y_buf, 1813 void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf,
1830 const uint8* u_buf, 1814 const uint8* u_buf,
1831 const uint8* v_buf, 1815 const uint8* v_buf,
1832 uint8* dst_argb, 1816 uint8* dst_argb,
1833 struct YuvConstants* YuvConstants, 1817 struct YuvConstants* yuvconstants,
1834 int width) { 1818 int width) {
1835 asm volatile ( 1819 asm volatile (
1836 "sub %[u_buf],%[v_buf] \n" 1820 "sub %[u_buf],%[v_buf] \n"
1837 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" 1821 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
1838 LABELALIGN 1822 LABELALIGN
1839 "1: \n" 1823 "1: \n"
1840 READYUV422_AVX2 1824 READYUV422_AVX2
1841 YUVTORGB_AVX2(kYuvConstants) 1825 YUVTORGB_AVX2(yuvconstants)
1842 1826
1843 // Step 3: Weave into ARGB 1827 // Step 3: Weave into ARGB
1844 "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" // BG 1828 "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" // BG
1845 "vpermq $0xd8,%%ymm0,%%ymm0 \n" 1829 "vpermq $0xd8,%%ymm0,%%ymm0 \n"
1846 "vpunpcklbw %%ymm5,%%ymm2,%%ymm2 \n" // RA 1830 "vpunpcklbw %%ymm5,%%ymm2,%%ymm2 \n" // RA
1847 "vpermq $0xd8,%%ymm2,%%ymm2 \n" 1831 "vpermq $0xd8,%%ymm2,%%ymm2 \n"
1848 "vpunpcklwd %%ymm2,%%ymm0,%%ymm1 \n" // BGRA first 8 pixels 1832 "vpunpcklwd %%ymm2,%%ymm0,%%ymm1 \n" // BGRA first 8 pixels
1849 "vpunpckhwd %%ymm2,%%ymm0,%%ymm0 \n" // BGRA next 8 pixels 1833 "vpunpckhwd %%ymm2,%%ymm0,%%ymm0 \n" // BGRA next 8 pixels
1850 1834
1851 "vmovdqu %%ymm1," MEMACCESS([dst_argb]) "\n" 1835 "vmovdqu %%ymm1," MEMACCESS([dst_argb]) "\n"
1852 "vmovdqu %%ymm0," MEMACCESS2(0x20,[dst_argb]) "\n" 1836 "vmovdqu %%ymm0," MEMACCESS2(0x20,[dst_argb]) "\n"
1853 "lea " MEMLEA(0x40,[dst_argb]) ",%[dst_argb] \n" 1837 "lea " MEMLEA(0x40,[dst_argb]) ",%[dst_argb] \n"
1854 "sub $0x10,%[width] \n" 1838 "sub $0x10,%[width] \n"
1855 "jg 1b \n" 1839 "jg 1b \n"
1856 "vzeroupper \n" 1840 "vzeroupper \n"
1857 : [y_buf]"+r"(y_buf), // %[y_buf] 1841 : [y_buf]"+r"(y_buf), // %[y_buf]
1858 [u_buf]"+r"(u_buf), // %[u_buf] 1842 [u_buf]"+r"(u_buf), // %[u_buf]
1859 [v_buf]"+r"(v_buf), // %[v_buf] 1843 [v_buf]"+r"(v_buf), // %[v_buf]
1860 [dst_argb]"+r"(dst_argb), // %[dst_argb] 1844 [dst_argb]"+r"(dst_argb), // %[dst_argb]
1861 [width]"+rm"(width) // %[width] 1845 [width]"+rm"(width) // %[width]
1862 : [kYuvConstants]"r"(YuvConstants) // %[kYuvConstants] 1846 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
1863 : "memory", "cc", NACL_R14 1847 : "memory", "cc", NACL_R14
1864 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" 1848 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
1865 ); 1849 );
1866 } 1850 }
1867 #endif // HAS_I422TOARGBMATRIXROW_AVX2 1851 #endif // HAS_I422TOARGBROW_AVX2
1868 1852
1869 #if defined(HAS_I422TOABGRROW_AVX2) 1853 #if defined(HAS_I422TOABGRROW_AVX2)
1870 // 16 pixels 1854 // 16 pixels
1871 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes). 1855 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes).
1872 void OMITFP I422ToABGRMatrixRow_AVX2(const uint8* y_buf, 1856 void OMITFP I422ToABGRRow_AVX2(const uint8* y_buf,
1873 const uint8* u_buf, 1857 const uint8* u_buf,
1874 const uint8* v_buf, 1858 const uint8* v_buf,
1875 uint8* dst_argb, 1859 uint8* dst_argb,
1876 struct YuvConstants* YuvConstants, 1860 struct YuvConstants* yuvconstants,
1877 int width) { 1861 int width) {
1878 asm volatile ( 1862 asm volatile (
1879 "sub %[u_buf],%[v_buf] \n" 1863 "sub %[u_buf],%[v_buf] \n"
1880 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" 1864 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
1881 LABELALIGN 1865 LABELALIGN
1882 "1: \n" 1866 "1: \n"
1883 READYUV422_AVX2 1867 READYUV422_AVX2
1884 YUVTORGB_AVX2(kYuvConstants) 1868 YUVTORGB_AVX2(yuvconstants)
1885 1869
1886 // Step 3: Weave into ABGR 1870 // Step 3: Weave into ABGR
1887 "vpunpcklbw %%ymm1,%%ymm2,%%ymm1 \n" // RG 1871 "vpunpcklbw %%ymm1,%%ymm2,%%ymm1 \n" // RG
1888 "vpermq $0xd8,%%ymm1,%%ymm1 \n" 1872 "vpermq $0xd8,%%ymm1,%%ymm1 \n"
1889 "vpunpcklbw %%ymm5,%%ymm0,%%ymm2 \n" // BA 1873 "vpunpcklbw %%ymm5,%%ymm0,%%ymm2 \n" // BA
1890 "vpermq $0xd8,%%ymm2,%%ymm2 \n" 1874 "vpermq $0xd8,%%ymm2,%%ymm2 \n"
1891 "vpunpcklwd %%ymm2,%%ymm1,%%ymm0 \n" // RGBA first 8 pixels 1875 "vpunpcklwd %%ymm2,%%ymm1,%%ymm0 \n" // RGBA first 8 pixels
1892 "vpunpckhwd %%ymm2,%%ymm1,%%ymm1 \n" // RGBA next 8 pixels 1876 "vpunpckhwd %%ymm2,%%ymm1,%%ymm1 \n" // RGBA next 8 pixels
1893 "vmovdqu %%ymm0," MEMACCESS([dst_argb]) "\n" 1877 "vmovdqu %%ymm0," MEMACCESS([dst_argb]) "\n"
1894 "vmovdqu %%ymm1," MEMACCESS2(0x20,[dst_argb]) "\n" 1878 "vmovdqu %%ymm1," MEMACCESS2(0x20,[dst_argb]) "\n"
1895 "lea " MEMLEA(0x40,[dst_argb]) ",%[dst_argb] \n" 1879 "lea " MEMLEA(0x40,[dst_argb]) ",%[dst_argb] \n"
1896 "sub $0x10,%[width] \n" 1880 "sub $0x10,%[width] \n"
1897 "jg 1b \n" 1881 "jg 1b \n"
1898 "vzeroupper \n" 1882 "vzeroupper \n"
1899 : [y_buf]"+r"(y_buf), // %[y_buf] 1883 : [y_buf]"+r"(y_buf), // %[y_buf]
1900 [u_buf]"+r"(u_buf), // %[u_buf] 1884 [u_buf]"+r"(u_buf), // %[u_buf]
1901 [v_buf]"+r"(v_buf), // %[v_buf] 1885 [v_buf]"+r"(v_buf), // %[v_buf]
1902 [dst_argb]"+r"(dst_argb), // %[dst_argb] 1886 [dst_argb]"+r"(dst_argb), // %[dst_argb]
1903 [width]"+rm"(width) // %[width] 1887 [width]"+rm"(width) // %[width]
1904 : [kYuvConstants]"r"(YuvConstants) // %[kYuvConstants] 1888 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
1905 : "memory", "cc", NACL_R14 1889 : "memory", "cc", NACL_R14
1906 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" 1890 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
1907 ); 1891 );
1908 } 1892 }
1909 #endif // HAS_I422TOABGRROW_AVX2 1893 #endif // HAS_I422TOABGRROW_AVX2
1910 1894
1911 #if defined(HAS_I422TORGBAROW_AVX2) 1895 #if defined(HAS_I422TORGBAROW_AVX2)
1912 // 16 pixels 1896 // 16 pixels
1913 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes). 1897 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes).
1914 void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf, 1898 void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf,
1915 const uint8* u_buf, 1899 const uint8* u_buf,
1916 const uint8* v_buf, 1900 const uint8* v_buf,
1917 uint8* dst_argb, 1901 uint8* dst_argb,
1902 struct YuvConstants* yuvconstants,
1918 int width) { 1903 int width) {
1919 asm volatile ( 1904 asm volatile (
1920 "sub %[u_buf],%[v_buf] \n" 1905 "sub %[u_buf],%[v_buf] \n"
1921 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" 1906 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
1922 LABELALIGN 1907 LABELALIGN
1923 "1: \n" 1908 "1: \n"
1924 READYUV422_AVX2 1909 READYUV422_AVX2
1925 YUVTORGB_AVX2(kYuvConstants) 1910 YUVTORGB_AVX2(yuvconstants)
1926 1911
1927 // Step 3: Weave into RGBA 1912 // Step 3: Weave into RGBA
1928 "vpunpcklbw %%ymm2,%%ymm1,%%ymm1 \n" 1913 "vpunpcklbw %%ymm2,%%ymm1,%%ymm1 \n"
1929 "vpermq $0xd8,%%ymm1,%%ymm1 \n" 1914 "vpermq $0xd8,%%ymm1,%%ymm1 \n"
1930 "vpunpcklbw %%ymm0,%%ymm5,%%ymm2 \n" 1915 "vpunpcklbw %%ymm0,%%ymm5,%%ymm2 \n"
1931 "vpermq $0xd8,%%ymm2,%%ymm2 \n" 1916 "vpermq $0xd8,%%ymm2,%%ymm2 \n"
1932 "vpunpcklwd %%ymm1,%%ymm2,%%ymm0 \n" 1917 "vpunpcklwd %%ymm1,%%ymm2,%%ymm0 \n"
1933 "vpunpckhwd %%ymm1,%%ymm2,%%ymm1 \n" 1918 "vpunpckhwd %%ymm1,%%ymm2,%%ymm1 \n"
1934 "vmovdqu %%ymm0," MEMACCESS([dst_argb]) "\n" 1919 "vmovdqu %%ymm0," MEMACCESS([dst_argb]) "\n"
1935 "vmovdqu %%ymm1," MEMACCESS2(0x20,[dst_argb]) "\n" 1920 "vmovdqu %%ymm1," MEMACCESS2(0x20,[dst_argb]) "\n"
1936 "lea " MEMLEA(0x40,[dst_argb]) ",%[dst_argb] \n" 1921 "lea " MEMLEA(0x40,[dst_argb]) ",%[dst_argb] \n"
1937 "sub $0x10,%[width] \n" 1922 "sub $0x10,%[width] \n"
1938 "jg 1b \n" 1923 "jg 1b \n"
1939 "vzeroupper \n" 1924 "vzeroupper \n"
1940 : [y_buf]"+r"(y_buf), // %[y_buf] 1925 : [y_buf]"+r"(y_buf), // %[y_buf]
1941 [u_buf]"+r"(u_buf), // %[u_buf] 1926 [u_buf]"+r"(u_buf), // %[u_buf]
1942 [v_buf]"+r"(v_buf), // %[v_buf] 1927 [v_buf]"+r"(v_buf), // %[v_buf]
1943 [dst_argb]"+r"(dst_argb), // %[dst_argb] 1928 [dst_argb]"+r"(dst_argb), // %[dst_argb]
1944 [width]"+rm"(width) // %[width] 1929 [width]"+rm"(width) // %[width]
1945 : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] 1930 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
1946 : "memory", "cc", NACL_R14 1931 : "memory", "cc", NACL_R14
1947 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" 1932 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
1948 ); 1933 );
1949 } 1934 }
1950 #endif // HAS_I422TORGBAROW_AVX2 1935 #endif // HAS_I422TORGBAROW_AVX2
1951 1936
1952 #ifdef HAS_I400TOARGBROW_SSE2 1937 #ifdef HAS_I400TOARGBROW_SSE2
1953 void I400ToARGBRow_SSE2(const uint8* y_buf, uint8* dst_argb, int width) { 1938 void I400ToARGBRow_SSE2(const uint8* y_buf, uint8* dst_argb, int width) {
1954 asm volatile ( 1939 asm volatile (
1955 "mov $0x4a354a35,%%eax \n" // 4a35 = 18997 = 1.164 1940 "mov $0x4a354a35,%%eax \n" // 4a35 = 18997 = 1.164
(...skipping 3260 matching lines...) Expand 10 before | Expand all | Expand 10 after
5216 ); 5201 );
5217 } 5202 }
5218 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 5203 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3
5219 5204
5220 #endif // defined(__x86_64__) || defined(__i386__) 5205 #endif // defined(__x86_64__) || defined(__i386__)
5221 5206
5222 #ifdef __cplusplus 5207 #ifdef __cplusplus
5223 } // extern "C" 5208 } // extern "C"
5224 } // namespace libyuv 5209 } // namespace libyuv
5225 #endif 5210 #endif
OLDNEW
« no previous file with comments | « source/row_common.cc ('k') | source/row_mips.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698