Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(450)

Side by Side Diff: source/row_gcc.cc

Issue 1355393002: yuy2 to rgb gcc versions (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/row_common.cc ('k') | source/row_win.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // VERSION 2 1 // VERSION 2
2 /* 2 /*
3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
4 * 4 *
5 * Use of this source code is governed by a BSD-style license 5 * Use of this source code is governed by a BSD-style license
6 * that can be found in the LICENSE file in the root of the source 6 * that can be found in the LICENSE file in the root of the source
7 * tree. An additional intellectual property rights grant can be found 7 * tree. An additional intellectual property rights grant can be found
8 * in the file PATENTS. All contributing project authors may 8 * in the file PATENTS. All contributing project authors may
9 * be found in the AUTHORS file in the root of the source tree. 9 * be found in the AUTHORS file in the root of the source tree.
10 */ 10 */
(...skipping 1306 matching lines...) Expand 10 before | Expand all | Expand 10 after
1317 ); 1317 );
1318 } 1318 }
1319 1319
1320 #if defined(HAS_I422TOARGBROW_SSSE3) || defined(HAS_I422TOARGBROW_AVX2) 1320 #if defined(HAS_I422TOARGBROW_SSSE3) || defined(HAS_I422TOARGBROW_AVX2)
1321 1321
1322 // Read 8 UV from 444 1322 // Read 8 UV from 444
1323 #define READYUV444 \ 1323 #define READYUV444 \
1324 "movq " MEMACCESS([u_buf]) ",%%xmm0 \n" \ 1324 "movq " MEMACCESS([u_buf]) ",%%xmm0 \n" \
1325 MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1) \ 1325 MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1) \
1326 "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \ 1326 "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \
1327 "punpcklbw %%xmm1,%%xmm0 \n" 1327 "punpcklbw %%xmm1,%%xmm0 \n" \
1328 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \
1329 "punpcklbw %%xmm4,%%xmm4 \n" \
1330 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n"
1328 1331
1329 // Read 4 UV from 422, upsample to 8 UV 1332 // Read 4 UV from 422, upsample to 8 UV
1330 #define READYUV422 \ 1333 #define READYUV422 \
1331 "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \ 1334 "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \
1332 MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \ 1335 MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \
1333 "lea " MEMLEA(0x4, [u_buf]) ",%[u_buf] \n" \ 1336 "lea " MEMLEA(0x4, [u_buf]) ",%[u_buf] \n" \
1334 "punpcklbw %%xmm1,%%xmm0 \n" \ 1337 "punpcklbw %%xmm1,%%xmm0 \n" \
1335 "punpcklwd %%xmm0,%%xmm0 \n" 1338 "punpcklwd %%xmm0,%%xmm0 \n" \
1339 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \
1340 "punpcklbw %%xmm4,%%xmm4 \n" \
1341 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n"
1336 1342
1337 // Read 2 UV from 411, upsample to 8 UV 1343 // Read 2 UV from 411, upsample to 8 UV
1338 #define READYUV411 \ 1344 #define READYUV411 \
1339 "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \ 1345 "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \
1340 MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \ 1346 MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \
1341 "lea " MEMLEA(0x2, [u_buf]) ",%[u_buf] \n" \ 1347 "lea " MEMLEA(0x2, [u_buf]) ",%[u_buf] \n" \
1342 "punpcklbw %%xmm1,%%xmm0 \n" \ 1348 "punpcklbw %%xmm1,%%xmm0 \n" \
1343 "punpcklwd %%xmm0,%%xmm0 \n" \ 1349 "punpcklwd %%xmm0,%%xmm0 \n" \
1344 "punpckldq %%xmm0,%%xmm0 \n" 1350 "punpckldq %%xmm0,%%xmm0 \n" \
1351 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \
1352 "punpcklbw %%xmm4,%%xmm4 \n" \
1353 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n"
1345 1354
1346 // Read 4 UV from NV12, upsample to 8 UV 1355 // Read 4 UV from NV12, upsample to 8 UV
1347 #define READNV12 \ 1356 #define READNV12 \
1348 "movq " MEMACCESS([uv_buf]) ",%%xmm0 \n" \ 1357 "movq " MEMACCESS([uv_buf]) ",%%xmm0 \n" \
1349 "lea " MEMLEA(0x8, [uv_buf]) ",%[uv_buf] \n" \ 1358 "lea " MEMLEA(0x8, [uv_buf]) ",%[uv_buf] \n" \
1350 "punpcklwd %%xmm0,%%xmm0 \n" 1359 "punpcklwd %%xmm0,%%xmm0 \n" \
1360 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \
1361 "punpcklbw %%xmm4,%%xmm4 \n" \
1362 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n"
1363
1364 // YUY2 shuf 8 Y to 16 Y.
1365 static const vec8 kShuffleYUY2Y = {
1366 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14
1367 };
1368
1369 // YUY2 shuf 4 UV to 8 UV.
1370 static const vec8 kShuffleYUY2UV = {
1371 1, 3, 1, 3, 5, 7, 5, 7, 9, 11, 9, 11, 13, 15, 13, 15
1372 };
1373
1374 // Read 4 YUY2 with 8 Y and upsample 4 UV to 8 UV.
1375 #define READYUY2 \
1376 "movdqu " MEMACCESS([yuy2_buf]) ",%%xmm4 \n" \
1377 "pshufb %[kShuffleYUY2Y], %%xmm4 \n" \
1378 "movdqu " MEMACCESS([yuy2_buf]) ",%%xmm0 \n" \
1379 "pshufb %[kShuffleYUY2UV], %%xmm0 \n" \
1380 "lea " MEMLEA(0x10, [yuy2_buf]) ",%[yuy2_buf] \n"
1381
1382 // UYVY shuf 8 Y to 16 Y.
1383 static const vec8 kShuffleUYVYY = {
1384 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15
1385 };
1386
1387 // UYVY shuf 4 UV to 8 UV.
1388 static const vec8 kShuffleUYVYUV = {
1389 0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14
1390 };
1391
1392 // Read 4 UYVY with 8 Y and upsample 4 UV to 8 UV.
1393 #define READUYVY \
1394 "movdqu " MEMACCESS([uyvy_buf]) ",%%xmm4 \n" \
1395 "pshufb %[kShuffleUYVYY], %%xmm4 \n" \
1396 "movdqu " MEMACCESS([uyvy_buf]) ",%%xmm0 \n" \
1397 "pshufb %[kShuffleUYVYUV], %%xmm0 \n" \
1398 "lea " MEMLEA(0x10, [uyvy_buf]) ",%[uyvy_buf] \n"
1351 1399
1352 // Convert 8 pixels: 8 UV and 8 Y 1400 // Convert 8 pixels: 8 UV and 8 Y
1353 #define YUVTORGB(yuvconstants) \ 1401 #define YUVTORGB(yuvconstants) \
1354 "movdqa %%xmm0,%%xmm1 \n" \ 1402 "movdqa %%xmm0,%%xmm1 \n" \
1355 "movdqa %%xmm0,%%xmm2 \n" \ 1403 "movdqa %%xmm0,%%xmm2 \n" \
1356 "movdqa %%xmm0,%%xmm3 \n" \ 1404 "movdqa %%xmm0,%%xmm3 \n" \
1357 "movdqa " MEMACCESS2(96, [yuvconstants]) ",%%xmm0 \n" \ 1405 "movdqa " MEMACCESS2(96, [yuvconstants]) ",%%xmm0 \n" \
1358 "pmaddubsw " MEMACCESS([yuvconstants]) ",%%xmm1 \n" \ 1406 "pmaddubsw " MEMACCESS([yuvconstants]) ",%%xmm1 \n" \
1359 "psubw %%xmm1,%%xmm0 \n" \ 1407 "psubw %%xmm1,%%xmm0 \n" \
1360 "movdqa " MEMACCESS2(128, [yuvconstants]) ",%%xmm1 \n" \ 1408 "movdqa " MEMACCESS2(128, [yuvconstants]) ",%%xmm1 \n" \
1361 "pmaddubsw " MEMACCESS2(32, [yuvconstants]) ",%%xmm2 \n" \ 1409 "pmaddubsw " MEMACCESS2(32, [yuvconstants]) ",%%xmm2 \n" \
1362 "psubw %%xmm2,%%xmm1 \n" \ 1410 "psubw %%xmm2,%%xmm1 \n" \
1363 "movdqa " MEMACCESS2(160, [yuvconstants]) ",%%xmm2 \n" \ 1411 "movdqa " MEMACCESS2(160, [yuvconstants]) ",%%xmm2 \n" \
1364 "pmaddubsw " MEMACCESS2(64, [yuvconstants]) ",%%xmm3 \n" \ 1412 "pmaddubsw " MEMACCESS2(64, [yuvconstants]) ",%%xmm3 \n" \
1365 "psubw %%xmm3,%%xmm2 \n" \ 1413 "psubw %%xmm3,%%xmm2 \n" \
1366 "movq " MEMACCESS([y_buf]) ",%%xmm3 \n" \ 1414 "pmulhuw " MEMACCESS2(192, [yuvconstants]) ",%%xmm4 \n" \
1367 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" \ 1415 "paddsw %%xmm4,%%xmm0 \n" \
1368 "punpcklbw %%xmm3,%%xmm3 \n" \ 1416 "paddsw %%xmm4,%%xmm1 \n" \
1369 "pmulhuw " MEMACCESS2(192, [yuvconstants]) ",%%xmm3 \n" \ 1417 "paddsw %%xmm4,%%xmm2 \n" \
1370 "paddsw %%xmm3,%%xmm0 \n" \
1371 "paddsw %%xmm3,%%xmm1 \n" \
1372 "paddsw %%xmm3,%%xmm2 \n" \
1373 "psraw $0x6,%%xmm0 \n" \ 1418 "psraw $0x6,%%xmm0 \n" \
1374 "psraw $0x6,%%xmm1 \n" \ 1419 "psraw $0x6,%%xmm1 \n" \
1375 "psraw $0x6,%%xmm2 \n" \ 1420 "psraw $0x6,%%xmm2 \n" \
1376 "packuswb %%xmm0,%%xmm0 \n" \ 1421 "packuswb %%xmm0,%%xmm0 \n" \
1377 "packuswb %%xmm1,%%xmm1 \n" \ 1422 "packuswb %%xmm1,%%xmm1 \n" \
1378 "packuswb %%xmm2,%%xmm2 \n" 1423 "packuswb %%xmm2,%%xmm2 \n"
1379 1424
1380 // Store 8 ARGB values. Assumes XMM5 is set to all ones. 1425 // Store 8 ARGB values. Assumes XMM5 is set to all ones.
1381 #define STOREARGB \ 1426 #define STOREARGB \
1382 "punpcklbw %%xmm1,%%xmm0 \n" \ 1427 "punpcklbw %%xmm1,%%xmm0 \n" \
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after
1439 STOREARGB 1484 STOREARGB
1440 "sub $0x8,%[width] \n" 1485 "sub $0x8,%[width] \n"
1441 "jg 1b \n" 1486 "jg 1b \n"
1442 : [y_buf]"+r"(y_buf), // %[y_buf] 1487 : [y_buf]"+r"(y_buf), // %[y_buf]
1443 [u_buf]"+r"(u_buf), // %[u_buf] 1488 [u_buf]"+r"(u_buf), // %[u_buf]
1444 [v_buf]"+r"(v_buf), // %[v_buf] 1489 [v_buf]"+r"(v_buf), // %[v_buf]
1445 [dst_argb]"+r"(dst_argb), // %[dst_argb] 1490 [dst_argb]"+r"(dst_argb), // %[dst_argb]
1446 [width]"+rm"(width) // %[width] 1491 [width]"+rm"(width) // %[width]
1447 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] 1492 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
1448 : "memory", "cc", NACL_R14 1493 : "memory", "cc", NACL_R14
1449 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" 1494 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
1450 ); 1495 );
1451 } 1496 }
1452 1497
1453 void OMITFP I444ToABGRRow_SSSE3(const uint8* y_buf, 1498 void OMITFP I444ToABGRRow_SSSE3(const uint8* y_buf,
1454 const uint8* u_buf, 1499 const uint8* u_buf,
1455 const uint8* v_buf, 1500 const uint8* v_buf,
1456 uint8* dst_abgr, 1501 uint8* dst_abgr,
1457 struct YuvConstants* yuvconstants, 1502 struct YuvConstants* yuvconstants,
1458 int width) { 1503 int width) {
1459 asm volatile ( 1504 asm volatile (
1460 "sub %[u_buf],%[v_buf] \n" 1505 "sub %[u_buf],%[v_buf] \n"
1461 "pcmpeqb %%xmm5,%%xmm5 \n" 1506 "pcmpeqb %%xmm5,%%xmm5 \n"
1462 LABELALIGN 1507 LABELALIGN
1463 "1: \n" 1508 "1: \n"
1464 READYUV444 1509 READYUV444
1465 YUVTORGB(yuvconstants) 1510 YUVTORGB(yuvconstants)
1466 STOREABGR 1511 STOREABGR
1467 "sub $0x8,%[width] \n" 1512 "sub $0x8,%[width] \n"
1468 "jg 1b \n" 1513 "jg 1b \n"
1469 : [y_buf]"+r"(y_buf), // %[y_buf] 1514 : [y_buf]"+r"(y_buf), // %[y_buf]
1470 [u_buf]"+r"(u_buf), // %[u_buf] 1515 [u_buf]"+r"(u_buf), // %[u_buf]
1471 [v_buf]"+r"(v_buf), // %[v_buf] 1516 [v_buf]"+r"(v_buf), // %[v_buf]
1472 [dst_abgr]"+r"(dst_abgr), // %[dst_abgr] 1517 [dst_abgr]"+r"(dst_abgr), // %[dst_abgr]
1473 [width]"+rm"(width) // %[width] 1518 [width]"+rm"(width) // %[width]
1474 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] 1519 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
1475 : "memory", "cc", NACL_R14 1520 : "memory", "cc", NACL_R14
1476 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" 1521 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
1477 ); 1522 );
1478 } 1523 }
1479 1524
1480 // TODO(fbarchard): Consider putting masks into constants. 1525 // TODO(fbarchard): Consider putting masks into constants.
1481 void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf, 1526 void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf,
1482 const uint8* u_buf, 1527 const uint8* u_buf,
1483 const uint8* v_buf, 1528 const uint8* v_buf,
1484 uint8* dst_rgb24, 1529 uint8* dst_rgb24,
1485 struct YuvConstants* yuvconstants, 1530 struct YuvConstants* yuvconstants,
1486 int width) { 1531 int width) {
(...skipping 25 matching lines...) Expand all
1512 // TODO(fbarchard): Make width a register for 32 bit. 1557 // TODO(fbarchard): Make width a register for 32 bit.
1513 #if defined(__i386__) && defined(__pic__) 1558 #if defined(__i386__) && defined(__pic__)
1514 [width]"+m"(width) // %[width] 1559 [width]"+m"(width) // %[width]
1515 #else 1560 #else
1516 [width]"+rm"(width) // %[width] 1561 [width]"+rm"(width) // %[width]
1517 #endif 1562 #endif
1518 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] 1563 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
1519 [kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0), 1564 [kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0),
1520 [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24) 1565 [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24)
1521 : "memory", "cc", NACL_R14 1566 : "memory", "cc", NACL_R14
1522 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5", "xmm6" 1567 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
1523 ); 1568 );
1524 } 1569 }
1525 1570
1526 void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf, 1571 void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf,
1527 const uint8* u_buf, 1572 const uint8* u_buf,
1528 const uint8* v_buf, 1573 const uint8* v_buf,
1529 uint8* dst_raw, 1574 uint8* dst_raw,
1530 struct YuvConstants* yuvconstants, 1575 struct YuvConstants* yuvconstants,
1531 int width) { 1576 int width) {
1532 asm volatile ( 1577 asm volatile (
(...skipping 24 matching lines...) Expand all
1557 // TODO(fbarchard): Make width a register for 32 bit. 1602 // TODO(fbarchard): Make width a register for 32 bit.
1558 #if defined(__i386__) && defined(__pic__) 1603 #if defined(__i386__) && defined(__pic__)
1559 [width]"+m"(width) // %[width] 1604 [width]"+m"(width) // %[width]
1560 #else 1605 #else
1561 [width]"+rm"(width) // %[width] 1606 [width]"+rm"(width) // %[width]
1562 #endif 1607 #endif
1563 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] 1608 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
1564 [kShuffleMaskARGBToRAW_0]"m"(kShuffleMaskARGBToRAW_0), 1609 [kShuffleMaskARGBToRAW_0]"m"(kShuffleMaskARGBToRAW_0),
1565 [kShuffleMaskARGBToRAW]"m"(kShuffleMaskARGBToRAW) 1610 [kShuffleMaskARGBToRAW]"m"(kShuffleMaskARGBToRAW)
1566 : "memory", "cc", NACL_R14 1611 : "memory", "cc", NACL_R14
1567 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5", "xmm6" 1612 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
1568 ); 1613 );
1569 } 1614 }
1570 1615
1571 void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf, 1616 void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf,
1572 const uint8* u_buf, 1617 const uint8* u_buf,
1573 const uint8* v_buf, 1618 const uint8* v_buf,
1574 uint8* dst_argb, 1619 uint8* dst_argb,
1575 struct YuvConstants* yuvconstants, 1620 struct YuvConstants* yuvconstants,
1576 int width) { 1621 int width) {
1577 asm volatile ( 1622 asm volatile (
1578 "sub %[u_buf],%[v_buf] \n" 1623 "sub %[u_buf],%[v_buf] \n"
1579 "pcmpeqb %%xmm5,%%xmm5 \n" 1624 "pcmpeqb %%xmm5,%%xmm5 \n"
1580 LABELALIGN 1625 LABELALIGN
1581 "1: \n" 1626 "1: \n"
1582 READYUV422 1627 READYUV422
1583 YUVTORGB(yuvconstants) 1628 YUVTORGB(yuvconstants)
1584 STOREARGB 1629 STOREARGB
1585 "sub $0x8,%[width] \n" 1630 "sub $0x8,%[width] \n"
1586 "jg 1b \n" 1631 "jg 1b \n"
1587 : [y_buf]"+r"(y_buf), // %[y_buf] 1632 : [y_buf]"+r"(y_buf), // %[y_buf]
1588 [u_buf]"+r"(u_buf), // %[u_buf] 1633 [u_buf]"+r"(u_buf), // %[u_buf]
1589 [v_buf]"+r"(v_buf), // %[v_buf] 1634 [v_buf]"+r"(v_buf), // %[v_buf]
1590 [dst_argb]"+r"(dst_argb), // %[dst_argb] 1635 [dst_argb]"+r"(dst_argb), // %[dst_argb]
1591 [width]"+rm"(width) // %[width] 1636 [width]"+rm"(width) // %[width]
1592 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] 1637 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
1593 : "memory", "cc", NACL_R14 1638 : "memory", "cc", NACL_R14
1594 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" 1639 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
1595 ); 1640 );
1596 } 1641 }
1597 1642
1598 void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf, 1643 void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf,
1599 const uint8* u_buf, 1644 const uint8* u_buf,
1600 const uint8* v_buf, 1645 const uint8* v_buf,
1601 uint8* dst_argb, 1646 uint8* dst_argb,
1602 struct YuvConstants* yuvconstants, 1647 struct YuvConstants* yuvconstants,
1603 int width) { 1648 int width) {
1604 asm volatile ( 1649 asm volatile (
1605 "sub %[u_buf],%[v_buf] \n" 1650 "sub %[u_buf],%[v_buf] \n"
1606 "pcmpeqb %%xmm5,%%xmm5 \n" 1651 "pcmpeqb %%xmm5,%%xmm5 \n"
1607 LABELALIGN 1652 LABELALIGN
1608 "1: \n" 1653 "1: \n"
1609 READYUV411 1654 READYUV411
1610 YUVTORGB(yuvconstants) 1655 YUVTORGB(yuvconstants)
1611 STOREARGB 1656 STOREARGB
1612 "sub $0x8,%[width] \n" 1657 "sub $0x8,%[width] \n"
1613 "jg 1b \n" 1658 "jg 1b \n"
1614 : [y_buf]"+r"(y_buf), // %[y_buf] 1659 : [y_buf]"+r"(y_buf), // %[y_buf]
1615 [u_buf]"+r"(u_buf), // %[u_buf] 1660 [u_buf]"+r"(u_buf), // %[u_buf]
1616 [v_buf]"+r"(v_buf), // %[v_buf] 1661 [v_buf]"+r"(v_buf), // %[v_buf]
1617 [dst_argb]"+r"(dst_argb), // %[dst_argb] 1662 [dst_argb]"+r"(dst_argb), // %[dst_argb]
1618 [width]"+rm"(width) // %[width] 1663 [width]"+rm"(width) // %[width]
1619 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] 1664 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
1620 : "memory", "cc", NACL_R14 1665 : "memory", "cc", NACL_R14
1621 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" 1666 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
1622 ); 1667 );
1623 } 1668 }
1624 1669
1625 void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf, 1670 void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf,
1626 const uint8* uv_buf, 1671 const uint8* uv_buf,
1627 uint8* dst_argb, 1672 uint8* dst_argb,
1628 struct YuvConstants* yuvconstants, 1673 struct YuvConstants* yuvconstants,
1629 int width) { 1674 int width) {
1630 asm volatile ( 1675 asm volatile (
1631 "pcmpeqb %%xmm5,%%xmm5 \n" 1676 "pcmpeqb %%xmm5,%%xmm5 \n"
1632 LABELALIGN 1677 LABELALIGN
1633 "1: \n" 1678 "1: \n"
1634 READNV12 1679 READNV12
1635 YUVTORGB(yuvconstants) 1680 YUVTORGB(yuvconstants)
1636 STOREARGB 1681 STOREARGB
1637 "sub $0x8,%[width] \n" 1682 "sub $0x8,%[width] \n"
1638 "jg 1b \n" 1683 "jg 1b \n"
1639 : [y_buf]"+r"(y_buf), // %[y_buf] 1684 : [y_buf]"+r"(y_buf), // %[y_buf]
1640 [uv_buf]"+r"(uv_buf), // %[uv_buf] 1685 [uv_buf]"+r"(uv_buf), // %[uv_buf]
1641 [dst_argb]"+r"(dst_argb), // %[dst_argb] 1686 [dst_argb]"+r"(dst_argb), // %[dst_argb]
1642 [width]"+rm"(width) // %[width] 1687 [width]"+rm"(width) // %[width]
1643 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] 1688 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
1644 // Does not use r14. 1689 // Does not use r14.
1645 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" 1690 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
1646 ); 1691 );
1647 } 1692 }
1648 1693
1694 void OMITFP YUY2ToARGBRow_SSSE3(const uint8* yuy2_buf,
1695 uint8* dst_argb,
1696 struct YuvConstants* yuvconstants,
1697 int width) {
1698 asm volatile (
1699 "pcmpeqb %%xmm5,%%xmm5 \n"
1700 LABELALIGN
1701 "1: \n"
1702 READYUY2
1703 YUVTORGB(yuvconstants)
1704 STOREARGB
1705 "sub $0x8,%[width] \n"
1706 "jg 1b \n"
1707 : [yuy2_buf]"+r"(yuy2_buf), // %[yuy2_buf]
1708 [dst_argb]"+r"(dst_argb), // %[dst_argb]
1709 [width]"+rm"(width) // %[width]
1710 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
1711 [kShuffleYUY2Y]"m"(kShuffleYUY2Y),
1712 [kShuffleYUY2UV]"m"(kShuffleYUY2UV)
1713 // Does not use r14.
1714 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
1715 );
1716 }
1717
1718 void OMITFP UYVYToARGBRow_SSSE3(const uint8* uyvy_buf,
1719 uint8* dst_argb,
1720 struct YuvConstants* yuvconstants,
1721 int width) {
1722 asm volatile (
1723 "pcmpeqb %%xmm5,%%xmm5 \n"
1724 LABELALIGN
1725 "1: \n"
1726 READUYVY
1727 YUVTORGB(yuvconstants)
1728 STOREARGB
1729 "sub $0x8,%[width] \n"
1730 "jg 1b \n"
1731 : [uyvy_buf]"+r"(uyvy_buf), // %[uyvy_buf]
1732 [dst_argb]"+r"(dst_argb), // %[dst_argb]
1733 [width]"+rm"(width) // %[width]
1734 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
1735 [kShuffleUYVYY]"m"(kShuffleUYVYY),
1736 [kShuffleUYVYUV]"m"(kShuffleUYVYUV)
1737 // Does not use r14.
1738 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
1739 );
1740 }
1741
1649 void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf, 1742 void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf,
1650 const uint8* u_buf, 1743 const uint8* u_buf,
1651 const uint8* v_buf, 1744 const uint8* v_buf,
1652 uint8* dst_bgra, 1745 uint8* dst_bgra,
1653 struct YuvConstants* yuvconstants, 1746 struct YuvConstants* yuvconstants,
1654 int width) { 1747 int width) {
1655 asm volatile ( 1748 asm volatile (
1656 "sub %[u_buf],%[v_buf] \n" 1749 "sub %[u_buf],%[v_buf] \n"
1657 "pcmpeqb %%xmm5,%%xmm5 \n" 1750 "pcmpeqb %%xmm5,%%xmm5 \n"
1658 LABELALIGN 1751 LABELALIGN
1659 "1: \n" 1752 "1: \n"
1660 READYUV422 1753 READYUV422
1661 YUVTORGB(yuvconstants) 1754 YUVTORGB(yuvconstants)
1662 STOREBGRA 1755 STOREBGRA
1663 "sub $0x8,%[width] \n" 1756 "sub $0x8,%[width] \n"
1664 "jg 1b \n" 1757 "jg 1b \n"
1665 : [y_buf]"+r"(y_buf), // %[y_buf] 1758 : [y_buf]"+r"(y_buf), // %[y_buf]
1666 [u_buf]"+r"(u_buf), // %[u_buf] 1759 [u_buf]"+r"(u_buf), // %[u_buf]
1667 [v_buf]"+r"(v_buf), // %[v_buf] 1760 [v_buf]"+r"(v_buf), // %[v_buf]
1668 [dst_bgra]"+r"(dst_bgra), // %[dst_bgra] 1761 [dst_bgra]"+r"(dst_bgra), // %[dst_bgra]
1669 [width]"+rm"(width) // %[width] 1762 [width]"+rm"(width) // %[width]
1670 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] 1763 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
1671 : "memory", "cc", NACL_R14 1764 : "memory", "cc", NACL_R14
1672 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" 1765 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
1673 ); 1766 );
1674 } 1767 }
1675 1768
1676 void OMITFP I422ToABGRRow_SSSE3(const uint8* y_buf, 1769 void OMITFP I422ToABGRRow_SSSE3(const uint8* y_buf,
1677 const uint8* u_buf, 1770 const uint8* u_buf,
1678 const uint8* v_buf, 1771 const uint8* v_buf,
1679 uint8* dst_abgr, 1772 uint8* dst_abgr,
1680 struct YuvConstants* yuvconstants, 1773 struct YuvConstants* yuvconstants,
1681 int width) { 1774 int width) {
1682 asm volatile ( 1775 asm volatile (
1683 "sub %[u_buf],%[v_buf] \n" 1776 "sub %[u_buf],%[v_buf] \n"
1684 "pcmpeqb %%xmm5,%%xmm5 \n" 1777 "pcmpeqb %%xmm5,%%xmm5 \n"
1685 LABELALIGN 1778 LABELALIGN
1686 "1: \n" 1779 "1: \n"
1687 READYUV422 1780 READYUV422
1688 YUVTORGB(yuvconstants) 1781 YUVTORGB(yuvconstants)
1689 STOREABGR 1782 STOREABGR
1690 "sub $0x8,%[width] \n" 1783 "sub $0x8,%[width] \n"
1691 "jg 1b \n" 1784 "jg 1b \n"
1692 : [y_buf]"+r"(y_buf), // %[y_buf] 1785 : [y_buf]"+r"(y_buf), // %[y_buf]
1693 [u_buf]"+r"(u_buf), // %[u_buf] 1786 [u_buf]"+r"(u_buf), // %[u_buf]
1694 [v_buf]"+r"(v_buf), // %[v_buf] 1787 [v_buf]"+r"(v_buf), // %[v_buf]
1695 [dst_abgr]"+r"(dst_abgr), // %[dst_abgr] 1788 [dst_abgr]"+r"(dst_abgr), // %[dst_abgr]
1696 [width]"+rm"(width) // %[width] 1789 [width]"+rm"(width) // %[width]
1697 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] 1790 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
1698 : "memory", "cc", NACL_R14 1791 : "memory", "cc", NACL_R14
1699 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" 1792 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
1700 ); 1793 );
1701 } 1794 }
1702 1795
1703 void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, 1796 void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
1704 const uint8* u_buf, 1797 const uint8* u_buf,
1705 const uint8* v_buf, 1798 const uint8* v_buf,
1706 uint8* dst_rgba, 1799 uint8* dst_rgba,
1707 struct YuvConstants* yuvconstants, 1800 struct YuvConstants* yuvconstants,
1708 int width) { 1801 int width) {
1709 asm volatile ( 1802 asm volatile (
1710 "sub %[u_buf],%[v_buf] \n" 1803 "sub %[u_buf],%[v_buf] \n"
1711 "pcmpeqb %%xmm5,%%xmm5 \n" 1804 "pcmpeqb %%xmm5,%%xmm5 \n"
1712 LABELALIGN 1805 LABELALIGN
1713 "1: \n" 1806 "1: \n"
1714 READYUV422 1807 READYUV422
1715 YUVTORGB(yuvconstants) 1808 YUVTORGB(yuvconstants)
1716 STORERGBA 1809 STORERGBA
1717 "sub $0x8,%[width] \n" 1810 "sub $0x8,%[width] \n"
1718 "jg 1b \n" 1811 "jg 1b \n"
1719 : [y_buf]"+r"(y_buf), // %[y_buf] 1812 : [y_buf]"+r"(y_buf), // %[y_buf]
1720 [u_buf]"+r"(u_buf), // %[u_buf] 1813 [u_buf]"+r"(u_buf), // %[u_buf]
1721 [v_buf]"+r"(v_buf), // %[v_buf] 1814 [v_buf]"+r"(v_buf), // %[v_buf]
1722 [dst_rgba]"+r"(dst_rgba), // %[dst_rgba] 1815 [dst_rgba]"+r"(dst_rgba), // %[dst_rgba]
1723 [width]"+rm"(width) // %[width] 1816 [width]"+rm"(width) // %[width]
1724 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] 1817 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
1725 : "memory", "cc", NACL_R14 1818 : "memory", "cc", NACL_R14
1726 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" 1819 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
1727 ); 1820 );
1728 } 1821 }
1729 1822
1730 #endif // HAS_I422TOARGBROW_SSSE3 1823 #endif // HAS_I422TOARGBROW_SSSE3
1731 1824
1732 // Read 8 UV from 422, upsample to 16 UV. 1825 // Read 8 UV from 422, upsample to 16 UV.
1733 #define READYUV422_AVX2 \ 1826 #define READYUV422_AVX2 \
1734 "vmovq " MEMACCESS([u_buf]) ",%%xmm0 \n" \ 1827 "vmovq " MEMACCESS([u_buf]) ",%%xmm0 \n" \
1735 MEMOPREG(vmovq, 0x00, [u_buf], [v_buf], 1, xmm1) \ 1828 MEMOPREG(vmovq, 0x00, [u_buf], [v_buf], 1, xmm1) \
1736 "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \ 1829 "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after
1795 "sub $0x10,%[width] \n" 1888 "sub $0x10,%[width] \n"
1796 "jg 1b \n" 1889 "jg 1b \n"
1797 "vzeroupper \n" 1890 "vzeroupper \n"
1798 : [y_buf]"+r"(y_buf), // %[y_buf] 1891 : [y_buf]"+r"(y_buf), // %[y_buf]
1799 [u_buf]"+r"(u_buf), // %[u_buf] 1892 [u_buf]"+r"(u_buf), // %[u_buf]
1800 [v_buf]"+r"(v_buf), // %[v_buf] 1893 [v_buf]"+r"(v_buf), // %[v_buf]
1801 [dst_bgra]"+r"(dst_bgra), // %[dst_bgra] 1894 [dst_bgra]"+r"(dst_bgra), // %[dst_bgra]
1802 [width]"+rm"(width) // %[width] 1895 [width]"+rm"(width) // %[width]
1803 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] 1896 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
1804 : "memory", "cc", NACL_R14 1897 : "memory", "cc", NACL_R14
1805 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" 1898 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
1806 ); 1899 );
1807 } 1900 }
1808 #endif // HAS_I422TOBGRAROW_AVX2 1901 #endif // HAS_I422TOBGRAROW_AVX2
1809 1902
1810 #if defined(HAS_I422TOARGBROW_AVX2) 1903 #if defined(HAS_I422TOARGBROW_AVX2)
1811 // 16 pixels 1904 // 16 pixels
1812 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). 1905 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
1813 void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf, 1906 void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf,
1814 const uint8* u_buf, 1907 const uint8* u_buf,
1815 const uint8* v_buf, 1908 const uint8* v_buf,
(...skipping 22 matching lines...) Expand all
1838 "sub $0x10,%[width] \n" 1931 "sub $0x10,%[width] \n"
1839 "jg 1b \n" 1932 "jg 1b \n"
1840 "vzeroupper \n" 1933 "vzeroupper \n"
1841 : [y_buf]"+r"(y_buf), // %[y_buf] 1934 : [y_buf]"+r"(y_buf), // %[y_buf]
1842 [u_buf]"+r"(u_buf), // %[u_buf] 1935 [u_buf]"+r"(u_buf), // %[u_buf]
1843 [v_buf]"+r"(v_buf), // %[v_buf] 1936 [v_buf]"+r"(v_buf), // %[v_buf]
1844 [dst_argb]"+r"(dst_argb), // %[dst_argb] 1937 [dst_argb]"+r"(dst_argb), // %[dst_argb]
1845 [width]"+rm"(width) // %[width] 1938 [width]"+rm"(width) // %[width]
1846 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] 1939 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
1847 : "memory", "cc", NACL_R14 1940 : "memory", "cc", NACL_R14
1848 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" 1941 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
1849 ); 1942 );
1850 } 1943 }
1851 #endif // HAS_I422TOARGBROW_AVX2 1944 #endif // HAS_I422TOARGBROW_AVX2
1852 1945
1853 #if defined(HAS_I422TOABGRROW_AVX2) 1946 #if defined(HAS_I422TOABGRROW_AVX2)
1854 // 16 pixels 1947 // 16 pixels
1855 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes). 1948 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes).
1856 void OMITFP I422ToABGRRow_AVX2(const uint8* y_buf, 1949 void OMITFP I422ToABGRRow_AVX2(const uint8* y_buf,
1857 const uint8* u_buf, 1950 const uint8* u_buf,
1858 const uint8* v_buf, 1951 const uint8* v_buf,
(...skipping 21 matching lines...) Expand all
1880 "sub $0x10,%[width] \n" 1973 "sub $0x10,%[width] \n"
1881 "jg 1b \n" 1974 "jg 1b \n"
1882 "vzeroupper \n" 1975 "vzeroupper \n"
1883 : [y_buf]"+r"(y_buf), // %[y_buf] 1976 : [y_buf]"+r"(y_buf), // %[y_buf]
1884 [u_buf]"+r"(u_buf), // %[u_buf] 1977 [u_buf]"+r"(u_buf), // %[u_buf]
1885 [v_buf]"+r"(v_buf), // %[v_buf] 1978 [v_buf]"+r"(v_buf), // %[v_buf]
1886 [dst_argb]"+r"(dst_argb), // %[dst_argb] 1979 [dst_argb]"+r"(dst_argb), // %[dst_argb]
1887 [width]"+rm"(width) // %[width] 1980 [width]"+rm"(width) // %[width]
1888 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] 1981 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
1889 : "memory", "cc", NACL_R14 1982 : "memory", "cc", NACL_R14
1890 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" 1983 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
1891 ); 1984 );
1892 } 1985 }
1893 #endif // HAS_I422TOABGRROW_AVX2 1986 #endif // HAS_I422TOABGRROW_AVX2
1894 1987
1895 #if defined(HAS_I422TORGBAROW_AVX2) 1988 #if defined(HAS_I422TORGBAROW_AVX2)
1896 // 16 pixels 1989 // 16 pixels
1897 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes). 1990 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes).
1898 void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf, 1991 void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf,
1899 const uint8* u_buf, 1992 const uint8* u_buf,
1900 const uint8* v_buf, 1993 const uint8* v_buf,
(...skipping 21 matching lines...) Expand all
1922 "sub $0x10,%[width] \n" 2015 "sub $0x10,%[width] \n"
1923 "jg 1b \n" 2016 "jg 1b \n"
1924 "vzeroupper \n" 2017 "vzeroupper \n"
1925 : [y_buf]"+r"(y_buf), // %[y_buf] 2018 : [y_buf]"+r"(y_buf), // %[y_buf]
1926 [u_buf]"+r"(u_buf), // %[u_buf] 2019 [u_buf]"+r"(u_buf), // %[u_buf]
1927 [v_buf]"+r"(v_buf), // %[v_buf] 2020 [v_buf]"+r"(v_buf), // %[v_buf]
1928 [dst_argb]"+r"(dst_argb), // %[dst_argb] 2021 [dst_argb]"+r"(dst_argb), // %[dst_argb]
1929 [width]"+rm"(width) // %[width] 2022 [width]"+rm"(width) // %[width]
1930 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] 2023 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
1931 : "memory", "cc", NACL_R14 2024 : "memory", "cc", NACL_R14
1932 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" 2025 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
1933 ); 2026 );
1934 } 2027 }
1935 #endif // HAS_I422TORGBAROW_AVX2 2028 #endif // HAS_I422TORGBAROW_AVX2
1936 2029
1937 #ifdef HAS_I400TOARGBROW_SSE2 2030 #ifdef HAS_I400TOARGBROW_SSE2
1938 void I400ToARGBRow_SSE2(const uint8* y_buf, uint8* dst_argb, int width) { 2031 void I400ToARGBRow_SSE2(const uint8* y_buf, uint8* dst_argb, int width) {
1939 asm volatile ( 2032 asm volatile (
1940 "mov $0x4a354a35,%%eax \n" // 4a35 = 18997 = 1.164 2033 "mov $0x4a354a35,%%eax \n" // 4a35 = 18997 = 1.164
1941 "movd %%eax,%%xmm2 \n" 2034 "movd %%eax,%%xmm2 \n"
1942 "pshufd $0x0,%%xmm2,%%xmm2 \n" 2035 "pshufd $0x0,%%xmm2,%%xmm2 \n"
(...skipping 3258 matching lines...) Expand 10 before | Expand all | Expand 10 after
5201 ); 5294 );
5202 } 5295 }
5203 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 5296 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3
5204 5297
5205 #endif // defined(__x86_64__) || defined(__i386__) 5298 #endif // defined(__x86_64__) || defined(__i386__)
5206 5299
5207 #ifdef __cplusplus 5300 #ifdef __cplusplus
5208 } // extern "C" 5301 } // extern "C"
5209 } // namespace libyuv 5302 } // namespace libyuv
5210 #endif 5303 #endif
OLDNEW
« no previous file with comments | « source/row_common.cc ('k') | source/row_win.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698