Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(102)

Side by Side Diff: src/opts/SkBlitRow_opts_arm_neon.cpp

Issue 1277953002: Purge non-NEON ARM code. (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/opts/SkBlitRow_opts_arm.cpp ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2012 The Android Open Source Project 2 * Copyright 2012 The Android Open Source Project
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #include "SkBlitRow_opts_arm_neon.h" 8 #include "SkBlitRow_opts_arm_neon.h"
9 9
10 #include "SkBlitMask.h" 10 #include "SkBlitMask.h"
(...skipping 617 matching lines...) Expand 10 before | Expand all | Expand 10 after
628 628
629 do { 629 do {
630 uint16x8_t vdst, vdst_r, vdst_g, vdst_b; 630 uint16x8_t vdst, vdst_r, vdst_g, vdst_b;
631 uint16x8_t vres_a, vres_r, vres_g, vres_b; 631 uint16x8_t vres_a, vres_r, vres_g, vres_b;
632 uint8x8x4_t vsrc; 632 uint8x8x4_t vsrc;
633 633
634 // load pixels 634 // load pixels
635 vdst = vld1q_u16(dst); 635 vdst = vld1q_u16(dst);
636 #ifdef SK_CPU_ARM64 636 #ifdef SK_CPU_ARM64
637 vsrc = sk_vld4_u8_arm64_4(src); 637 vsrc = sk_vld4_u8_arm64_4(src);
638 #else 638 #elif (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6))
639 #if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6))
640 asm ( 639 asm (
641 "vld4.u8 %h[vsrc], [%[src]]!" 640 "vld4.u8 %h[vsrc], [%[src]]!"
642 : [vsrc] "=w" (vsrc), [src] "+&r" (src) 641 : [vsrc] "=w" (vsrc), [src] "+&r" (src)
643 : : 642 : :
644 ); 643 );
645 #else 644 #else
646 register uint8x8_t d0 asm("d0"); 645 register uint8x8_t d0 asm("d0");
647 register uint8x8_t d1 asm("d1"); 646 register uint8x8_t d1 asm("d1");
648 register uint8x8_t d2 asm("d2"); 647 register uint8x8_t d2 asm("d2");
649 register uint8x8_t d3 asm("d3"); 648 register uint8x8_t d3 asm("d3");
650 649
651 asm volatile ( 650 asm volatile (
652 "vld4.u8 {d0-d3},[%[src]]!;" 651 "vld4.u8 {d0-d3},[%[src]]!;"
653 : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3), 652 : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3),
654 [src] "+&r" (src) 653 [src] "+&r" (src)
655 : : 654 : :
656 ); 655 );
657 vsrc.val[0] = d0; 656 vsrc.val[0] = d0;
658 vsrc.val[1] = d1; 657 vsrc.val[1] = d1;
659 vsrc.val[2] = d2; 658 vsrc.val[2] = d2;
660 vsrc.val[3] = d3; 659 vsrc.val[3] = d3;
661 #endif 660 #endif
662 #endif // #ifdef SK_CPU_ARM64
663 661
664 662
665 // deinterleave dst 663 // deinterleave dst
666 vdst_g = vshlq_n_u16(vdst, SK_R16_BITS); // shift green to to p of lanes 664 vdst_g = vshlq_n_u16(vdst, SK_R16_BITS); // shift green to to p of lanes
667 vdst_b = vdst & vmask_blue; // extract blue 665 vdst_b = vdst & vmask_blue; // extract blue
668 vdst_r = vshrq_n_u16(vdst, SK_R16_SHIFT); // extract red 666 vdst_r = vshrq_n_u16(vdst, SK_R16_SHIFT); // extract red
669 vdst_g = vshrq_n_u16(vdst_g, SK_R16_BITS + SK_B16_BITS); // extract green 667 vdst_g = vshrq_n_u16(vdst_g, SK_R16_BITS + SK_B16_BITS); // extract green
670 668
671 // shift src to 565 669 // shift src to 565
672 vsrc.val[NEON_R] = vshr_n_u8(vsrc.val[NEON_R], 8 - SK_R16_BITS); 670 vsrc.val[NEON_R] = vshr_n_u8(vsrc.val[NEON_R], 8 - SK_R16_BITS);
(...skipping 631 matching lines...) Expand 10 before | Expand all | Expand 10 after
1304 1302
1305 src += 2; 1303 src += 2;
1306 dst += 2; 1304 dst += 2;
1307 count -= 2; 1305 count -= 2;
1308 } while(count); 1306 } while(count);
1309 } 1307 }
1310 } 1308 }
1311 1309
1312 /////////////////////////////////////////////////////////////////////////////// 1310 ///////////////////////////////////////////////////////////////////////////////
1313 1311
1314 #undef DEBUG_OPAQUE_DITHER
1315
1316 #if defined(DEBUG_OPAQUE_DITHER)
1317 static void showme8(char *str, void *p, int len)
1318 {
1319 static char buf[256];
1320 char tbuf[32];
1321 int i;
1322 char *pc = (char*) p;
1323 sprintf(buf,"%8s:", str);
1324 for(i=0;i<len;i++) {
1325 sprintf(tbuf, " %02x", pc[i]);
1326 strcat(buf, tbuf);
1327 }
1328 SkDebugf("%s\n", buf);
1329 }
1330 static void showme16(char *str, void *p, int len)
1331 {
1332 static char buf[256];
1333 char tbuf[32];
1334 int i;
1335 uint16_t *pc = (uint16_t*) p;
1336 sprintf(buf,"%8s:", str);
1337 len = (len / sizeof(uint16_t)); /* passed as bytes */
1338 for(i=0;i<len;i++) {
1339 sprintf(tbuf, " %04x", pc[i]);
1340 strcat(buf, tbuf);
1341 }
1342 SkDebugf("%s\n", buf);
1343 }
1344 #endif
1345 #endif // #ifdef SK_CPU_ARM32 1312 #endif // #ifdef SK_CPU_ARM32
1346 1313
1347 void S32A_D565_Opaque_Dither_neon (uint16_t * SK_RESTRICT dst, 1314 void S32A_D565_Opaque_Dither_neon (uint16_t * SK_RESTRICT dst,
1348 const SkPMColor* SK_RESTRICT src, 1315 const SkPMColor* SK_RESTRICT src,
1349 int count, U8CPU alpha, int x, int y) { 1316 int count, U8CPU alpha, int x, int y) {
1350 SkASSERT(255 == alpha); 1317 SkASSERT(255 == alpha);
1351 1318
1352 #define UNROLL 8 1319 #define UNROLL 8
1353 1320
1354 if (count >= UNROLL) { 1321 if (count >= UNROLL) {
1355 1322
1356 #if defined(DEBUG_OPAQUE_DITHER)
1357 uint16_t tmpbuf[UNROLL];
1358 int td[UNROLL];
1359 int tdv[UNROLL];
1360 int ta[UNROLL];
1361 int tap[UNROLL];
1362 uint16_t in_dst[UNROLL];
1363 int offset = 0;
1364 int noisy = 0;
1365 #endif
1366
1367 uint8x8_t dbase; 1323 uint8x8_t dbase;
1368 const uint8_t *dstart = &gDitherMatrix_Neon[(y&3)*12 + (x&3)]; 1324 const uint8_t *dstart = &gDitherMatrix_Neon[(y&3)*12 + (x&3)];
1369 dbase = vld1_u8(dstart); 1325 dbase = vld1_u8(dstart);
1370 1326
1371 do { 1327 do {
1372 uint8x8x4_t vsrc; 1328 uint8x8x4_t vsrc;
1373 uint8x8_t sr, sg, sb, sa, d; 1329 uint8x8_t sr, sg, sb, sa, d;
1374 uint16x8_t dst8, scale8, alpha8; 1330 uint16x8_t dst8, scale8, alpha8;
1375 uint16x8_t dst_r, dst_g, dst_b; 1331 uint16x8_t dst_r, dst_g, dst_b;
1376 1332
1377 #if defined(DEBUG_OPAQUE_DITHER)
1378 // calculate 8 elements worth into a temp buffer
1379 {
1380 int my_y = y;
1381 int my_x = x;
1382 SkPMColor* my_src = (SkPMColor*)src;
1383 uint16_t* my_dst = dst;
1384 int i;
1385
1386 DITHER_565_SCAN(my_y);
1387 for(i = 0; i < UNROLL; i++) {
1388 SkPMColor c = *my_src++;
1389 SkPMColorAssert(c);
1390 if (c) {
1391 unsigned a = SkGetPackedA32(c);
1392
1393 int d = SkAlphaMul(DITHER_VALUE(my_x), SkAlpha255To256(a));
1394 tdv[i] = DITHER_VALUE(my_x);
1395 ta[i] = a;
1396 tap[i] = SkAlpha255To256(a);
1397 td[i] = d;
1398
1399 unsigned sr = SkGetPackedR32(c);
1400 unsigned sg = SkGetPackedG32(c);
1401 unsigned sb = SkGetPackedB32(c);
1402 sr = SkDITHER_R32_FOR_565(sr, d);
1403 sg = SkDITHER_G32_FOR_565(sg, d);
1404 sb = SkDITHER_B32_FOR_565(sb, d);
1405
1406 uint32_t src_expanded = (sg << 24) | (sr << 13) | (sb << 2);
1407 uint32_t dst_expanded = SkExpand_rgb_16(*my_dst);
1408 dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3);
1409 // now src and dst expanded are in g:11 r:10 x:1 b:10
1410 tmpbuf[i] = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5) ;
1411 td[i] = d;
1412 } else {
1413 tmpbuf[i] = *my_dst;
1414 ta[i] = tdv[i] = td[i] = 0xbeef;
1415 }
1416 in_dst[i] = *my_dst;
1417 my_dst += 1;
1418 DITHER_INC_X(my_x);
1419 }
1420 }
1421 #endif
1422
1423 #ifdef SK_CPU_ARM64 1333 #ifdef SK_CPU_ARM64
1424 vsrc = sk_vld4_u8_arm64_4(src); 1334 vsrc = sk_vld4_u8_arm64_4(src);
1425 #else 1335 #else
1426 { 1336 {
1427 register uint8x8_t d0 asm("d0"); 1337 register uint8x8_t d0 asm("d0");
1428 register uint8x8_t d1 asm("d1"); 1338 register uint8x8_t d1 asm("d1");
1429 register uint8x8_t d2 asm("d2"); 1339 register uint8x8_t d2 asm("d2");
1430 register uint8x8_t d3 asm("d3"); 1340 register uint8x8_t d3 asm("d3");
1431 1341
1432 asm ("vld4.8 {d0-d3},[%[src]]! " 1342 asm ("vld4.8 {d0-d3},[%[src]]! "
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after
1482 dst_g = vmlaq_u16(vshll_n_u8(sg,3), dst_g, scale8); 1392 dst_g = vmlaq_u16(vshll_n_u8(sg,3), dst_g, scale8);
1483 dst_r = vmlaq_u16(vshll_n_u8(sr,2), dst_r, scale8); 1393 dst_r = vmlaq_u16(vshll_n_u8(sr,2), dst_r, scale8);
1484 1394
1485 // repack to store 1395 // repack to store
1486 dst8 = vshrq_n_u16(dst_b, 5); 1396 dst8 = vshrq_n_u16(dst_b, 5);
1487 dst8 = vsliq_n_u16(dst8, vshrq_n_u16(dst_g, 5), 5); 1397 dst8 = vsliq_n_u16(dst8, vshrq_n_u16(dst_g, 5), 5);
1488 dst8 = vsliq_n_u16(dst8, vshrq_n_u16(dst_r,5), 11); 1398 dst8 = vsliq_n_u16(dst8, vshrq_n_u16(dst_r,5), 11);
1489 1399
1490 vst1q_u16(dst, dst8); 1400 vst1q_u16(dst, dst8);
1491 1401
1492 #if defined(DEBUG_OPAQUE_DITHER)
1493 // verify my 8 elements match the temp buffer
1494 {
1495 int i, bad=0;
1496 static int invocation;
1497
1498 for (i = 0; i < UNROLL; i++) {
1499 if (tmpbuf[i] != dst[i]) {
1500 bad=1;
1501 }
1502 }
1503 if (bad) {
1504 SkDebugf("BAD S32A_D565_Opaque_Dither_neon(); invocation %d offset % d\n",
1505 invocation, offset);
1506 SkDebugf(" alpha 0x%x\n", alpha);
1507 for (i = 0; i < UNROLL; i++)
1508 SkDebugf("%2d: %s %04x w %04x id %04x s %08x d %04x %04x %04x %0 4x\n",
1509 i, ((tmpbuf[i] != dst[i])?"BAD":"got"), dst[i], tmpbuf[ i],
1510 in_dst[i], src[i-8], td[i], tdv[i], tap[i], ta[i]);
1511
1512 showme16("alpha8", &alpha8, sizeof(alpha8));
1513 showme16("scale8", &scale8, sizeof(scale8));
1514 showme8("d", &d, sizeof(d));
1515 showme16("dst8", &dst8, sizeof(dst8));
1516 showme16("dst_b", &dst_b, sizeof(dst_b));
1517 showme16("dst_g", &dst_g, sizeof(dst_g));
1518 showme16("dst_r", &dst_r, sizeof(dst_r));
1519 showme8("sb", &sb, sizeof(sb));
1520 showme8("sg", &sg, sizeof(sg));
1521 showme8("sr", &sr, sizeof(sr));
1522
1523 return;
1524 }
1525 offset += UNROLL;
1526 invocation++;
1527 }
1528 #endif
1529 dst += UNROLL; 1402 dst += UNROLL;
1530 count -= UNROLL; 1403 count -= UNROLL;
1531 // skip x += UNROLL, since it's unchanged mod-4 1404 // skip x += UNROLL, since it's unchanged mod-4
1532 } while (count >= UNROLL); 1405 } while (count >= UNROLL);
1533 } 1406 }
1534 #undef UNROLL 1407 #undef UNROLL
1535 1408
1536 // residuals 1409 // residuals
1537 if (count > 0) { 1410 if (count > 0) {
1538 DITHER_565_SCAN(y); 1411 DITHER_565_SCAN(y);
(...skipping 23 matching lines...) Expand all
1562 *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5); 1435 *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5);
1563 } 1436 }
1564 dst += 1; 1437 dst += 1;
1565 DITHER_INC_X(x); 1438 DITHER_INC_X(x);
1566 } while (--count != 0); 1439 } while (--count != 0);
1567 } 1440 }
1568 } 1441 }
1569 1442
1570 /////////////////////////////////////////////////////////////////////////////// 1443 ///////////////////////////////////////////////////////////////////////////////
1571 1444
1572 #undef DEBUG_S32_OPAQUE_DITHER
1573
1574 void S32_D565_Opaque_Dither_neon(uint16_t* SK_RESTRICT dst, 1445 void S32_D565_Opaque_Dither_neon(uint16_t* SK_RESTRICT dst,
1575 const SkPMColor* SK_RESTRICT src, 1446 const SkPMColor* SK_RESTRICT src,
1576 int count, U8CPU alpha, int x, int y) { 1447 int count, U8CPU alpha, int x, int y) {
1577 SkASSERT(255 == alpha); 1448 SkASSERT(255 == alpha);
1578 1449
1579 #define UNROLL 8 1450 #define UNROLL 8
1580 if (count >= UNROLL) { 1451 if (count >= UNROLL) {
1581 uint8x8_t d; 1452 uint8x8_t d;
1582 const uint8_t *dstart = &gDitherMatrix_Neon[(y&3)*12 + (x&3)]; 1453 const uint8_t *dstart = &gDitherMatrix_Neon[(y&3)*12 + (x&3)];
1583 d = vld1_u8(dstart); 1454 d = vld1_u8(dstart);
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after
1630 dg = vaddl_u8(sg, vshr_n_u8(d, 1)); 1501 dg = vaddl_u8(sg, vshr_n_u8(d, 1));
1631 1502
1632 // pack high bits of each into 565 format (rgb, b is lsb) 1503 // pack high bits of each into 565 format (rgb, b is lsb)
1633 dst8 = vshrq_n_u16(db, 3); 1504 dst8 = vshrq_n_u16(db, 3);
1634 dst8 = vsliq_n_u16(dst8, vshrq_n_u16(dg, 2), 5); 1505 dst8 = vsliq_n_u16(dst8, vshrq_n_u16(dg, 2), 5);
1635 dst8 = vsliq_n_u16(dst8, vshrq_n_u16(dr, 3), 11); 1506 dst8 = vsliq_n_u16(dst8, vshrq_n_u16(dr, 3), 11);
1636 1507
1637 // store it 1508 // store it
1638 vst1q_u16(dst, dst8); 1509 vst1q_u16(dst, dst8);
1639 1510
1640 #if defined(DEBUG_S32_OPAQUE_DITHER)
1641 // always good to know if we generated good results
1642 {
1643 int i, myx = x, myy = y;
1644 DITHER_565_SCAN(myy);
1645 for (i=0;i<UNROLL;i++) {
1646 // the '!' in the asm block above post-incremented src by the 8 pixe ls it reads.
1647 SkPMColor c = src[i-8];
1648 unsigned dither = DITHER_VALUE(myx);
1649 uint16_t val = SkDitherRGB32To565(c, dither);
1650 if (val != dst[i]) {
1651 SkDebugf("RBE: src %08x dither %02x, want %04x got %04x dbas[i] %02x \n",
1652 c, dither, val, dst[i], dstart[i]);
1653 }
1654 DITHER_INC_X(myx);
1655 }
1656 }
1657 #endif
1658
1659 dst += UNROLL; 1511 dst += UNROLL;
1660 // we don't need to increment src as the asm above has already done it 1512 // we don't need to increment src as the asm above has already done it
1661 count -= UNROLL; 1513 count -= UNROLL;
1662 x += UNROLL; // probably superfluous 1514 x += UNROLL; // probably superfluous
1663 } 1515 }
1664 } 1516 }
1665 #undef UNROLL 1517 #undef UNROLL
1666 1518
1667 // residuals 1519 // residuals
1668 if (count > 0) { 1520 if (count > 0) {
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after
1740 invA += invA >> 7; 1592 invA += invA >> 7;
1741 SkASSERT(invA < 256); // Our caller has already handled the alpha == 0 case . 1593 SkASSERT(invA < 256); // Our caller has already handled the alpha == 0 case .
1742 1594
1743 Sk16h colorHighAndRound = Sk4px::DupPMColor(color).widenHi() + Sk16h(128); 1595 Sk16h colorHighAndRound = Sk4px::DupPMColor(color).widenHi() + Sk16h(128);
1744 Sk16b invA_16x(invA); 1596 Sk16b invA_16x(invA);
1745 1597
1746 Sk4px::MapSrc(count, dst, src, [&](const Sk4px& src4) -> Sk4px { 1598 Sk4px::MapSrc(count, dst, src, [&](const Sk4px& src4) -> Sk4px {
1747 return (src4 * invA_16x).addNarrowHi(colorHighAndRound); 1599 return (src4 * invA_16x).addNarrowHi(colorHighAndRound);
1748 }); 1600 });
1749 } 1601 }
OLDNEW
« no previous file with comments | « src/opts/SkBlitRow_opts_arm.cpp ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698