Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(3)

Side by Side Diff: src/opts/SkBlitRow_opts_arm_neon.cpp

Issue 346843003: ARM Skia NEON patches - 40 - arm64: S32A_D565_Opaque (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Created 6 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2012 The Android Open Source Project 2 * Copyright 2012 The Android Open Source Project
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #include "SkBlitRow_opts_arm_neon.h" 8 #include "SkBlitRow_opts_arm_neon.h"
9 9
10 #include "SkBlitMask.h" 10 #include "SkBlitMask.h"
(...skipping 347 matching lines...) Expand 10 before | Expand all | Expand 10 after
358 358
359 "21: \n\t" 359 "21: \n\t"
360 : [count] "+r" (count) 360 : [count] "+r" (count)
361 : [dst] "r" (dst), [keep_dst] "r" (keep_dst), [src] "r" (s rc) 361 : [dst] "r" (dst), [keep_dst] "r" (keep_dst), [src] "r" (s rc)
362 : "ip", "cc", "memory", "d0","d1","d2","d3","d4","d5","d6" ,"d7", 362 : "ip", "cc", "memory", "d0","d1","d2","d3","d4","d5","d6" ,"d7",
363 "d16","d17","d18","d19","d20","d21","d22","d23","d24","d25 ","d26","d27","d28","d29", 363 "d16","d17","d18","d19","d20","d21","d22","d23","d24","d25 ","d26","d27","d28","d29",
364 "d30","d31" 364 "d30","d31"
365 ); 365 );
366 } 366 }
367 } 367 }
368 #endif 368
369 #else // #ifdef SK_CPU_ARM32
370
371 void S32A_D565_Opaque_neon(uint16_t* SK_RESTRICT dst,
372 const SkPMColor* SK_RESTRICT src, int count,
373 U8CPU alpha, int /*x*/, int /*y*/) {
374 SkASSERT(255 == alpha);
375
376 if (count >= 16) {
377 asm (
378 "movi v4.8h, #0x80 \t\n"
379
380 "1: \t\n"
381 "sub %[count], %[count], #16 \t\n"
382 "ld1 {v16.8h-v17.8h}, [%[dst]] \t\n"
383 "ld4 {v0.16b-v3.16b}, [%[src]], #64 \t\n"
384 "prfm pldl1keep, [%[src],#512] \t\n"
385 "prfm pldl1keep, [%[dst],#256] \t\n"
386 "ushr v20.8h, v17.8h, #5 \t\n"
387 "ushr v31.8h, v16.8h, #5 \t\n"
388 "xtn v6.8b, v31.8h \t\n"
389 "xtn2 v6.16b, v20.8h \t\n"
390 "ushr v20.8h, v17.8h, #11 \t\n"
391 "shl v19.16b, v6.16b, #2 \t\n"
392 "ushr v31.8h, v16.8h, #11 \t\n"
393 "xtn v22.8b, v31.8h \t\n"
394 "xtn2 v22.16b, v20.8h \t\n"
395 "shl v18.16b, v22.16b, #3 \t\n"
396 "mvn v3.16b, v3.16b \t\n"
397 "xtn v16.8b, v16.8h \t\n"
398 "mov v7.16b, v4.16b \t\n"
399 "xtn2 v16.16b, v17.8h \t\n"
400 "umlal v7.8h, v3.8b, v19.8b \t\n"
401 "shl v16.16b, v16.16b, #3 \t\n"
402 "mov v22.16b, v4.16b \t\n"
403 "ushr v24.8h, v7.8h, #6 \t\n"
404 "umlal v22.8h, v3.8b, v18.8b \t\n"
405 "ushr v20.8h, v22.8h, #5 \t\n"
406 "addhn v20.8b, v22.8h, v20.8h \t\n"
407 "cmp %[count], #16 \t\n"
408 "mov v6.16b, v4.16b \t\n"
409 "mov v5.16b, v4.16b \t\n"
410 "umlal v6.8h, v3.8b, v16.8b \t\n"
411 "umlal2 v5.8h, v3.16b, v19.16b \t\n"
412 "mov v17.16b, v4.16b \t\n"
413 "ushr v19.8h, v6.8h, #5 \t\n"
414 "umlal2 v17.8h, v3.16b, v18.16b \t\n"
415 "addhn v7.8b, v7.8h, v24.8h \t\n"
416 "ushr v18.8h, v5.8h, #6 \t\n"
417 "ushr v21.8h, v17.8h, #5 \t\n"
418 "addhn2 v7.16b, v5.8h, v18.8h \t\n"
419 "addhn2 v20.16b, v17.8h, v21.8h \t\n"
420 "mov v22.16b, v4.16b \t\n"
421 "addhn v6.8b, v6.8h, v19.8h \t\n"
422 "umlal2 v22.8h, v3.16b, v16.16b \t\n"
423 "ushr v5.8h, v22.8h, #5 \t\n"
424 "addhn2 v6.16b, v22.8h, v5.8h \t\n"
425 "uqadd v7.16b, v1.16b, v7.16b \t\n"
426 "uqadd v20.16b, v2.16b, v20.16b \t\n"
427 "uqadd v6.16b, v0.16b, v6.16b \t\n"
428 "shll v22.8h, v20.8b, #8 \t\n"
429 "shll v5.8h, v7.8b, #8 \t\n"
430 "sri v22.8h, v5.8h, #5 \t\n"
431 "shll v17.8h, v6.8b, #8 \t\n"
432 "shll2 v23.8h, v20.16b, #8 \t\n"
433 "shll2 v7.8h, v7.16b, #8 \t\n"
434 "sri v22.8h, v17.8h, #11 \t\n"
435 "sri v23.8h, v7.8h, #5 \t\n"
436 "shll2 v6.8h, v6.16b, #8 \t\n"
437 "st1 {v22.8h}, [%[dst]], #16 \t\n"
438 "sri v23.8h, v6.8h, #11 \t\n"
439 "st1 {v23.8h}, [%[dst]], #16 \t\n"
440 "b.ge 1b \t\n"
441 : [dst] "+&r" (dst), [src] "+&r" (src), [count] "+&r" (count)
442 :: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
443 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24",
444 "v31"
445 );
446 }
447 // Leftovers
448 if (count > 0) {
449 do {
450 SkPMColor c = *src++;
451 SkPMColorAssert(c);
452 if (c) {
453 *dst = SkSrcOver32To16(c, *dst);
454 }
455 dst += 1;
456 } while (--count != 0);
457 }
458 }
459 #endif // #ifdef SK_CPU_ARM32
369 460
370 static inline uint16x8_t SkDiv255Round_neon8(uint16x8_t prod) { 461 static inline uint16x8_t SkDiv255Round_neon8(uint16x8_t prod) {
371 prod += vdupq_n_u16(128); 462 prod += vdupq_n_u16(128);
372 prod += vshrq_n_u16(prod, 8); 463 prod += vshrq_n_u16(prod, 8);
373 return vshrq_n_u16(prod, 8); 464 return vshrq_n_u16(prod, 8);
374 } 465 }
375 466
376 void S32A_D565_Blend_neon(uint16_t* SK_RESTRICT dst, 467 void S32A_D565_Blend_neon(uint16_t* SK_RESTRICT dst,
377 const SkPMColor* SK_RESTRICT src, int count, 468 const SkPMColor* SK_RESTRICT src, int count,
378 U8CPU alpha, int /*x*/, int /*y*/) { 469 U8CPU alpha, int /*x*/, int /*y*/) {
(...skipping 1166 matching lines...) Expand 10 before | Expand all | Expand 10 after
1545 count--; 1636 count--;
1546 } 1637 }
1547 } 1638 }
1548 1639
1549 /////////////////////////////////////////////////////////////////////////////// 1640 ///////////////////////////////////////////////////////////////////////////////
1550 1641
1551 const SkBlitRow::Proc sk_blitrow_platform_565_procs_arm_neon[] = { 1642 const SkBlitRow::Proc sk_blitrow_platform_565_procs_arm_neon[] = {
1552 // no dither 1643 // no dither
1553 S32_D565_Opaque_neon, 1644 S32_D565_Opaque_neon,
1554 S32_D565_Blend_neon, 1645 S32_D565_Blend_neon,
1555 #ifdef SK_CPU_ARM32
1556 S32A_D565_Opaque_neon, 1646 S32A_D565_Opaque_neon,
1557 #else
1558 NULL,
1559 #endif
1560 S32A_D565_Blend_neon, 1647 S32A_D565_Blend_neon,
1561 1648
1562 // dither 1649 // dither
1563 S32_D565_Opaque_Dither_neon, 1650 S32_D565_Opaque_Dither_neon,
1564 S32_D565_Blend_Dither_neon, 1651 S32_D565_Blend_Dither_neon,
1565 S32A_D565_Opaque_Dither_neon, 1652 S32A_D565_Opaque_Dither_neon,
1566 NULL, // S32A_D565_Blend_Dither 1653 NULL, // S32A_D565_Blend_Dither
1567 }; 1654 };
1568 1655
1569 const SkBlitRow::Proc32 sk_blitrow_platform_32_procs_arm_neon[] = { 1656 const SkBlitRow::Proc32 sk_blitrow_platform_32_procs_arm_neon[] = {
(...skipping 13 matching lines...) Expand all
1583 S32A_Opaque_BlitRow32_neon_src_alpha, // S32A_Opaque, 1670 S32A_Opaque_BlitRow32_neon_src_alpha, // S32A_Opaque,
1584 #else 1671 #else
1585 S32A_Opaque_BlitRow32_neon, // S32A_Opaque, 1672 S32A_Opaque_BlitRow32_neon, // S32A_Opaque,
1586 #endif 1673 #endif
1587 #ifdef SK_CPU_ARM32 1674 #ifdef SK_CPU_ARM32
1588 S32A_Blend_BlitRow32_neon // S32A_Blend 1675 S32A_Blend_BlitRow32_neon // S32A_Blend
1589 #else 1676 #else
1590 NULL 1677 NULL
1591 #endif 1678 #endif
1592 }; 1679 };
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698