| OLD | NEW |
| 1 | 1 |
| 2 /* | 2 /* |
| 3 * Copyright 2012 The Android Open Source Project | 3 * Copyright 2012 The Android Open Source Project |
| 4 * | 4 * |
| 5 * Use of this source code is governed by a BSD-style license that can be | 5 * Use of this source code is governed by a BSD-style license that can be |
| 6 * found in the LICENSE file. | 6 * found in the LICENSE file. |
| 7 */ | 7 */ |
| 8 | 8 |
| 9 | 9 |
| 10 #include <arm_neon.h> | 10 #include <arm_neon.h> |
| 11 #include "SkColorPriv.h" | 11 #include "SkColorPriv.h" |
| 12 | 12 |
| 13 /* | 13 /* |
| 14 * Filter_32_opaque | 14 * Filter_32_opaque |
| 15 * | 15 * |
| 16 * There is no hard-n-fast rule that the filtering must produce | 16 * There is no hard-n-fast rule that the filtering must produce |
| 17 * exact results for the color components, but if the 4 incoming colors are | 17 * exact results for the color components, but if the 4 incoming colors are |
| 18 * all opaque, then the output color must also be opaque. Subsequent parts of | 18 * all opaque, then the output color must also be opaque. Subsequent parts of |
| 19 * the drawing pipeline may rely on this (e.g. which blitrow proc to use). | 19 * the drawing pipeline may rely on this (e.g. which blitrow proc to use). |
| | 20 * |
| 20 */ | 21 */ |
| 21 | 22 // Chrome on Android uses -Os so we need to force these inline. Otherwise |
| 22 static inline void Filter_32_opaque_neon(unsigned x, unsigned y, | 23 // calling the function in the inner loops will cause significant overhead on |
| 23 SkPMColor a00, SkPMColor a01, | 24 // some platforms. |
| 24 SkPMColor a10, SkPMColor a11, | 25 static SK_ALWAYS_INLINE void Filter_32_opaque_neon(unsigned x, unsigned y, |
| 25 SkPMColor *dst) { | 26 SkPMColor a00, SkPMColor a01, |
| | 27 SkPMColor a10, SkPMColor a11, |
| | 28 SkPMColor *dst) { |
| 26 uint8x8_t vy, vconst16_8, v16_y, vres; | 29 uint8x8_t vy, vconst16_8, v16_y, vres; |
| 27 uint16x4_t vx, vconst16_16, v16_x, tmp; | 30 uint16x4_t vx, vconst16_16, v16_x, tmp; |
| 28 uint32x2_t va0, va1; | 31 uint32x2_t va0, va1; |
| 29 uint16x8_t tmp1, tmp2; | 32 uint16x8_t tmp1, tmp2; |
| 30 | 33 |
| 31 vy = vdup_n_u8(y); // duplicate y into vy | 34 vy = vdup_n_u8(y); // duplicate y into vy |
| 32 vconst16_8 = vmov_n_u8(16); // set up constant in vconst16_8 | 35 vconst16_8 = vmov_n_u8(16); // set up constant in vconst16_8 |
| 33 v16_y = vsub_u8(vconst16_8, vy); // v16_y = 16-y | 36 v16_y = vsub_u8(vconst16_8, vy); // v16_y = 16-y |
| 34 | 37 |
| 35 va0 = vdup_n_u32(a00); // duplicate a00 | 38 va0 = vdup_n_u32(a00); // duplicate a00 |
| (...skipping 10 matching lines...) |
| 46 | 49 |
| 47 tmp = vmul_u16(vget_high_u16(tmp1), vx); // tmp = a01 * x | 50 tmp = vmul_u16(vget_high_u16(tmp1), vx); // tmp = a01 * x |
| 48 tmp = vmla_u16(tmp, vget_high_u16(tmp2), vx); // tmp += a11 * x | 51 tmp = vmla_u16(tmp, vget_high_u16(tmp2), vx); // tmp += a11 * x |
| 49 tmp = vmla_u16(tmp, vget_low_u16(tmp1), v16_x); // tmp += a00 * (16-x) | 52 tmp = vmla_u16(tmp, vget_low_u16(tmp1), v16_x); // tmp += a00 * (16-x) |
| 50 tmp = vmla_u16(tmp, vget_low_u16(tmp2), v16_x); // tmp += a10 * (16-x) | 53 tmp = vmla_u16(tmp, vget_low_u16(tmp2), v16_x); // tmp += a10 * (16-x) |
| 51 | 54 |
| 52 vres = vshrn_n_u16(vcombine_u16(tmp, vcreate_u16(0)), 8); // shift down result by 8 | 55 vres = vshrn_n_u16(vcombine_u16(tmp, vcreate_u16(0)), 8); // shift down result by 8 |
| 53 vst1_lane_u32(dst, vreinterpret_u32_u8(vres), 0); // store result | 56 vst1_lane_u32(dst, vreinterpret_u32_u8(vres), 0); // store result |
| 54 } | 57 } |
| 55 | 58 |
| 56 static inline void Filter_32_alpha_neon(unsigned x, unsigned y, | 59 static SK_ALWAYS_INLINE void Filter_32_alpha_neon(unsigned x, unsigned y, |
| 57 SkPMColor a00, SkPMColor a01, | 60 SkPMColor a00, SkPMColor a01, |
| 58 SkPMColor a10, SkPMColor a11, | 61 SkPMColor a10, SkPMColor a11, |
| 59 SkPMColor *dst, uint16_t scale) { | 62 SkPMColor *dst, |
| | 63 uint16_t scale) { |
| 60 uint8x8_t vy, vconst16_8, v16_y, vres; | 64 uint8x8_t vy, vconst16_8, v16_y, vres; |
| 61 uint16x4_t vx, vconst16_16, v16_x, tmp, vscale; | 65 uint16x4_t vx, vconst16_16, v16_x, tmp, vscale; |
| 62 uint32x2_t va0, va1; | 66 uint32x2_t va0, va1; |
| 63 uint16x8_t tmp1, tmp2; | 67 uint16x8_t tmp1, tmp2; |
| 64 | 68 |
| 65 vy = vdup_n_u8(y); // duplicate y into vy | 69 vy = vdup_n_u8(y); // duplicate y into vy |
| 66 vconst16_8 = vmov_n_u8(16); // set up constant in vconst16_8 | 70 vconst16_8 = vmov_n_u8(16); // set up constant in vconst16_8 |
| 67 v16_y = vsub_u8(vconst16_8, vy); // v16_y = 16-y | 71 v16_y = vsub_u8(vconst16_8, vy); // v16_y = 16-y |
| 68 | 72 |
| 69 va0 = vdup_n_u32(a00); // duplicate a00 | 73 va0 = vdup_n_u32(a00); // duplicate a00 |
| (...skipping 13 matching lines...) |
| 83 tmp = vmla_u16(tmp, vget_low_u16(tmp1), v16_x); // tmp += a00 * (16-x) | 87 tmp = vmla_u16(tmp, vget_low_u16(tmp1), v16_x); // tmp += a00 * (16-x) |
| 84 tmp = vmla_u16(tmp, vget_low_u16(tmp2), v16_x); // tmp += a10 * (16-x) | 88 tmp = vmla_u16(tmp, vget_low_u16(tmp2), v16_x); // tmp += a10 * (16-x) |
| 85 | 89 |
| 86 vscale = vdup_n_u16(scale); // duplicate scale | 90 vscale = vdup_n_u16(scale); // duplicate scale |
| 87 tmp = vshr_n_u16(tmp, 8); // shift down result by 8 | 91 tmp = vshr_n_u16(tmp, 8); // shift down result by 8 |
| 88 tmp = vmul_u16(tmp, vscale); // multiply result by scale | 92 tmp = vmul_u16(tmp, vscale); // multiply result by scale |
| 89 | 93 |
| 90 vres = vshrn_n_u16(vcombine_u16(tmp, vcreate_u16(0)), 8); // shift down result by 8 | 94 vres = vshrn_n_u16(vcombine_u16(tmp, vcreate_u16(0)), 8); // shift down result by 8 |
| 91 vst1_lane_u32(dst, vreinterpret_u32_u8(vres), 0); // store result | 95 vst1_lane_u32(dst, vreinterpret_u32_u8(vres), 0); // store result |
| 92 } | 96 } |
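
For reviewers tracing the arithmetic: both functions compute a 4x4 fixed-point bilinear blend, where x and y are subpixel fractions in [0, 16] and the four weights (16-x)(16-y), x(16-y), (16-x)y, and xy sum to 256, so the final >> 8 renormalizes. The following scalar sketch shows the math the NEON lanes compute per 8-bit channel; the helper name filter_32_opaque_ref is hypothetical and only illustrative, not part of this CL.

#include <stdint.h>

/* Illustrative scalar equivalent of Filter_32_opaque_neon (assumption: not
 * part of this CL). Each 8-bit channel of the packed 32-bit premultiplied
 * color is blended independently with weights that sum to 256. */
static uint32_t filter_32_opaque_ref(unsigned x, unsigned y,
                                     uint32_t a00, uint32_t a01,
                                     uint32_t a10, uint32_t a11) {
    uint32_t result = 0;
    for (int shift = 0; shift < 32; shift += 8) {
        uint32_t c00 = (a00 >> shift) & 0xFF;   // channel from top-left
        uint32_t c01 = (a01 >> shift) & 0xFF;   // channel from top-right
        uint32_t c10 = (a10 >> shift) & 0xFF;   // channel from bottom-left
        uint32_t c11 = (a11 >> shift) & 0xFF;   // channel from bottom-right
        uint32_t c = (c00 * (16 - x) * (16 - y) +
                      c01 * x        * (16 - y) +
                      c10 * (16 - x) * y        +
                      c11 * x        * y) >> 8; // weights sum to 256
        result |= c << shift;
    }
    return result;
}

Filter_32_alpha_neon is the same blend followed by a per-channel scale, ((sum >> 8) * scale) >> 8, matching the vshr_n_u16/vmul_u16 pair above. Note that when all four inputs are opaque (alpha = 0xFF), the blended alpha is (0xFF * 256) >> 8 = 0xFF, which is exactly the opaqueness invariant the header comment requires.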