source/libvpx/vp8/common/arm/armv6/vp8_variance16x16_armv6.asm - Issue 1162573005: libvpx: Pull from upstream

Side by Side Diff: source/libvpx/vp8/common/arm/armv6/vp8_variance16x16_armv6.asm

Issue 1162573005: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master

Patch Set: Created 5 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
	(Empty)
1 ;

2 ; Copyright (c) 2011 The WebM project authors. All Rights Reserved.

3 ;

4 ; Use of this source code is governed by a BSD-style license

5 ; that can be found in the LICENSE file in the root of the source

6 ; tree. An additional intellectual property rights grant can be found

7 ; in the file PATENTS. All contributing project authors may

8 ; be found in the AUTHORS file in the root of the source tree.

9 ;

10

11

12 EXPORT \|vp8_variance16x16_armv6\| ; make the routine's symbol visible to other object files

13

14 ARM ; assemble what follows as ARM (not Thumb) instructions

15 REQUIRE8 ; this code requires 8-byte stack alignment

16 PRESERVE8 ; this code preserves 8-byte stack alignment

17

18 AREA \|\|.text\|\|, CODE, READONLY, ALIGN=2 ; read-only code section, aligned to 2^2 = 4 bytes

19 ; vp8_variance16x16_armv6: stores the sum of squared src-ref differences of a 16x16 block to *sse and returns sse - ((sum * sum) >> 8)

20 ; r0 unsigned char *src_ptr (advanced by source_stride after each row)

21 ; r1 int source_stride

22 ; r2 unsigned char *ref_ptr (advanced by recon_stride after each row)

23 ; r3 int recon_stride

24 ; stack unsigned int *sse (read from [sp, #40] once the registers have been pushed)

25 \|vp8_variance16x16_armv6\| PROC

26 ; register roles: r8 = running signed sum of differences, r11 = running sse, r12 = rows left, lr = zero constant

27 stmfd sp!, {r4-r12, lr} ; push 10 registers (40 bytes); popped again, with lr going to pc, at exit

28

29 pld [r0, r1, lsl #0] ; prefetch the next src row (src_ptr + source_stride)

30 pld [r2, r3, lsl #0] ; prefetch the next ref row (ref_ptr + recon_stride)

31

32 mov r8, #0 ; initialize sum = 0

33 mov r11, #0 ; initialize sse = 0

34 mov r12, #16 ; set loop counter to 16 (=block height)

35

36 loop ; one iteration per row; the 16 pixels of a row are handled as four 4-byte words

37 ; 1st 4 pixels (NOTE(review): the word loads assume src/ref addresses may be read as words - confirm alignment expectations with callers)

38 ldr r4, [r0, #0] ; load 4 src pixels

39 ldr r5, [r2, #0] ; load 4 ref pixels

40

41 mov lr, #0 ; constant zero (lr was saved on entry, so it is free as scratch; re-set on every row)

42

43 usub8 r6, r4, r5 ; per-byte src - ref; sets the GE flag of each byte lane where src >= ref

44 pld [r0, r1, lsl #1] ; prefetch src two rows ahead (src_ptr + 2 * source_stride)

45 sel r7, r6, lr ; keep the difference bytes where src >= ref, zero in the other lanes

46 usub8 r9, r5, r4 ; per-byte ref - src; GE flags now mark the lanes where ref >= src

47 pld [r2, r3, lsl #1] ; prefetch ref two rows ahead (ref_ptr + 2 * recon_stride)

48 sel r6, r9, lr ; keep the difference bytes where ref >= src (magnitudes of the negative differences)

49

50 ; calculate partial sums (usad8 against zero adds up the four bytes of its first operand)

51 usad8 r4, r7, lr ; calculate sum of positive differences

52 usad8 r5, r6, lr ; calculate sum of negative differences (as a positive magnitude)

53 orr r6, r6, r7 ; each byte lane is non-zero in at most one operand, so r6 = |src - ref| for all 4 pixels

54 ; calculate total sum

55 adds r8, r8, r4 ; add positive differences to sum (flags from the S suffix are not read before the loop counter's subs)

56 subs r8, r8, r5 ; subtract negative differences from sum (later groups use plain add/sub for the same step)

57

58 ; calculate sse: widen the absolute differences to halfwords, then square and accumulate pairwise

59 uxtb16 r5, r6 ; bytes 0 and 2 of r6 zero-extended to two halfwords

60 uxtb16 r10, r6, ror #8 ; bytes 1 and 3 of r6 zero-extended to two halfwords

61 smlad r11, r5, r5, r11 ; sse += sum of the squares of both halfwords in r5 (1)

62

63 ; 2nd 4 pixels (loads are issued before the previous group's second smlad)

64 ldr r4, [r0, #4] ; load 4 src pixels

65 ldr r5, [r2, #4] ; load 4 ref pixels

66 smlad r11, r10, r10, r11 ; sse += sum of the squares of both halfwords in r10, from the 1st 4 pixels (2)

67

68 usub8 r6, r4, r5 ; per-byte src - ref; sets GE flags where src >= ref

69 sel r7, r6, lr ; keep the difference bytes where src >= ref

70 usub8 r9, r5, r4 ; per-byte ref - src; sets GE flags where ref >= src

71 sel r6, r9, lr ; keep the difference bytes where ref >= src

72

73 ; calculate partial sums

74 usad8 r4, r7, lr ; calculate sum of positive differences

75 usad8 r5, r6, lr ; calculate sum of negative differences (as a positive magnitude)

76 orr r6, r6, r7 ; absolute differences of all 4 pixels

77

78 ; calculate total sum

79 add r8, r8, r4 ; add positive differences to sum

80 sub r8, r8, r5 ; subtract negative differences from sum

81

82 ; calculate sse

83 uxtb16 r5, r6 ; bytes 0 and 2 of r6 to halfwords

84 uxtb16 r10, r6, ror #8 ; bytes 1 and 3 of r6 to halfwords

85 smlad r11, r5, r5, r11 ; sse += sum of the squares of both halfwords in r5 (1)

86

87 ; 3rd 4 pixels

88 ldr r4, [r0, #8] ; load 4 src pixels

89 ldr r5, [r2, #8] ; load 4 ref pixels

90 smlad r11, r10, r10, r11 ; sse += sum of the squares of both halfwords in r10, from the 2nd 4 pixels (2)

91

92 usub8 r6, r4, r5 ; per-byte src - ref; sets GE flags where src >= ref

93 sel r7, r6, lr ; keep the difference bytes where src >= ref

94 usub8 r9, r5, r4 ; per-byte ref - src; sets GE flags where ref >= src

95 sel r6, r9, lr ; keep the difference bytes where ref >= src

96

97 ; calculate partial sums

98 usad8 r4, r7, lr ; calculate sum of positive differences

99 usad8 r5, r6, lr ; calculate sum of negative differences (as a positive magnitude)

100 orr r6, r6, r7 ; absolute differences of all 4 pixels

101

102 ; calculate total sum

103 add r8, r8, r4 ; add positive differences to sum

104 sub r8, r8, r5 ; subtract negative differences from sum

105

106 ; calculate sse

107 uxtb16 r5, r6 ; bytes 0 and 2 of r6 to halfwords

108 uxtb16 r10, r6, ror #8 ; bytes 1 and 3 of r6 to halfwords

109 smlad r11, r5, r5, r11 ; sse += sum of the squares of both halfwords in r5 (1)

110

111 ; 4th 4 pixels (the row pointers are advanced in between, after the last loads of this row)

112 ldr r4, [r0, #12] ; load 4 src pixels

113 ldr r5, [r2, #12] ; load 4 ref pixels

114 smlad r11, r10, r10, r11 ; sse += sum of the squares of both halfwords in r10, from the 3rd 4 pixels (2)

115

116 usub8 r6, r4, r5 ; per-byte src - ref; sets GE flags where src >= ref

117 add r0, r0, r1 ; set src_ptr to next row (no S suffix, so the flags the sel below relies on are untouched)

118 sel r7, r6, lr ; keep the difference bytes where src >= ref

119 usub8 r9, r5, r4 ; per-byte ref - src; sets GE flags where ref >= src

120 add r2, r2, r3 ; set ref_ptr to next row

121 sel r6, r9, lr ; keep the difference bytes where ref >= src

122

123 ; calculate partial sums

124 usad8 r4, r7, lr ; calculate sum of positive differences

125 usad8 r5, r6, lr ; calculate sum of negative differences (as a positive magnitude)

126 orr r6, r6, r7 ; absolute differences of all 4 pixels

127

128 ; calculate total sum

129 add r8, r8, r4 ; add positive differences to sum

130 sub r8, r8, r5 ; subtract negative differences from sum

131

132 ; calculate sse

133 uxtb16 r5, r6 ; bytes 0 and 2 of r6 to halfwords

134 uxtb16 r10, r6, ror #8 ; bytes 1 and 3 of r6 to halfwords

135 smlad r11, r5, r5, r11 ; sse += sum of the squares of both halfwords in r5 (1)

136 smlad r11, r10, r10, r11 ; sse += sum of the squares of both halfwords in r10 (2)

137

138

139 subs r12, r12, #1 ; one row done; Z is set when no rows remain

140

141 bne loop ; repeat until all 16 rows have been processed

142

143 ; return: write sse through the pointer argument and compute the result in r0

144 ldr r6, [sp, #40] ; get address of sse: the stack argument sits just above the 40 bytes pushed on entry

145 mul r0, r8, r8 ; sum * sum

146 str r11, [r6] ; store sse

147 sub r0, r11, r0, lsr #8 ; return (sse - ((sum * sum) >> 8)); the shift by 8 divides by the block's 256 pixels

148

149 ldmfd sp!, {r4-r12, pc} ; restore the saved registers and return by loading the saved lr into pc

150

151 ENDP

152

153 END

154

OLD	NEW

« no previous file with comments | « source/libvpx/vp8/common/alloccommon.c ('k') | source/libvpx/vp8/common/arm/armv6/vp8_variance8x8_armv6.asm » ('j') | no next file with comments »