Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(882)

Side by Side Diff: source/libvpx/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm

Issue 1162573005: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 ;
2 ; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
3 ;
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
9 ;
10
11
12 EXPORT |vp8_mse16x16_armv6|
13
14 ARM
15
16 AREA ||.text||, CODE, READONLY, ALIGN=2
17
; r0    unsigned char *src_ptr
; r1    int            source_stride
; r2    unsigned char *ref_ptr
; r3    int            recon_stride
; stack unsigned int  *sse
;
; Accumulates the sum of squared differences between src and ref over
; 16 rows of 16 pixels. The total is returned in r0 and is also stored
; through the sse pointer.
;
; note: Based on vp8_variance16x16_armv6. In this function the pixel sum is
;       never used, so the usad8 partial-sum instructions of the variance
;       routine are omitted; only the squared-difference path remains.
;
; Register use inside the loop:
;   r4   running sse
;   r5   4 src pixels
;   r6   4 ref pixels, later two absolute differences as halfwords
;   r7   positive byte differences, later the other two as halfwords
;   r8   byte differences, later the 4 absolute differences
;   r9   byte differences with reversed operands
;   r12  rows remaining
;   lr   constant zero (second operand of sel)
;
; NOTE(review): pixels are fetched with word loads (ldr); this assumes the
;       target permits whatever alignment src_ptr/ref_ptr and the strides
;       produce - confirm against the callers.

|vp8_mse16x16_armv6| PROC

    push    {r4-r9, lr}             ; 7 registers = 28 bytes, see [sp, #28] below

    pld     [r0, r1, lsl #0]        ; prefetch src_ptr + source_stride
    pld     [r2, r3, lsl #0]        ; prefetch ref_ptr + recon_stride

    mov     r12, #16                ; set loop counter to 16 (=block height)
    mov     r4, #0                  ; initialize sse = 0
    mov     lr, #0                  ; constant zero, never written in the loop

loop
    ; 1st 4 pixels
    ldr     r5, [r0, #0x0]          ; load 4 src pixels
    ldr     r6, [r2, #0x0]          ; load 4 ref pixels

    usub8   r8, r5, r6              ; src - ref per byte, sets GE per byte
    pld     [r0, r1, lsl #1]        ; prefetch src_ptr + 2 * source_stride
    sel     r7, r8, lr              ; keep bytes where src >= ref, else 0
    usub8   r9, r6, r5              ; ref - src per byte, sets GE per byte
    pld     [r2, r3, lsl #1]        ; prefetch ref_ptr + 2 * recon_stride
    sel     r8, r9, lr              ; keep bytes where ref >= src, else 0
    orr     r8, r8, r7              ; absolute differences of all 4 pixels

    ldr     r5, [r0, #0x4]          ; load 4 src pixels (next group)

    ; calculate sse
    uxtb16  r6, r8                  ; bytes 0 and 2 to halfwords
    uxtb16  r7, r8, ror #8          ; bytes 1 and 3 to halfwords
    smlad   r4, r6, r6, r4          ; sse += d0*d0 + d2*d2

    ; 2nd 4 pixels
    ldr     r6, [r2, #0x4]          ; load 4 ref pixels
    smlad   r4, r7, r7, r4          ; sse += d1*d1 + d3*d3

    usub8   r8, r5, r6              ; src - ref per byte
    sel     r7, r8, lr              ; select bytes with positive difference
    usub8   r9, r6, r5              ; ref - src per byte
    sel     r8, r9, lr              ; select bytes with negative difference
    orr     r8, r8, r7              ; absolute differences of all 4 pixels

    ldr     r5, [r0, #0x8]          ; load 4 src pixels (next group)

    ; calculate sse
    uxtb16  r6, r8                  ; bytes 0 and 2 to halfwords
    uxtb16  r7, r8, ror #8          ; bytes 1 and 3 to halfwords
    smlad   r4, r6, r6, r4          ; sse += d0*d0 + d2*d2

    ; 3rd 4 pixels
    ldr     r6, [r2, #0x8]          ; load 4 ref pixels
    smlad   r4, r7, r7, r4          ; sse += d1*d1 + d3*d3

    usub8   r8, r5, r6              ; src - ref per byte
    sel     r7, r8, lr              ; select bytes with positive difference
    usub8   r9, r6, r5              ; ref - src per byte
    sel     r8, r9, lr              ; select bytes with negative difference
    orr     r8, r8, r7              ; absolute differences of all 4 pixels

    ldr     r5, [r0, #0xc]          ; load 4 src pixels (next group)

    ; calculate sse
    uxtb16  r6, r8                  ; bytes 0 and 2 to halfwords
    uxtb16  r7, r8, ror #8          ; bytes 1 and 3 to halfwords
    smlad   r4, r6, r6, r4          ; sse += d0*d0 + d2*d2

    ; 4th 4 pixels
    ldr     r6, [r2, #0xc]          ; load 4 ref pixels
    smlad   r4, r7, r7, r4          ; sse += d1*d1 + d3*d3

    usub8   r8, r5, r6              ; src - ref per byte
    add     r0, r0, r1              ; set src_ptr to next row
    sel     r7, r8, lr              ; select bytes with positive difference
    usub8   r9, r6, r5              ; ref - src per byte
    add     r2, r2, r3              ; set ref_ptr to next row
    sel     r8, r9, lr              ; select bytes with negative difference
    orr     r8, r8, r7              ; absolute differences of all 4 pixels

    subs    r12, r12, #1            ; next row; flags consumed by bne below

    ; calculate sse
    uxtb16  r6, r8                  ; bytes 0 and 2 to halfwords
    uxtb16  r7, r8, ror #8          ; bytes 1 and 3 to halfwords
    smlad   r4, r6, r6, r4          ; sse += d0*d0 + d2*d2
    smlad   r4, r7, r7, r4          ; sse += d1*d1 + d3*d3

    bne     loop

    ; return stuff
    ldr     r1, [sp, #28]           ; get address of sse (above the 7 pushed regs)
    mov     r0, r4                  ; return sse
    str     r4, [r1]                ; store sse

    pop     {r4-r9, pc}

    ENDP
137
138 END
OLDNEW
« no previous file with comments | « source/libvpx/vp8/decoder/onyxd_if.c ('k') | source/libvpx/vp8/encoder/arm/neon/vp8_mse16x16_neon.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698