OLD | NEW |
1 ; | 1 ; |
2 ; Copyright (c) 2013 The WebM project authors. All Rights Reserved. | 2 ; Copyright (c) 2013 The WebM project authors. All Rights Reserved. |
3 ; | 3 ; |
4 ; Use of this source code is governed by a BSD-style license and patent | 4 ; Use of this source code is governed by a BSD-style license and patent |
5 ; grant that can be found in the LICENSE file in the root of the source | 5 ; grant that can be found in the LICENSE file in the root of the source |
6 ; tree. All contributing project authors may be found in the AUTHORS | 6 ; tree. All contributing project authors may be found in the AUTHORS |
7 ; file in the root of the source tree. | 7 ; file in the root of the source tree. |
8 ; | 8 ; |
9 | 9 |
10 EXPORT |vp9_idct32x32_1_add_neon| | 10 EXPORT |vpx_idct32x32_1_add_neon| |
11 ARM | 11 ARM |
12 REQUIRE8 | 12 REQUIRE8 |
13 PRESERVE8 | 13 PRESERVE8 |
14 | 14 |
15 AREA ||.text||, CODE, READONLY, ALIGN=2 | 15 AREA ||.text||, CODE, READONLY, ALIGN=2 |
16 | 16 |
17 ;TODO(hkuang): put the following macros in a separate | 17 ;TODO(hkuang): put the following macros in a separate |
18 ;file so other idct functions can also use them. | 18 ;file so other idct functions can also use them. |
19 MACRO | 19 MACRO |
20 LD_16x8 $src, $stride | 20 LD_16x8 $src, $stride |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
57 vst1.8 {q8}, [$dst], $stride | 57 vst1.8 {q8}, [$dst], $stride |
58 vst1.8 {q9}, [$dst], $stride | 58 vst1.8 {q9}, [$dst], $stride |
59 vst1.8 {q10},[$dst], $stride | 59 vst1.8 {q10},[$dst], $stride |
60 vst1.8 {q11},[$dst], $stride | 60 vst1.8 {q11},[$dst], $stride |
61 vst1.8 {q12},[$dst], $stride | 61 vst1.8 {q12},[$dst], $stride |
62 vst1.8 {q13},[$dst], $stride | 62 vst1.8 {q13},[$dst], $stride |
63 vst1.8 {q14},[$dst], $stride | 63 vst1.8 {q14},[$dst], $stride |
64 vst1.8 {q15},[$dst], $stride | 64 vst1.8 {q15},[$dst], $stride |
65 MEND | 65 MEND |
66 | 66 |
67 ;void vp9_idct32x32_1_add_neon(int16_t *input, uint8_t *dest, | 67 ;void vpx_idct32x32_1_add_neon(int16_t *input, uint8_t *dest, |
68 ; int dest_stride) | 68 ; int dest_stride) |
69 ; | 69 ; |
70 ; r0 int16_t *input | 70 ; r0 int16_t *input |
71 ; r1 uint8_t *dest | 71 ; r1 uint8_t *dest |
72 ; r2 int dest_stride | 72 ; r2 int dest_stride |
73 | 73 |
74 |vp9_idct32x32_1_add_neon| PROC | 74 |vpx_idct32x32_1_add_neon| PROC |
75 push {lr} | 75 push {lr} |
76 pld [r1] | 76 pld [r1] |
77 add r3, r1, #16 ; r3 dest + 16 for second loop | 77 add r3, r1, #16 ; r3 dest + 16 for second loop |
78 ldrsh r0, [r0] | 78 ldrsh r0, [r0] |
79 | 79 |
80 ; generate cospi_16_64 = 11585 | 80 ; generate cospi_16_64 = 11585 |
81 mov r12, #0x2d00 | 81 mov r12, #0x2d00 |
82 add r12, #0x41 | 82 add r12, #0x41 |
83 | 83 |
84 ; out = dct_const_round_shift(input[0] * cospi_16_64) | 84 ; out = dct_const_round_shift(input[0] * cospi_16_64) |
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
133 LD_16x8 r1, r2 | 133 LD_16x8 r1, r2 |
134 ADD_DIFF_16x8 q0 | 134 ADD_DIFF_16x8 q0 |
135 ST_16x8 r12, r2 | 135 ST_16x8 r12, r2 |
136 cmp r0, #2 | 136 cmp r0, #2 |
137 moveq r1, r3 | 137 moveq r1, r3 |
138 moveq r12, r3 | 138 moveq r12, r3 |
139 cmp r0, #0 | 139 cmp r0, #0 |
140 bne diff_positive_32_32_loop | 140 bne diff_positive_32_32_loop |
141 pop {pc} | 141 pop {pc} |
142 | 142 |
143 ENDP ; |vp9_idct32x32_1_add_neon| | 143 ENDP ; |vpx_idct32x32_1_add_neon| |
144 END | 144 END |
OLD | NEW |