OLD | NEW |
1 ; Copyright (c) 2007-2008 CSIRO | 1 ; Copyright (c) 2007-2008 CSIRO |
2 ; Copyright (c) 2007-2009 Xiph.Org Foundation | 2 ; Copyright (c) 2007-2009 Xiph.Org Foundation |
3 ; Copyright (c) 2013 Parrot | 3 ; Copyright (c) 2013 Parrot |
4 ; Written by Aurélien Zanelli | 4 ; Written by Aurélien Zanelli |
5 ; | 5 ; |
6 ; Redistribution and use in source and binary forms, with or without | 6 ; Redistribution and use in source and binary forms, with or without |
7 ; modification, are permitted provided that the following conditions | 7 ; modification, are permitted provided that the following conditions |
8 ; are met: | 8 ; are met: |
9 ; | 9 ; |
10 ; - Redistributions of source code must retain the above copyright | 10 ; - Redistributions of source code must retain the above copyright |
(...skipping 135 matching lines...)
146 ; Load last *y | 146 ; Load last *y |
147 VLD1.16 {d4[]}, [r5]! | 147 VLD1.16 {d4[]}, [r5]! |
148 VSRI.64 d4, d5, #16 | 148 VSRI.64 d4, d5, #16 |
149 ; Load last *x | 149 ; Load last *x |
150 VLD1.16 {d6[]}, [r4]! | 150 VLD1.16 {d6[]}, [r4]! |
151 VMLAL.S16 q0, d4, d6 | 151 VMLAL.S16 q0, d4, d6 |
152 MOV pc, lr | 152 MOV pc, lr |
153 ENDP | 153 ENDP |
154 | 154 |
155 ; opus_val32 celt_pitch_xcorr_neon(opus_val16 *_x, opus_val16 *_y, | 155 ; opus_val32 celt_pitch_xcorr_neon(opus_val16 *_x, opus_val16 *_y, |
156 ; opus_val32 *xcorr, int len, int max_pitch) | 156 ; opus_val32 *xcorr, int len, int max_pitch, int arch) |
157 celt_pitch_xcorr_neon PROC | 157 celt_pitch_xcorr_neon PROC |
158 ; input: | 158 ; input: |
159 ; r0 = opus_val16 *_x | 159 ; r0 = opus_val16 *_x |
160 ; r1 = opus_val16 *_y | 160 ; r1 = opus_val16 *_y |
161 ; r2 = opus_val32 *xcorr | 161 ; r2 = opus_val32 *xcorr |
162 ; r3 = int len | 162 ; r3 = int len |
163 ; output: | 163 ; output: |
164 ; r0 = int maxcorr | 164 ; r0 = int maxcorr |
165 ; internal usage: | 165 ; internal usage: |
166 ; r4 = opus_val16 *x (for xcorr_kernel_neon()) | 166 ; r4 = opus_val16 *x (for xcorr_kernel_neon()) |
167 ; r5 = opus_val16 *y (for xcorr_kernel_neon()) | 167 ; r5 = opus_val16 *y (for xcorr_kernel_neon()) |
168 ; r6 = int max_pitch | 168 ; r6 = int max_pitch |
169 ; r12 = int j | 169 ; r12 = int j |
170 ; q15 = int maxcorr[4] (q15 is not used by xcorr_kernel_neon()) | 170 ; q15 = int maxcorr[4] (q15 is not used by xcorr_kernel_neon()) |
| 171 ; ignored: |
| 172 ; int arch |
171 STMFD sp!, {r4-r6, lr} | 173 STMFD sp!, {r4-r6, lr} |
172 LDR r6, [sp, #16] | 174 LDR r6, [sp, #16] |
173 VMOV.S32 q15, #1 | 175 VMOV.S32 q15, #1 |
174 ; if (max_pitch < 4) goto celt_pitch_xcorr_neon_process4_done | 176 ; if (max_pitch < 4) goto celt_pitch_xcorr_neon_process4_done |
175 SUBS r6, r6, #4 | 177 SUBS r6, r6, #4 |
176 BLT celt_pitch_xcorr_neon_process4_done | 178 BLT celt_pitch_xcorr_neon_process4_done |
177 celt_pitch_xcorr_neon_process4 | 179 celt_pitch_xcorr_neon_process4 |
178 ; xcorr_kernel_neon parameters: | 180 ; xcorr_kernel_neon parameters: |
179 ; r3 = len, r4 = _x, r5 = _y, q0 = {0, 0, 0, 0} | 181 ; r3 = len, r4 = _x, r5 = _y, q0 = {0, 0, 0, 0} |
180 MOV r4, r0 | 182 MOV r4, r0 |
(...skipping 170 matching lines...)
351 ; r0 = maxcorr | 353 ; r0 = maxcorr |
352 ; internal usage | 354 ; internal usage |
353 ; r4 = opus_val16 *x | 355 ; r4 = opus_val16 *x |
354 ; r5 = opus_val16 *y | 356 ; r5 = opus_val16 *y |
355 ; r6 = opus_val32 sum0 | 357 ; r6 = opus_val32 sum0 |
356 ; r7 = opus_val32 sum1 | 358 ; r7 = opus_val32 sum1 |
357 ; r8 = opus_val32 sum2 | 359 ; r8 = opus_val32 sum2 |
358 ; r9 = opus_val32 sum3 | 360 ; r9 = opus_val32 sum3 |
359 ; r1 = int max_pitch | 361 ; r1 = int max_pitch |
360 ; r12 = int j | 362 ; r12 = int j |
| 363 ; ignored: |
| 364 ; int arch |
361 STMFD sp!, {r4-r11, lr} | 365 STMFD sp!, {r4-r11, lr} |
362 MOV r5, r1 | 366 MOV r5, r1 |
363 LDR r1, [sp, #36] | 367 LDR r1, [sp, #36] |
364 MOV r4, r0 | 368 MOV r4, r0 |
365 TST r5, #3 | 369 TST r5, #3 |
366 ; maxcorr = 1 | 370 ; maxcorr = 1 |
367 MOV r0, #1 | 371 MOV r0, #1 |
368 BEQ celt_pitch_xcorr_edsp_process1u_done | 372 BEQ celt_pitch_xcorr_edsp_process1u_done |
369 ; Compute one sum at the start to make y 32-bit aligned. | 373 ; Compute one sum at the start to make y 32-bit aligned. |
370 SUBS r12, r3, #4 | 374 SUBS r12, r3, #4 |
(...skipping 167 matching lines...)
538 ; xcorr[i] = sum | 542 ; xcorr[i] = sum |
539 STR r14, [r2], #4 | 543 STR r14, [r2], #4 |
540 MOVLT r0, r14 | 544 MOVLT r0, r14 |
541 celt_pitch_xcorr_edsp_done | 545 celt_pitch_xcorr_edsp_done |
542 LDMFD sp!, {r4-r11, pc} | 546 LDMFD sp!, {r4-r11, pc} |
543 ENDP | 547 ENDP |
544 | 548 |
545 ENDIF | 549 ENDIF |
546 | 550 |
547 END | 551 END |
OLD | NEW |