OLD | NEW |
1 ; Copyright (c) 2007-2008 CSIRO | 1 ; Copyright (c) 2007-2008 CSIRO |
2 ; Copyright (c) 2007-2009 Xiph.Org Foundation | 2 ; Copyright (c) 2007-2009 Xiph.Org Foundation |
3 ; Copyright (c) 2013 Parrot | 3 ; Copyright (c) 2013 Parrot |
4 ; Written by Aurélien Zanelli | 4 ; Written by Aurélien Zanelli |
5 ; | 5 ; |
6 ; Redistribution and use in source and binary forms, with or without | 6 ; Redistribution and use in source and binary forms, with or without |
7 ; modification, are permitted provided that the following conditions | 7 ; modification, are permitted provided that the following conditions |
8 ; are met: | 8 ; are met: |
9 ; | 9 ; |
10 ; - Redistributions of source code must retain the above copyright | 10 ; - Redistributions of source code must retain the above copyright |
(...skipping 291 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
302 SMLATT r8, r14, r10, r8 ; sum[2] = MAC16_16(sum[2],x_3,y_5) | 302 SMLATT r8, r14, r10, r8 ; sum[2] = MAC16_16(sum[2],x_3,y_5) |
303 SMLATB r9, r14, r11, r9 ; sum[3] = MAC16_16(sum[3],x_3,y_6) | 303 SMLATB r9, r14, r11, r9 ; sum[3] = MAC16_16(sum[3],x_3,y_6) |
304 BGT xcorr_kernel_edsp_process4 | 304 BGT xcorr_kernel_edsp_process4 |
305 xcorr_kernel_edsp_process4_done | 305 xcorr_kernel_edsp_process4_done |
306 ADDS r2, r2, #4 | 306 ADDS r2, r2, #4 |
307 BLE xcorr_kernel_edsp_done | 307 BLE xcorr_kernel_edsp_done |
308 LDRH r12, [r4], #2 ; r12 = *x++ | 308 LDRH r12, [r4], #2 ; r12 = *x++ |
309 SUBS r2, r2, #1 ; j-- | 309 SUBS r2, r2, #1 ; j-- |
310 ; Stall | 310 ; Stall |
311 SMLABB r6, r12, r10, r6 ; sum[0] = MAC16_16(sum[0],x,y_0) | 311 SMLABB r6, r12, r10, r6 ; sum[0] = MAC16_16(sum[0],x,y_0) |
312 LDRGTH r14, [r4], #2 ; r14 = *x++ | 312 LDRHGT r14, [r4], #2 ; r14 = *x++ |
313 SMLABT r7, r12, r10, r7 ; sum[1] = MAC16_16(sum[1],x,y_1) | 313 SMLABT r7, r12, r10, r7 ; sum[1] = MAC16_16(sum[1],x,y_1) |
314 SMLABB r8, r12, r11, r8 ; sum[2] = MAC16_16(sum[2],x,y_2) | 314 SMLABB r8, r12, r11, r8 ; sum[2] = MAC16_16(sum[2],x,y_2) |
315 SMLABT r9, r12, r11, r9 ; sum[3] = MAC16_16(sum[3],x,y_3) | 315 SMLABT r9, r12, r11, r9 ; sum[3] = MAC16_16(sum[3],x,y_3) |
316 BLE xcorr_kernel_edsp_done | 316 BLE xcorr_kernel_edsp_done |
317 SMLABT r6, r14, r10, r6 ; sum[0] = MAC16_16(sum[0],x,y_1) | 317 SMLABT r6, r14, r10, r6 ; sum[0] = MAC16_16(sum[0],x,y_1) |
318 SUBS r2, r2, #1 ; j-- | 318 SUBS r2, r2, #1 ; j-- |
319 SMLABB r7, r14, r11, r7 ; sum[1] = MAC16_16(sum[1],x,y_2) | 319 SMLABB r7, r14, r11, r7 ; sum[1] = MAC16_16(sum[1],x,y_2) |
320 LDRH r10, [r5], #2 ; r10 = y_4 = *y++ | 320 LDRH r10, [r5], #2 ; r10 = y_4 = *y++ |
321 SMLABT r8, r14, r11, r8 ; sum[2] = MAC16_16(sum[2],x,y_3) | 321 SMLABT r8, r14, r11, r8 ; sum[2] = MAC16_16(sum[2],x,y_3) |
322 LDRGTH r12, [r4], #2 ; r12 = *x++ | 322 LDRHGT r12, [r4], #2 ; r12 = *x++ |
323 SMLABB r9, r14, r10, r9 ; sum[3] = MAC16_16(sum[3],x,y_4) | 323 SMLABB r9, r14, r10, r9 ; sum[3] = MAC16_16(sum[3],x,y_4) |
324 BLE xcorr_kernel_edsp_done | 324 BLE xcorr_kernel_edsp_done |
325 SMLABB r6, r12, r11, r6 ; sum[0] = MAC16_16(sum[0],tmp,y_2) | 325 SMLABB r6, r12, r11, r6 ; sum[0] = MAC16_16(sum[0],tmp,y_2) |
326 CMP r2, #1 ; j-- | 326 CMP r2, #1 ; j-- |
327 SMLABT r7, r12, r11, r7 ; sum[1] = MAC16_16(sum[1],tmp,y_3) | 327 SMLABT r7, r12, r11, r7 ; sum[1] = MAC16_16(sum[1],tmp,y_3) |
328 LDRH r2, [r5], #2 ; r2 = y_5 = *y++ | 328 LDRH r2, [r5], #2 ; r2 = y_5 = *y++ |
329 SMLABB r8, r12, r10, r8 ; sum[2] = MAC16_16(sum[2],tmp,y_4) | 329 SMLABB r8, r12, r10, r8 ; sum[2] = MAC16_16(sum[2],tmp,y_4) |
330 LDRGTH r14, [r4] ; r14 = *x | 330 LDRHGT r14, [r4] ; r14 = *x |
331 SMLABB r9, r12, r2, r9 ; sum[3] = MAC16_16(sum[3],tmp,y_5) | 331 SMLABB r9, r12, r2, r9 ; sum[3] = MAC16_16(sum[3],tmp,y_5) |
332 BLE xcorr_kernel_edsp_done | 332 BLE xcorr_kernel_edsp_done |
333 SMLABT r6, r14, r11, r6 ; sum[0] = MAC16_16(sum[0],tmp,y_3) | 333 SMLABT r6, r14, r11, r6 ; sum[0] = MAC16_16(sum[0],tmp,y_3) |
334 LDRH r11, [r5] ; r11 = y_6 = *y | 334 LDRH r11, [r5] ; r11 = y_6 = *y |
335 SMLABB r7, r14, r10, r7 ; sum[1] = MAC16_16(sum[1],tmp,y_4) | 335 SMLABB r7, r14, r10, r7 ; sum[1] = MAC16_16(sum[1],tmp,y_4) |
336 SMLABB r8, r14, r2, r8 ; sum[2] = MAC16_16(sum[2],tmp,y_5) | 336 SMLABB r8, r14, r2, r8 ; sum[2] = MAC16_16(sum[2],tmp,y_5) |
337 SMLABB r9, r14, r11, r9 ; sum[3] = MAC16_16(sum[3],tmp,y_6) | 337 SMLABB r9, r14, r11, r9 ; sum[3] = MAC16_16(sum[3],tmp,y_6) |
338 xcorr_kernel_edsp_done | 338 xcorr_kernel_edsp_done |
339 LDMFD sp!, {r2,r4,r5,pc} | 339 LDMFD sp!, {r2,r4,r5,pc} |
340 ENDP | 340 ENDP |
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
380 LDR r8, [r5], #4 | 380 LDR r8, [r5], #4 |
381 SMLABT r14, r7, r9, r14 ; sum = MAC16_16(sum, x_2, y_2) | 381 SMLABT r14, r7, r9, r14 ; sum = MAC16_16(sum, x_2, y_2) |
382 SUBS r12, r12, #4 ; j-=4 | 382 SUBS r12, r12, #4 ; j-=4 |
383 SMLATB r14, r7, r8, r14 ; sum = MAC16_16(sum, x_3, y_3) | 383 SMLATB r14, r7, r8, r14 ; sum = MAC16_16(sum, x_3, y_3) |
384 LDRGT r6, [r4], #4 | 384 LDRGT r6, [r4], #4 |
385 BGT celt_pitch_xcorr_edsp_process1u_loop4 | 385 BGT celt_pitch_xcorr_edsp_process1u_loop4 |
386 MOV r8, r8, LSR #16 | 386 MOV r8, r8, LSR #16 |
387 celt_pitch_xcorr_edsp_process1u_loop4_done | 387 celt_pitch_xcorr_edsp_process1u_loop4_done |
388 ADDS r12, r12, #4 | 388 ADDS r12, r12, #4 |
389 celt_pitch_xcorr_edsp_process1u_loop1 | 389 celt_pitch_xcorr_edsp_process1u_loop1 |
390 LDRGEH r6, [r4], #2 | 390 LDRHGE r6, [r4], #2 |
391 ; Stall | 391 ; Stall |
392 SMLABBGE r14, r6, r8, r14 ; sum = MAC16_16(sum, *x, *y) | 392 SMLABBGE r14, r6, r8, r14 ; sum = MAC16_16(sum, *x, *y) |
393 SUBGES r12, r12, #1 | 393 SUBSGE r12, r12, #1 |
394 LDRGTH r8, [r5], #2 | 394 LDRHGT r8, [r5], #2 |
395 BGT celt_pitch_xcorr_edsp_process1u_loop1 | 395 BGT celt_pitch_xcorr_edsp_process1u_loop1 |
396 ; Restore _x | 396 ; Restore _x |
397 SUB r4, r4, r3, LSL #1 | 397 SUB r4, r4, r3, LSL #1 |
398 ; Restore and advance _y | 398 ; Restore and advance _y |
399 SUB r5, r5, r3, LSL #1 | 399 SUB r5, r5, r3, LSL #1 |
400 ; maxcorr = max(maxcorr, sum) | 400 ; maxcorr = max(maxcorr, sum) |
401 CMP r0, r14 | 401 CMP r0, r14 |
402 ADD r5, r5, #2 | 402 ADD r5, r5, #2 |
403 MOVLT r0, r14 | 403 MOVLT r0, r14 |
404 SUBS r1, r1, #1 | 404 SUBS r1, r1, #1 |
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
467 SMLABT r11, r6, r8, r11 ; sum1 = MAC16_16(sum1, x_0, y_1) | 467 SMLABT r11, r6, r8, r11 ; sum1 = MAC16_16(sum1, x_0, y_1) |
468 SUB r12, r12, #2 | 468 SUB r12, r12, #2 |
469 SMLATT r10, r6, r8, r10 ; sum0 = MAC16_16(sum0, x_1, y_1) | 469 SMLATT r10, r6, r8, r10 ; sum0 = MAC16_16(sum0, x_1, y_1) |
470 MOV r8, r9 | 470 MOV r8, r9 |
471 SMLATB r11, r6, r9, r11 ; sum1 = MAC16_16(sum1, x_1, y_2) | 471 SMLATB r11, r6, r9, r11 ; sum1 = MAC16_16(sum1, x_1, y_2) |
472 celt_pitch_xcorr_edsp_process2_1 | 472 celt_pitch_xcorr_edsp_process2_1 |
473 LDRH r6, [r4], #2 | 473 LDRH r6, [r4], #2 |
474 ADDS r12, r12, #1 | 474 ADDS r12, r12, #1 |
475 ; Stall | 475 ; Stall |
476 SMLABB r10, r6, r8, r10 ; sum0 = MAC16_16(sum0, x_0, y_0) | 476 SMLABB r10, r6, r8, r10 ; sum0 = MAC16_16(sum0, x_0, y_0) |
477 LDRGTH r7, [r4], #2 | 477 LDRHGT r7, [r4], #2 |
478 SMLABT r11, r6, r8, r11 ; sum1 = MAC16_16(sum1, x_0, y_1) | 478 SMLABT r11, r6, r8, r11 ; sum1 = MAC16_16(sum1, x_0, y_1) |
479 BLE celt_pitch_xcorr_edsp_process2_done | 479 BLE celt_pitch_xcorr_edsp_process2_done |
480 LDRH r9, [r5], #2 | 480 LDRH r9, [r5], #2 |
481 SMLABT r10, r7, r8, r10 ; sum0 = MAC16_16(sum0, x_0, y_1) | 481 SMLABT r10, r7, r8, r10 ; sum0 = MAC16_16(sum0, x_0, y_1) |
482 SMLABB r11, r7, r9, r11 ; sum1 = MAC16_16(sum1, x_0, y_2) | 482 SMLABB r11, r7, r9, r11 ; sum1 = MAC16_16(sum1, x_0, y_2) |
483 celt_pitch_xcorr_edsp_process2_done | 483 celt_pitch_xcorr_edsp_process2_done |
484 ; Restore _x | 484 ; Restore _x |
485 SUB r4, r4, r3, LSL #1 | 485 SUB r4, r4, r3, LSL #1 |
486 ; Restore and advance _y | 486 ; Restore and advance _y |
487 SUB r5, r5, r3, LSL #1 | 487 SUB r5, r5, r3, LSL #1 |
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
520 BGE celt_pitch_xcorr_edsp_process1a_loop4 | 520 BGE celt_pitch_xcorr_edsp_process1a_loop4 |
521 celt_pitch_xcorr_edsp_process1a_loop_done | 521 celt_pitch_xcorr_edsp_process1a_loop_done |
522 ADDS r12, r12, #2 | 522 ADDS r12, r12, #2 |
523 LDRGE r6, [r4], #4 | 523 LDRGE r6, [r4], #4 |
524 LDRGE r8, [r5], #4 | 524 LDRGE r8, [r5], #4 |
525 ; Stall | 525 ; Stall |
526 SMLABBGE r14, r6, r8, r14 ; sum = MAC16_16(sum, x_0, y_0) | 526 SMLABBGE r14, r6, r8, r14 ; sum = MAC16_16(sum, x_0, y_0) |
527 SUBGE r12, r12, #2 | 527 SUBGE r12, r12, #2 |
528 SMLATTGE r14, r6, r8, r14 ; sum = MAC16_16(sum, x_1, y_1) | 528 SMLATTGE r14, r6, r8, r14 ; sum = MAC16_16(sum, x_1, y_1) |
529 ADDS r12, r12, #1 | 529 ADDS r12, r12, #1 |
530 LDRGEH r6, [r4], #2 | 530 LDRHGE r6, [r4], #2 |
531 LDRGEH r8, [r5], #2 | 531 LDRHGE r8, [r5], #2 |
532 ; Stall | 532 ; Stall |
533 SMLABBGE r14, r6, r8, r14 ; sum = MAC16_16(sum, *x, *y) | 533 SMLABBGE r14, r6, r8, r14 ; sum = MAC16_16(sum, *x, *y) |
534 ; maxcorr = max(maxcorr, sum) | 534 ; maxcorr = max(maxcorr, sum) |
535 CMP r0, r14 | 535 CMP r0, r14 |
536 ; xcorr[i] = sum | 536 ; xcorr[i] = sum |
537 STR r14, [r2], #4 | 537 STR r14, [r2], #4 |
538 MOVLT r0, r14 | 538 MOVLT r0, r14 |
539 celt_pitch_xcorr_edsp_done | 539 celt_pitch_xcorr_edsp_done |
540 LDMFD sp!, {r4-r11, pc} | 540 LDMFD sp!, {r4-r11, pc} |
541 ENDP | 541 ENDP |
542 | 542 |
543 ENDIF | 543 ENDIF |
544 | 544 |
545 END | 545 END |
OLD | NEW |