source/libvpx/vp8/common/arm/neon/sixtappredict16x16_neon.asm - Issue 7671004: Update libvpx snapshot to v0.9.7-p1 (Cayuga).

Side by Side Diff: source/libvpx/vp8/common/arm/neon/sixtappredict16x16_neon.asm

Issue 7671004: Update libvpx snapshot to v0.9.7-p1 (Cayuga). (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/libvpx/

Patch Set: '' Created 9 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 ;	1 ;

2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.	2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.

3 ;	3 ;

4 ; Use of this source code is governed by a BSD-style license	4 ; Use of this source code is governed by a BSD-style license

5 ; that can be found in the LICENSE file in the root of the source	5 ; that can be found in the LICENSE file in the root of the source

6 ; tree. An additional intellectual property rights grant can be found	6 ; tree. An additional intellectual property rights grant can be found

7 ; in the file PATENTS. All contributing project authors may	7 ; in the file PATENTS. All contributing project authors may

8 ; be found in the AUTHORS file in the root of the source tree.	8 ; be found in the AUTHORS file in the root of the source tree.

9 ;	9 ;

10	10

11	11

12 EXPORT |vp8_sixtap_predict16x16_neon|	12 EXPORT |vp8_sixtap_predict16x16_neon|

13 ARM	13 ARM

14 REQUIRE8	14 REQUIRE8

15 PRESERVE8	15 PRESERVE8

16	16

17 AREA ||.text||, CODE, READONLY, ALIGN=2	17 AREA ||.text||, CODE, READONLY, ALIGN=2

	18

	19 filter16_coeff

	20 DCD 0, 0, 128, 0, 0, 0, 0, 0

	21 DCD 0, -6, 123, 12, -1, 0, 0, 0

	22 DCD 2, -11, 108, 36, -8, 1, 0, 0

	23 DCD 0, -9, 93, 50, -6, 0, 0, 0

	24 DCD 3, -16, 77, 77, -16, 3, 0, 0

	25 DCD 0, -6, 50, 93, -9, 0, 0, 0

	26 DCD 1, -8, 36, 108, -11, 2, 0, 0

	27 DCD 0, -1, 12, 123, -6, 0, 0, 0

	28

18 ; r0 unsigned char *src_ptr,	29 ; r0 unsigned char *src_ptr,

19 ; r1 int src_pixels_per_line,	30 ; r1 int src_pixels_per_line,

20 ; r2 int xoffset,	31 ; r2 int xoffset,

21 ; r3 int yoffset,	32 ; r3 int yoffset,

22 ; r4 unsigned char *dst_ptr,	33 ; r4 unsigned char *dst_ptr,

23 ; stack(r5) int dst_pitch	34 ; stack(r5) int dst_pitch

24	35

25 ;Note: To take advantage of 8-bit mulplication instruction in NEON. First apply abs() to	36 ;Note: To take advantage of 8-bit mulplication instruction in NEON. First apply abs() to

26 ; filter coeffs to make them u8. Then, use vmlsl for negtive coeffs. After multiplication,	37 ; filter coeffs to make them u8. Then, use vmlsl for negtive coeffs. After multiplication,

27 ; the result can be negtive. So, I treat the result as s16. But, since it is also possible	38 ; the result can be negtive. So, I treat the result as s16. But, since it is also possible

28 ; that the result can be a large positive number (> 2^15-1), which could be confused as a	39 ; that the result can be a large positive number (> 2^15-1), which could be confused as a

29 ; negtive number. To avoid that error, apply filter coeffs in the order of 0, 1, 4 ,5 ,2,	40 ; negtive number. To avoid that error, apply filter coeffs in the order of 0, 1, 4 ,5 ,2,

30 ; which ensures that the result stays in s16 range. Finally, saturated add the result by	41 ; which ensures that the result stays in s16 range. Finally, saturated add the result by

31 ; applying 3rd filter coeff. Same applys to other filter functions.	42 ; applying 3rd filter coeff. Same applys to other filter functions.

32	43

33 |vp8_sixtap_predict16x16_neon| PROC	44 |vp8_sixtap_predict16x16_neon| PROC

34 push {r4-r5, lr}	45 push {r4-r5, lr}

35	46

36 ldr r12, _filter16_coeff_	47 adr r12, filter16_coeff

37 ldr r4, [sp, #12] ;load parameters from stack	48 ldr r4, [sp, #12] ;load parameters from stack

38 ldr r5, [sp, #16] ;load parameters from stack	49 ldr r5, [sp, #16] ;load parameters from stack

39	50

40 cmp r2, #0 ;skip first_pass filter if xoffset=0	51 cmp r2, #0 ;skip first_pass filter if xoffset=0

41 beq secondpass_filter16x16_only	52 beq secondpass_filter16x16_only

42	53

43 add r2, r12, r2, lsl #5 ;calculate filter location	54 add r2, r12, r2, lsl #5 ;calculate filter location

44	55

45 cmp r3, #0 ;skip second_pass filter if yoffset= 0	56 cmp r3, #0 ;skip second_pass filter if yoffset= 0

46	57

(...skipping 422 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
469 sub r4, r4, r5, lsl #4	480 sub r4, r4, r5, lsl #4

470 add r4, r4, #8	481 add r4, r4, #8

471	482

472 bne filt_blk2d_spo16x16_outloop_neon	483 bne filt_blk2d_spo16x16_outloop_neon

473	484

474 pop {r4-r5,pc}	485 pop {r4-r5,pc}

475	486

476 ENDP	487 ENDP

477	488

478 ;-----------------	489 ;-----------------

479

480 _filter16_coeff_

481 DCD filter16_coeff

482 filter16_coeff

483 DCD 0, 0, 128, 0, 0, 0, 0, 0

484 DCD 0, -6, 123, 12, -1, 0, 0, 0

485 DCD 2, -11, 108, 36, -8, 1, 0, 0

486 DCD 0, -9, 93, 50, -6, 0, 0, 0

487 DCD 3, -16, 77, 77, -16, 3, 0, 0

488 DCD 0, -6, 50, 93, -9, 0, 0, 0

489 DCD 1, -8, 36, 108, -11, 2, 0, 0

490 DCD 0, -1, 12, 123, -6, 0, 0, 0

491

492 END	490 END

OLD	NEW