| OLD | NEW | 
|---|
| 1 ; | 1 ; | 
| 2 ;  Copyright (c) 2011 The WebM project authors. All Rights Reserved. | 2 ;  Copyright (c) 2011 The WebM project authors. All Rights Reserved. | 
| 3 ; | 3 ; | 
| 4 ;  Use of this source code is governed by a BSD-style license | 4 ;  Use of this source code is governed by a BSD-style license | 
| 5 ;  that can be found in the LICENSE file in the root of the source | 5 ;  that can be found in the LICENSE file in the root of the source | 
| 6 ;  tree. An additional intellectual property rights grant can be found | 6 ;  tree. An additional intellectual property rights grant can be found | 
| 7 ;  in the file PATENTS.  All contributing project authors may | 7 ;  in the file PATENTS.  All contributing project authors may | 
| 8 ;  be found in the AUTHORS file in the root of the source tree. | 8 ;  be found in the AUTHORS file in the root of the source tree. | 
| 9 ; | 9 ; | 
| 10 | 10 | 
| 11 | 11 | 
| 12     EXPORT  |vp8_variance_halfpixvar16x16_hv_armv6| | 12     EXPORT  |vp8_variance_halfpixvar16x16_hv_armv6| | 
| 13 | 13 | 
| 14     ARM | 14     ARM | 
| 15     REQUIRE8 | 15     REQUIRE8 | 
| 16     PRESERVE8 | 16     PRESERVE8 | 
| 17 | 17 | 
| 18     AREA ||.text||, CODE, READONLY, ALIGN=2 | 18     AREA ||.text||, CODE, READONLY, ALIGN=2 | 
| 19 | 19 | 
| 20 ; r0    unsigned char *src_ptr | 20 ; r0    unsigned char *src_ptr | 
| 21 ; r1    int source_stride | 21 ; r1    int source_stride | 
| 22 ; r2    unsigned char *ref_ptr | 22 ; r2    unsigned char *ref_ptr | 
| 23 ; r3    int  recon_stride | 23 ; r3    int  recon_stride | 
| 24 ; stack unsigned int *sse | 24 ; stack unsigned int *sse | 
| 25 |vp8_variance_halfpixvar16x16_hv_armv6| PROC | 25 |vp8_variance_halfpixvar16x16_hv_armv6| PROC | 
| 26 | 26 | 
| 27     stmfd   sp!, {r4-r12, lr} | 27     stmfd   sp!, {r4-r12, lr} | 
|  | 28 | 
|  | 29     pld     [r0, r1, lsl #0] | 
|  | 30     pld     [r2, r3, lsl #0] | 
|  | 31 | 
| 28     mov     r8, #0              ; initialize sum = 0 | 32     mov     r8, #0              ; initialize sum = 0 | 
| 29     ldr     r10, c80808080 | 33     ldr     r10, c80808080 | 
| 30     mov     r11, #0             ; initialize sse = 0 | 34     mov     r11, #0             ; initialize sse = 0 | 
| 31     mov     r12, #16            ; set loop counter to 16 (=block height) | 35     mov     r12, #16            ; set loop counter to 16 (=block height) | 
| 32     mov     lr, #0              ; constant zero | 36     mov     lr, #0              ; constant zero | 
| 33 loop | 37 loop | 
| 34     add     r9, r0, r1          ; pointer to pixels on the next row | 38     add     r9, r0, r1          ; pointer to pixels on the next row | 
| 35     ; 1st 4 pixels | 39     ; 1st 4 pixels | 
| 36     ldr     r4, [r0, #0]        ; load source pixels a, row N | 40     ldr     r4, [r0, #0]        ; load source pixels a, row N | 
| 37     ldr     r6, [r0, #1]        ; load source pixels b, row N | 41     ldr     r6, [r0, #1]        ; load source pixels b, row N | 
| 38     ldr     r5, [r9, #0]        ; load source pixels c, row N+1 | 42     ldr     r5, [r9, #0]        ; load source pixels c, row N+1 | 
| 39     ldr     r7, [r9, #1]        ; load source pixels d, row N+1 | 43     ldr     r7, [r9, #1]        ; load source pixels d, row N+1 | 
| 40 | 44 | 
| 41     ; x = (a + b + 1) >> 1, interpolate pixels horizontally on row N | 45     ; x = (a + b + 1) >> 1, interpolate pixels horizontally on row N | 
| 42     mvn     r6, r6 | 46     mvn     r6, r6 | 
| 43     uhsub8  r4, r4, r6 | 47     uhsub8  r4, r4, r6 | 
| 44     eor     r4, r4, r10 | 48     eor     r4, r4, r10 | 
| 45     ; y = (c + d + 1) >> 1, interpolate pixels horizontally on row N+1 | 49     ; y = (c + d + 1) >> 1, interpolate pixels horizontally on row N+1 | 
| 46     mvn     r7, r7 | 50     mvn     r7, r7 | 
| 47     uhsub8  r5, r5, r7 | 51     uhsub8  r5, r5, r7 | 
| 48     eor     r5, r5, r10 | 52     eor     r5, r5, r10 | 
| 49     ; z = (x + y + 1) >> 1, interpolate half pixel values vertically | 53     ; z = (x + y + 1) >> 1, interpolate half pixel values vertically | 
| 50     mvn     r5, r5 | 54     mvn     r5, r5 | 
| 51     uhsub8  r4, r4, r5 | 55     uhsub8  r4, r4, r5 | 
| 52     ldr     r5, [r2, #0]        ; load 4 ref pixels | 56     ldr     r5, [r2, #0]        ; load 4 ref pixels | 
| 53     eor     r4, r4, r10 | 57     eor     r4, r4, r10 | 
| 54 | 58 | 
| 55     usub8   r6, r4, r5          ; calculate difference | 59     usub8   r6, r4, r5          ; calculate difference | 
|  | 60     pld     [r0, r1, lsl #1] | 
| 56     sel     r7, r6, lr          ; select bytes with positive difference | 61     sel     r7, r6, lr          ; select bytes with positive difference | 
| 57     usub8   r6, r5, r4          ; calculate difference with reversed operands | 62     usub8   r6, r5, r4          ; calculate difference with reversed operands | 
|  | 63     pld     [r2, r3, lsl #1] | 
| 58     sel     r6, r6, lr          ; select bytes with negative difference | 64     sel     r6, r6, lr          ; select bytes with negative difference | 
| 59 | 65 | 
| 60     ; calculate partial sums | 66     ; calculate partial sums | 
| 61     usad8   r4, r7, lr          ; calculate sum of positive differences | 67     usad8   r4, r7, lr          ; calculate sum of positive differences | 
| 62     usad8   r5, r6, lr          ; calculate sum of negative differences | 68     usad8   r5, r6, lr          ; calculate sum of negative differences | 
| 63     orr     r6, r6, r7          ; differences of all 4 pixels | 69     orr     r6, r6, r7          ; differences of all 4 pixels | 
| 64     ; calculate total sum | 70     ; calculate total sum | 
| 65     adds    r8, r8, r4          ; add positive differences to sum | 71     adds    r8, r8, r4          ; add positive differences to sum | 
| 66     subs    r8, r8, r5          ; subtract negative differences from sum | 72     subs    r8, r8, r5          ; subtract negative differences from sum | 
| 67 | 73 | 
| (...skipping 139 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 207     sub     r0, r11, r0, asr #8 ; return (sse - ((sum * sum) >> 8)) | 213     sub     r0, r11, r0, asr #8 ; return (sse - ((sum * sum) >> 8)) | 
| 208 | 214 | 
| 209     ldmfd   sp!, {r4-r12, pc} | 215     ldmfd   sp!, {r4-r12, pc} | 
| 210 | 216 | 
| 211     ENDP | 217     ENDP | 
| 212 | 218 | 
| 213 c80808080 | 219 c80808080 | 
| 214     DCD     0x80808080 | 220     DCD     0x80808080 | 
| 215 | 221 | 
| 216     END | 222     END | 
| OLD | NEW | 
|---|