source/libvpx/vpx_dsp/arm/idct32x32_add_neon.asm - Issue 1302353004: libvpx: Pull from upstream

Side by Side Diff: source/libvpx/vpx_dsp/arm/idct32x32_add_neon.asm

Issue 1302353004: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master

Patch Set: Created 5 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 ;	1 ;

2 ; Copyright (c) 2013 The WebM project authors. All Rights Reserved.	2 ; Copyright (c) 2013 The WebM project authors. All Rights Reserved.

3 ;	3 ;

4 ; Use of this source code is governed by a BSD-style license	4 ; Use of this source code is governed by a BSD-style license

5 ; that can be found in the LICENSE file in the root of the source	5 ; that can be found in the LICENSE file in the root of the source

6 ; tree. An additional intellectual property rights grant can be found	6 ; tree. An additional intellectual property rights grant can be found

7 ; in the file PATENTS. All contributing project authors may	7 ; in the file PATENTS. All contributing project authors may

8 ; be found in the AUTHORS file in the root of the source tree.	8 ; be found in the AUTHORS file in the root of the source tree.

9 ;	9 ;

10	10

(...skipping 25 matching lines...) Expand all Loading...
36 cospi_24_64 EQU 6270	36 cospi_24_64 EQU 6270

37 cospi_25_64 EQU 5520	37 cospi_25_64 EQU 5520

38 cospi_26_64 EQU 4756	38 cospi_26_64 EQU 4756

39 cospi_27_64 EQU 3981	39 cospi_27_64 EQU 3981

40 cospi_28_64 EQU 3196	40 cospi_28_64 EQU 3196

41 cospi_29_64 EQU 2404	41 cospi_29_64 EQU 2404

42 cospi_30_64 EQU 1606	42 cospi_30_64 EQU 1606

43 cospi_31_64 EQU 804	43 cospi_31_64 EQU 804

44	44

45	45

46 EXPORT \|vp9_idct32x32_1024_add_neon\|	46 EXPORT \|vpx_idct32x32_1024_add_neon\|

47 ARM	47 ARM

48 REQUIRE8	48 REQUIRE8

49 PRESERVE8	49 PRESERVE8

50	50

51 AREA \|\|.text\|\|, CODE, READONLY, ALIGN=2	51 AREA \|\|.text\|\|, CODE, READONLY, ALIGN=2

52	52

53 AREA Block, CODE, READONLY	53 AREA Block, CODE, READONLY

54	54

55 ; --------------------------------------------------------------------------	55 ; --------------------------------------------------------------------------

56 ; Load from transposed_buffer	56 ; Load from transposed_buffer

(...skipping 224 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
281 MEND	281 MEND

282 ; --------------------------------------------------------------------------	282 ; --------------------------------------------------------------------------

283 ; Touches q8-q12, q15 (q13-q14 are preserved)	283 ; Touches q8-q12, q15 (q13-q14 are preserved)

284 ; valid output registers are anything but q8-q11	284 ; valid output registers are anything but q8-q11

285 MACRO	285 MACRO

286 DO_BUTTERFLY_STD $first_constant, $second_constant, $reg1, $reg2, $reg3, $reg4	286 DO_BUTTERFLY_STD $first_constant, $second_constant, $reg1, $reg2, $reg3, $reg4

287 DO_BUTTERFLY d28, d29, d26, d27, $first_constant, $second_constant, $reg1, $reg2, $reg3, $reg4	287 DO_BUTTERFLY d28, d29, d26, d27, $first_constant, $second_constant, $reg1, $reg2, $reg3, $reg4

288 MEND	288 MEND

289 ; --------------------------------------------------------------------------	289 ; --------------------------------------------------------------------------

290	290

291 ;void vp9_idct32x32_1024_add_neon(int16_t *input, uint8_t *dest, int dest_stride);	291 ;void vpx_idct32x32_1024_add_neon(int16_t *input, uint8_t *dest, int dest_stride);

292 ;	292 ;

293 ; r0 int16_t *input,	293 ; r0 int16_t *input,

294 ; r1 uint8_t *dest,	294 ; r1 uint8_t *dest,

295 ; r2 int dest_stride)	295 ; r2 int dest_stride)

296 ; loop counters	296 ; loop counters

297 ; r4 bands loop counter	297 ; r4 bands loop counter

298 ; r5 pass loop counter	298 ; r5 pass loop counter

299 ; r8 transpose loop counter	299 ; r8 transpose loop counter

300 ; combine-add pointers	300 ; combine-add pointers

301 ; r6 dest + 31 * dest_stride, descending (30, 29, 28, ...)	301 ; r6 dest + 31 * dest_stride, descending (30, 29, 28, ...)

302 ; r7 dest + 0 * dest_stride, ascending (1, 2, 3, ...)	302 ; r7 dest + 0 * dest_stride, ascending (1, 2, 3, ...)

303 ; r9 dest + 15 * dest_stride, descending (14, 13, 12, ...)	303 ; r9 dest + 15 * dest_stride, descending (14, 13, 12, ...)

304 ; r10 dest + 16 * dest_stride, ascending (17, 18, 19, ...)	304 ; r10 dest + 16 * dest_stride, ascending (17, 18, 19, ...)

305	305

306 \|vp9_idct32x32_1024_add_neon\| PROC	306 \|vpx_idct32x32_1024_add_neon\| PROC

307 ; This function does one pass of idct32x32 transform.	307 ; This function does one pass of idct32x32 transform.

308 ;	308 ;

309 ; This is done by transposing the input and then doing a 1d transform on	309 ; This is done by transposing the input and then doing a 1d transform on

310 ; columns. In the first pass, the transposed columns are the original	310 ; columns. In the first pass, the transposed columns are the original

311 ; rows. In the second pass, after the transposition, the columns are the	311 ; rows. In the second pass, after the transposition, the columns are the

312 ; original columns.	312 ; original columns.

313 ; The 1d transform is done by looping over bands of eight columns (the	313 ; The 1d transform is done by looping over bands of eight columns (the

314 ; idct32_bands loop). For each band, the transform input transposition	314 ; idct32_bands loop). For each band, the transform input transposition

315 ; is done on demand, one band of four 8x8 matrices at a time. The four	315 ; is done on demand, one band of four 8x8 matrices at a time. The four

316 ; matrices are transposed by pairs (the idct32_transpose_pair loop).	316 ; matrices are transposed by pairs (the idct32_transpose_pair loop).

(...skipping 971 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1288	1288

1289 ; bands loop processing	1289 ; bands loop processing

1290 subs r4, r4, #1	1290 subs r4, r4, #1

1291 bne idct32_bands_loop	1291 bne idct32_bands_loop

1292	1292

1293 ; stack operation	1293 ; stack operation

1294 add sp, sp, #512+2048+2048	1294 add sp, sp, #512+2048+2048

1295 vpop {d8-d15}	1295 vpop {d8-d15}

1296 pop {r4-r11}	1296 pop {r4-r11}

1297 bx lr	1297 bx lr

1298 ENDP ; \|vp9_idct32x32_1024_add_neon\|	1298 ENDP ; \|vpx_idct32x32_1024_add_neon\|

1299 END	1299 END

OLD	NEW

« no previous file with comments | « source/libvpx/vpx_dsp/arm/idct32x32_1_add_neon.c ('k') | source/libvpx/vpx_dsp/arm/idct32x32_add_neon.c » ('j') | no next file with comments »