source/libvpx/vpx_dsp/arm/idct8x8_add_neon.asm - Issue 1302353004: libvpx: Pull from upstream

Side by Side Diff: source/libvpx/vpx_dsp/arm/idct8x8_add_neon.asm

Issue 1302353004: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master

Patch Set: Created 5 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 ;	1 ;

2 ; Copyright (c) 2013 The WebM project authors. All Rights Reserved.	2 ; Copyright (c) 2013 The WebM project authors. All Rights Reserved.

3 ;	3 ;

4 ; Use of this source code is governed by a BSD-style license	4 ; Use of this source code is governed by a BSD-style license

5 ; that can be found in the LICENSE file in the root of the source	5 ; that can be found in the LICENSE file in the root of the source

6 ; tree. An additional intellectual property rights grant can be found	6 ; tree. An additional intellectual property rights grant can be found

7 ; in the file PATENTS. All contributing project authors may	7 ; in the file PATENTS. All contributing project authors may

8 ; be found in the AUTHORS file in the root of the source tree.	8 ; be found in the AUTHORS file in the root of the source tree.

9 ;	9 ;

10	10

11 EXPORT |vp9_idct8x8_64_add_neon|	11 EXPORT |vpx_idct8x8_64_add_neon|

12 EXPORT |vp9_idct8x8_12_add_neon|	12 EXPORT |vpx_idct8x8_12_add_neon|

13 ARM	13 ARM

14 REQUIRE8	14 REQUIRE8

15 PRESERVE8	15 PRESERVE8

16	16

17 AREA ||.text||, CODE, READONLY, ALIGN=2	17 AREA ||.text||, CODE, READONLY, ALIGN=2

18	18

19 ; Parallel 1D IDCT on all the columns of an 8x8 16-bit data matrix which are	19 ; Parallel 1D IDCT on all the columns of an 8x8 16-bit data matrix which are

20 ; loaded in q8-q15. The output will be stored back into q8-q15 registers.	20 ; loaded in q8-q15. The output will be stored back into q8-q15 registers.

21 ; This macro will touch q0-q7 registers and use them as buffer during	21 ; This macro will touch q0-q7 registers and use them as buffer during

22 ; calculation.	22 ; calculation.

(...skipping 168 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
191 vtrn.32 q9, q11	191 vtrn.32 q9, q11

192 vtrn.32 q12, q14	192 vtrn.32 q12, q14

193 vtrn.32 q13, q15	193 vtrn.32 q13, q15

194 vtrn.16 q8, q9	194 vtrn.16 q8, q9

195 vtrn.16 q10, q11	195 vtrn.16 q10, q11

196 vtrn.16 q12, q13	196 vtrn.16 q12, q13

197 vtrn.16 q14, q15	197 vtrn.16 q14, q15

198 MEND	198 MEND

199	199

200 AREA Block, CODE, READONLY ; name this block of code	200 AREA Block, CODE, READONLY ; name this block of code

201 ;void vp9_idct8x8_64_add_neon(int16_t *input, uint8_t *dest, int dest_stride)	201 ;void vpx_idct8x8_64_add_neon(int16_t *input, uint8_t *dest, int dest_stride)

202 ;	202 ;

203 ; r0 int16_t *input	203 ; r0 int16_t *input

204 ; r1 uint8_t *dest	204 ; r1 uint8_t *dest

205 ; r2 int dest_stride	205 ; r2 int dest_stride

206	206

207 |vp9_idct8x8_64_add_neon| PROC	207 |vpx_idct8x8_64_add_neon| PROC

208 push {r4-r9}	208 push {r4-r9}

209 vpush {d8-d15}	209 vpush {d8-d15}

210 vld1.s16 {q8,q9}, [r0]!	210 vld1.s16 {q8,q9}, [r0]!

211 vld1.s16 {q10,q11}, [r0]!	211 vld1.s16 {q10,q11}, [r0]!

212 vld1.s16 {q12,q13}, [r0]!	212 vld1.s16 {q12,q13}, [r0]!

213 vld1.s16 {q14,q15}, [r0]!	213 vld1.s16 {q14,q15}, [r0]!

214	214

215 ; transpose the input data	215 ; transpose the input data

216 TRANSPOSE8X8	216 TRANSPOSE8X8

217	217

(...skipping 83 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
301 vst1.64 {d2}, [r0], r2	301 vst1.64 {d2}, [r0], r2

302 vst1.64 {d3}, [r0], r2	302 vst1.64 {d3}, [r0], r2

303 vst1.64 {d4}, [r0], r2	303 vst1.64 {d4}, [r0], r2

304 vst1.64 {d5}, [r0], r2	304 vst1.64 {d5}, [r0], r2

305 vst1.64 {d6}, [r0], r2	305 vst1.64 {d6}, [r0], r2

306 vst1.64 {d7}, [r0], r2	306 vst1.64 {d7}, [r0], r2

307	307

308 vpop {d8-d15}	308 vpop {d8-d15}

309 pop {r4-r9}	309 pop {r4-r9}

310 bx lr	310 bx lr

311 ENDP ; |vp9_idct8x8_64_add_neon|	311 ENDP ; |vpx_idct8x8_64_add_neon|

312	312

313 ;void vp9_idct8x8_12_add_neon(int16_t *input, uint8_t *dest, int dest_stride)	313 ;void vpx_idct8x8_12_add_neon(int16_t *input, uint8_t *dest, int dest_stride)

314 ;	314 ;

315 ; r0 int16_t *input	315 ; r0 int16_t *input

316 ; r1 uint8_t *dest	316 ; r1 uint8_t *dest

317 ; r2 int dest_stride	317 ; r2 int dest_stride

318	318

319 |vp9_idct8x8_12_add_neon| PROC	319 |vpx_idct8x8_12_add_neon| PROC

320 push {r4-r9}	320 push {r4-r9}

321 vpush {d8-d15}	321 vpush {d8-d15}

322 vld1.s16 {q8,q9}, [r0]!	322 vld1.s16 {q8,q9}, [r0]!

323 vld1.s16 {q10,q11}, [r0]!	323 vld1.s16 {q10,q11}, [r0]!

324 vld1.s16 {q12,q13}, [r0]!	324 vld1.s16 {q12,q13}, [r0]!

325 vld1.s16 {q14,q15}, [r0]!	325 vld1.s16 {q14,q15}, [r0]!

326	326

327 ; transpose the input data	327 ; transpose the input data

328 TRANSPOSE8X8	328 TRANSPOSE8X8

329	329

(...skipping 177 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
507 vst1.64 {d2}, [r0], r2	507 vst1.64 {d2}, [r0], r2

508 vst1.64 {d3}, [r0], r2	508 vst1.64 {d3}, [r0], r2

509 vst1.64 {d4}, [r0], r2	509 vst1.64 {d4}, [r0], r2

510 vst1.64 {d5}, [r0], r2	510 vst1.64 {d5}, [r0], r2

511 vst1.64 {d6}, [r0], r2	511 vst1.64 {d6}, [r0], r2

512 vst1.64 {d7}, [r0], r2	512 vst1.64 {d7}, [r0], r2

513	513

514 vpop {d8-d15}	514 vpop {d8-d15}

515 pop {r4-r9}	515 pop {r4-r9}

516 bx lr	516 bx lr

517 ENDP ; |vp9_idct8x8_12_add_neon|	517 ENDP ; |vpx_idct8x8_12_add_neon|

518	518

519 END	519 END

OLD	NEW

« no previous file with comments | « source/libvpx/vpx_dsp/arm/idct8x8_1_add_neon.c ('k') | source/libvpx/vpx_dsp/arm/idct8x8_add_neon.c » ('j') | no next file with comments »