source/libvpx/vp9/common/arm/neon/vp9_idct16x16_neon.c - Issue 23600008: libvpx: Pull from upstream

Side by Side Diff: source/libvpx/vp9/common/arm/neon/vp9_idct16x16_neon.c

Issue 23600008: libvpx: Pull from upstream (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/libvpx/

Patch Set: Created 7 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 /*

	2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved.

	3 *

	4 * Use of this source code is governed by a BSD-style license

	5 * that can be found in the LICENSE file in the root of the source

	6 * tree. An additional intellectual property rights grant can be found

	7 * in the file PATENTS. All contributing project authors may

	8 * be found in the AUTHORS file in the root of the source tree.

	9 */

	10

	11 #include "./vp9_rtcd.h"

	12 #include "vp9/common/vp9_common.h"

	13

	/*
	 * Kernels and helpers used by the 16x16 inverse-DCT drivers in this file.
	 * Only the declarations appear here; the definitions live outside this file
	 * (presumably in the accompanying NEON assembly sources -- confirm).
	 *
	 * pass1: reads `input` and writes an intermediate result to `output`;
	 *        every caller in this file passes an `output_stride` of 8.
	 * pass2: reads `src` together with the pass-1 result in `pass1Output`.
	 *        Callers pass skip_adding == 0 for the row transform, where the
	 *        result is kept in `output`, and skip_adding == 1 for the column
	 *        transform, where (per the callers' comments) the result is added
	 *        to `dest` using `dest_stride`.
	 */
	extern void vp9_short_idct16x16_add_neon_pass1(int16_t *input,
	                                               int16_t *output,
	                                               int output_stride);
	extern void vp9_short_idct16x16_add_neon_pass2(int16_t *src,
	                                               int16_t *output,
	                                               int16_t *pass1Output,
	                                               int16_t skip_adding,
	                                               uint8_t *dest,
	                                               int dest_stride);

	/* Row-transform variants used by vp9_short_idct10_16x16_add_neon(). */
	extern void vp9_short_idct10_16x16_add_neon_pass1(int16_t *input,
	                                                  int16_t *output,
	                                                  int output_stride);
	extern void vp9_short_idct10_16x16_add_neon_pass2(int16_t *src,
	                                                  int16_t *output,
	                                                  int16_t *pass1Output,
	                                                  int16_t skip_adding,
	                                                  uint8_t *dest,
	                                                  int dest_stride);

	/*
	 * Save / restore the d8-d15 register values around the kernel calls.
	 * Declared with (void) so these are real prototypes: an empty parameter
	 * list in C leaves the arguments unchecked and is an obsolescent form.
	 */
	extern void save_registers(void);
	extern void restore_registers(void);

	34

	35

	/*
	 * 16x16 inverse DCT driver: runs the row transform into a scratch buffer,
	 * then the column transform, whose result is added to `dest`.
	 *
	 * Each half (8 rows or 8 columns) is handled by a pass1/pass2 pair:
	 *   - pass1 processes the even elements 0, 2, ..., 14 and leaves the
	 *     stage 6 result in pass1_output;
	 *   - pass2 processes the odd elements 1, 3, ..., 15 and combines them
	 *     with pass1_output to produce the final stage 7 result.
	 *
	 * input       : 16x16 block of coefficients.
	 * dest        : destination pixels the column result is added to.
	 * dest_stride : stride of `dest`.
	 */
	void vp9_short_idct16x16_add_neon(int16_t *input,
	                                  uint8_t *dest, int dest_stride) {
	  int16_t pass1_output[16 * 16] = {0};
	  int16_t row_idct_output[16 * 16] = {0};

	  /* Start of the lower 8 input rows. */
	  int16_t *const lower_rows = input + 8 * 16;
	  /* Start of the right 8 columns within the row-transform result. */
	  int16_t *const right_cols = row_idct_output + 8 * 16;
	  /* Output position used for the second half of each transform. */
	  int16_t *const second_half_out = row_idct_output + 8;

	  /* Preserve the d8-d15 register values across the kernel calls. */
	  save_registers();

	  /* Row transform, upper 8 rows: result kept in row_idct_output. */
	  vp9_short_idct16x16_add_neon_pass1(input, pass1_output, 8);
	  vp9_short_idct16x16_add_neon_pass2(input + 1, row_idct_output,
	                                     pass1_output, 0, dest, dest_stride);

	  /* Row transform, lower 8 rows: result kept in row_idct_output + 8. */
	  vp9_short_idct16x16_add_neon_pass1(lower_rows, pass1_output, 8);
	  vp9_short_idct16x16_add_neon_pass2(lower_rows + 1, second_half_out,
	                                     pass1_output, 0, dest, dest_stride);

	  /* Column transform, left 8 columns: result is added to dest. */
	  vp9_short_idct16x16_add_neon_pass1(row_idct_output, pass1_output, 8);
	  vp9_short_idct16x16_add_neon_pass2(row_idct_output + 1, row_idct_output,
	                                     pass1_output, 1, dest, dest_stride);

	  /* Column transform, right 8 columns: result is added to dest + 8. */
	  vp9_short_idct16x16_add_neon_pass1(right_cols, pass1_output, 8);
	  vp9_short_idct16x16_add_neon_pass2(right_cols + 1, second_half_out,
	                                     pass1_output, 1, dest + 8,
	                                     dest_stride);

	  /* Put the d8-d15 register values back. */
	  restore_registers();
	}

	109

	/*
	 * 16x16 inverse DCT driver for the "idct10" case: only the upper 8 rows
	 * go through the row transform, using the idct10 kernels; the lower 8 rows
	 * are skipped because they are all zero. The column transform then uses the
	 * regular 16x16 kernels and adds its result to `dest`.
	 *
	 * Each transform half is a pass1/pass2 pair:
	 *   - pass1 processes the even elements 0, 2, ..., 14 and leaves the
	 *     stage 6 result in pass1_output;
	 *   - pass2 processes the odd elements 1, 3, ..., 15 and combines them
	 *     with pass1_output to produce the final stage 7 result.
	 *
	 * input       : 16x16 block of coefficients.
	 * dest        : destination pixels the column result is added to.
	 * dest_stride : stride of `dest`.
	 */
	void vp9_short_idct10_16x16_add_neon(int16_t *input,
	                                     uint8_t *dest, int dest_stride) {
	  int16_t pass1_output[16 * 16] = {0};
	  /*
	   * Zero-initialised; NOTE(review): the skipped lower-row pass appears to
	   * rely on this so the column transform reads zeros there -- confirm.
	   */
	  int16_t row_idct_output[16 * 16] = {0};

	  /* Start of the right 8 columns within the row-transform result. */
	  int16_t *const right_cols = row_idct_output + 8 * 16;

	  /* Preserve the d8-d15 register values across the kernel calls. */
	  save_registers();

	  /* Row transform, upper 8 rows: result kept in row_idct_output. */
	  vp9_short_idct10_16x16_add_neon_pass1(input, pass1_output, 8);
	  vp9_short_idct10_16x16_add_neon_pass2(input + 1, row_idct_output,
	                                        pass1_output, 0, dest, dest_stride);

	  /* No row transform for the lower 8 rows: they are all zeros. */

	  /* Column transform, left 8 columns: result is added to dest. */
	  vp9_short_idct16x16_add_neon_pass1(row_idct_output, pass1_output, 8);
	  vp9_short_idct16x16_add_neon_pass2(row_idct_output + 1, row_idct_output,
	                                     pass1_output, 1, dest, dest_stride);

	  /* Column transform, right 8 columns: result is added to dest + 8. */
	  vp9_short_idct16x16_add_neon_pass1(right_cols, pass1_output, 8);
	  vp9_short_idct16x16_add_neon_pass2(right_cols + 1, row_idct_output + 8,
	                                     pass1_output, 1, dest + 8,
	                                     dest_stride);

	  /* Put the d8-d15 register values back. */
	  restore_registers();
	}

OLD	NEW

« no previous file with comments | « source/libvpx/vp9/common/arm/neon/vp9_copy_neon.asm ('k') | source/libvpx/vp9/common/arm/neon/vp9_mb_lpf_neon.asm » ('j') | no next file with comments »