source/libvpx/vp9/common/arm/neon/vp9_idct16x16_neon.c - Issue 812033011: libvpx: Pull from upstream

Side by Side Diff: source/libvpx/vp9/common/arm/neon/vp9_idct16x16_neon.c

Issue 812033011: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/

Patch Set: Created 5 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« no previous file with comments | « source/libvpx/vp9/common/arm/neon/vp9_idct16x16_add_neon_asm.asm ('k') | source/libvpx/vp9/common/arm/neon/vp9_idct32x32_1_add_neon.asm » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved.	2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 12 matching lines...) Expand all Loading...
23 void vp9_idct16x16_10_add_neon_pass1(const int16_t *input,	23 void vp9_idct16x16_10_add_neon_pass1(const int16_t *input,

24 int16_t *output,	24 int16_t *output,

25 int output_stride);	25 int output_stride);

26 void vp9_idct16x16_10_add_neon_pass2(const int16_t *src,	26 void vp9_idct16x16_10_add_neon_pass2(const int16_t *src,

27 int16_t *output,	27 int16_t *output,

28 int16_t *pass1Output,	28 int16_t *pass1Output,

29 int16_t skip_adding,	29 int16_t skip_adding,

30 uint8_t *dest,	30 uint8_t *dest,

31 int dest_stride);	31 int dest_stride);

32	32

	33 #if HAVE_NEON_ASM

33 /* For ARM NEON, d8-d15 are callee-saved registers, and need to be saved. */	34 /* For ARM NEON, d8-d15 are callee-saved registers, and need to be saved. */

34 extern void vp9_push_neon(int64_t *store);	35 extern void vp9_push_neon(int64_t *store);

35 extern void vp9_pop_neon(int64_t *store);	36 extern void vp9_pop_neon(int64_t *store);

	37 #endif // HAVE_NEON_ASM

36	38

37 void vp9_idct16x16_256_add_neon(const int16_t *input,	39 void vp9_idct16x16_256_add_neon(const int16_t *input,

38 uint8_t *dest, int dest_stride) {	40 uint8_t *dest, int dest_stride) {

	41 #if HAVE_NEON_ASM

39 int64_t store_reg[8];	42 int64_t store_reg[8];

	43 #endif

40 int16_t pass1_output[16*16] = {0};	44 int16_t pass1_output[16*16] = {0};

41 int16_t row_idct_output[16*16] = {0};	45 int16_t row_idct_output[16*16] = {0};

42	46

	47 #if HAVE_NEON_ASM

43 // save d8-d15 register values.	48 // save d8-d15 register values.

44 vp9_push_neon(store_reg);	49 vp9_push_neon(store_reg);

	50 #endif

45	51

46 /* Parallel idct on the upper 8 rows */	52 /* Parallel idct on the upper 8 rows */

47 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and saves the	53 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and saves the

48 // stage 6 result in pass1_output.	54 // stage 6 result in pass1_output.

49 vp9_idct16x16_256_add_neon_pass1(input, pass1_output, 8);	55 vp9_idct16x16_256_add_neon_pass1(input, pass1_output, 8);

50	56

51 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines	57 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines

52 // with result in pass1(pass1_output) to calculate final result in stage 7	58 // with result in pass1(pass1_output) to calculate final result in stage 7

53 // which will be saved into row_idct_output.	59 // which will be saved into row_idct_output.

54 vp9_idct16x16_256_add_neon_pass2(input+1,	60 vp9_idct16x16_256_add_neon_pass2(input+1,

(...skipping 41 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
96 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines	102 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines

97 // with result in pass1(pass1_output) to calculate final result in stage 7.	103 // with result in pass1(pass1_output) to calculate final result in stage 7.

98 // Then add the result to the destination data.	104 // Then add the result to the destination data.

99 vp9_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1,	105 vp9_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1,

100 row_idct_output+8,	106 row_idct_output+8,

101 pass1_output,	107 pass1_output,

102 1,	108 1,

103 dest+8,	109 dest+8,

104 dest_stride);	110 dest_stride);

105	111

	112 #if HAVE_NEON_ASM

106 // restore d8-d15 register values.	113 // restore d8-d15 register values.

107 vp9_pop_neon(store_reg);	114 vp9_pop_neon(store_reg);

	115 #endif

108	116

109 return;	117 return;

110 }	118 }

111	119

112 void vp9_idct16x16_10_add_neon(const int16_t *input,	120 void vp9_idct16x16_10_add_neon(const int16_t *input,

113 uint8_t *dest, int dest_stride) {	121 uint8_t *dest, int dest_stride) {

	122 #if HAVE_NEON_ASM

114 int64_t store_reg[8];	123 int64_t store_reg[8];

	124 #endif

115 int16_t pass1_output[16*16] = {0};	125 int16_t pass1_output[16*16] = {0};

116 int16_t row_idct_output[16*16] = {0};	126 int16_t row_idct_output[16*16] = {0};

117	127

	128 #if HAVE_NEON_ASM

118 // save d8-d15 register values.	129 // save d8-d15 register values.

119 vp9_push_neon(store_reg);	130 vp9_push_neon(store_reg);

	131 #endif

120	132

121 /* Parallel idct on the upper 8 rows */	133 /* Parallel idct on the upper 8 rows */

122 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and saves the	134 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and saves the

123 // stage 6 result in pass1_output.	135 // stage 6 result in pass1_output.

124 vp9_idct16x16_10_add_neon_pass1(input, pass1_output, 8);	136 vp9_idct16x16_10_add_neon_pass1(input, pass1_output, 8);

125	137

126 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines	138 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines

127 // with result in pass1(pass1_output) to calculate final result in stage 7	139 // with result in pass1(pass1_output) to calculate final result in stage 7

128 // which will be saved into row_idct_output.	140 // which will be saved into row_idct_output.

129 vp9_idct16x16_10_add_neon_pass2(input+1,	141 vp9_idct16x16_10_add_neon_pass2(input+1,

(...skipping 28 matching lines...) Expand all Loading...
158 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines	170 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines

159 // with result in pass1(pass1_output) to calculate final result in stage 7.	171 // with result in pass1(pass1_output) to calculate final result in stage 7.

160 // Then add the result to the destination data.	172 // Then add the result to the destination data.

161 vp9_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1,	173 vp9_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1,

162 row_idct_output+8,	174 row_idct_output+8,

163 pass1_output,	175 pass1_output,

164 1,	176 1,

165 dest+8,	177 dest+8,

166 dest_stride);	178 dest_stride);

167	179

	180 #if HAVE_NEON_ASM

168 // restore d8-d15 register values.	181 // restore d8-d15 register values.

169 vp9_pop_neon(store_reg);	182 vp9_pop_neon(store_reg);

	183 #endif

170	184

171 return;	185 return;

172 }	186 }

OLD	NEW