source/libvpx/vp9/common/arm/neon/vp9_idct16x16_neon.c - Issue 54923004: libvpx: Pull from upstream

Side by Side Diff: source/libvpx/vp9/common/arm/neon/vp9_idct16x16_neon.c

Issue 54923004: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/

Patch Set: Created 7 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved.	2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 #include "./vp9_rtcd.h"	11 #include "./vp9_rtcd.h"

12 #include "vp9/common/vp9_common.h"	12 #include "vp9/common/vp9_common.h"

13	13

14 extern void vp9_short_idct16x16_add_neon_pass1(int16_t *input,	14 void vp9_idct16x16_256_add_neon_pass1(const int16_t *input,

15 int16_t *output,	15 int16_t *output,

16 int output_stride);	16 int output_stride);

17 extern void vp9_short_idct16x16_add_neon_pass2(int16_t *src,	17 void vp9_idct16x16_256_add_neon_pass2(const int16_t *src,

18 int16_t *output,	18 int16_t *output,

19 int16_t *pass1Output,	19 int16_t *pass1Output,

20 int16_t skip_adding,	20 int16_t skip_adding,

21 uint8_t *dest,	21 uint8_t *dest,

22 int dest_stride);	22 int dest_stride);

23 extern void vp9_short_idct10_16x16_add_neon_pass1(int16_t *input,	23 void vp9_idct16x16_10_add_neon_pass1(const int16_t *input,

24 int16_t *output,	24 int16_t *output,

25 int output_stride);	25 int output_stride);

26 extern void vp9_short_idct10_16x16_add_neon_pass2(int16_t *src,	26 void vp9_idct16x16_10_add_neon_pass2(const int16_t *src,

27 int16_t *output,	27 int16_t *output,

28 int16_t *pass1Output,	28 int16_t *pass1Output,

29 int16_t skip_adding,	29 int16_t skip_adding,

30 uint8_t *dest,	30 uint8_t *dest,

31 int dest_stride);	31 int dest_stride);

32 extern void save_neon_registers();

33 extern void restore_neon_registers();

34	32

	33 /* For ARM NEON, d8-d15 are callee-saved registers, and need to be saved. */

	34 extern void vp9_push_neon(int64_t *store);

	35 extern void vp9_pop_neon(int64_t *store);

35	36

36 void vp9_short_idct16x16_add_neon(int16_t *input,	37 void vp9_idct16x16_256_add_neon(const int16_t *input,

37 uint8_t *dest, int dest_stride) {	38 uint8_t *dest, int dest_stride) {

	39 int64_t store_reg[8];

38 int16_t pass1_output[16*16] = {0};	40 int16_t pass1_output[16*16] = {0};

39 int16_t row_idct_output[16*16] = {0};	41 int16_t row_idct_output[16*16] = {0};

40	42

41 // save d8-d15 register values.	43 // save d8-d15 register values.

42 save_neon_registers();	44 vp9_push_neon(store_reg);

43	45

44 /* Parallel idct on the upper 8 rows */	46 /* Parallel idct on the upper 8 rows */

45 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the	47 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the

46 // stage 6 result in pass1_output.	48 // stage 6 result in pass1_output.

47 vp9_short_idct16x16_add_neon_pass1(input, pass1_output, 8);	49 vp9_idct16x16_256_add_neon_pass1(input, pass1_output, 8);

48	50

49 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines	51 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines

50 // with result in pass1(pass1_output) to calculate final result in stage 7	52 // with result in pass1(pass1_output) to calculate final result in stage 7

51 // which will be saved into row_idct_output.	53 // which will be saved into row_idct_output.

52 vp9_short_idct16x16_add_neon_pass2(input+1,	54 vp9_idct16x16_256_add_neon_pass2(input+1,

53 row_idct_output,	55 row_idct_output,

54 pass1_output,	56 pass1_output,

55 0,	57 0,

56 dest,	58 dest,

57 dest_stride);	59 dest_stride);

58	60

59 /* Parallel idct on the lower 8 rows */	61 /* Parallel idct on the lower 8 rows */

60 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the	62 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the

61 // stage 6 result in pass1_output.	63 // stage 6 result in pass1_output.

62 vp9_short_idct16x16_add_neon_pass1(input+8*16, pass1_output, 8);	64 vp9_idct16x16_256_add_neon_pass1(input+8*16, pass1_output, 8);

63	65

64 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines	66 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines

65 // with result in pass1(pass1_output) to calculate final result in stage 7	67 // with result in pass1(pass1_output) to calculate final result in stage 7

66 // which will be saved into row_idct_output.	68 // which will be saved into row_idct_output.

67 vp9_short_idct16x16_add_neon_pass2(input+8*16+1,	69 vp9_idct16x16_256_add_neon_pass2(input+8*16+1,

68 row_idct_output+8,	70 row_idct_output+8,

69 pass1_output,	71 pass1_output,

70 0,	72 0,

71 dest,	73 dest,

72 dest_stride);	74 dest_stride);

73	75

74 /* Parallel idct on the left 8 columns */	76 /* Parallel idct on the left 8 columns */

75 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the	77 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the

76 // stage 6 result in pass1_output.	78 // stage 6 result in pass1_output.

77 vp9_short_idct16x16_add_neon_pass1(row_idct_output, pass1_output, 8);	79 vp9_idct16x16_256_add_neon_pass1(row_idct_output, pass1_output, 8);

78	80

79 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines	81 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines

80 // with result in pass1(pass1_output) to calculate final result in stage 7.	82 // with result in pass1(pass1_output) to calculate final result in stage 7.

81 // Then add the result to the destination data.	83 // Then add the result to the destination data.

82 vp9_short_idct16x16_add_neon_pass2(row_idct_output+1,	84 vp9_idct16x16_256_add_neon_pass2(row_idct_output+1,

83 row_idct_output,	85 row_idct_output,

84 pass1_output,	86 pass1_output,

85 1,	87 1,

86 dest,	88 dest,

87 dest_stride);	89 dest_stride);

88	90

89 /* Parallel idct on the right 8 columns */	91 /* Parallel idct on the right 8 columns */

90 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the	92 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the

91 // stage 6 result in pass1_output.	93 // stage 6 result in pass1_output.

92 vp9_short_idct16x16_add_neon_pass1(row_idct_output+8*16, pass1_output, 8);	94 vp9_idct16x16_256_add_neon_pass1(row_idct_output+8*16, pass1_output, 8);

93	95

94 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines	96 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines

95 // with result in pass1(pass1_output) to calculate final result in stage 7.	97 // with result in pass1(pass1_output) to calculate final result in stage 7.

96 // Then add the result to the destination data.	98 // Then add the result to the destination data.

97 vp9_short_idct16x16_add_neon_pass2(row_idct_output+8*16+1,	99 vp9_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1,

98 row_idct_output+8,	100 row_idct_output+8,

99 pass1_output,	101 pass1_output,

100 1,	102 1,

101 dest+8,	103 dest+8,

102 dest_stride);	104 dest_stride);

103	105

104 // restore d8-d15 register values.	106 // restore d8-d15 register values.

105 restore_neon_registers();	107 vp9_pop_neon(store_reg);

106	108

107 return;	109 return;

108 }	110 }

109	111

110 void vp9_short_idct10_16x16_add_neon(int16_t *input,	112 void vp9_idct16x16_10_add_neon(const int16_t *input,

111 uint8_t *dest, int dest_stride) {	113 uint8_t *dest, int dest_stride) {

	114 int64_t store_reg[8];

112 int16_t pass1_output[16*16] = {0};	115 int16_t pass1_output[16*16] = {0};

113 int16_t row_idct_output[16*16] = {0};	116 int16_t row_idct_output[16*16] = {0};

114	117

115 // save d8-d15 register values.	118 // save d8-d15 register values.

116 save_neon_registers();	119 vp9_push_neon(store_reg);

117	120

118 /* Parallel idct on the upper 8 rows */	121 /* Parallel idct on the upper 8 rows */

119 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the	122 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the

120 // stage 6 result in pass1_output.	123 // stage 6 result in pass1_output.

121 vp9_short_idct10_16x16_add_neon_pass1(input, pass1_output, 8);	124 vp9_idct16x16_10_add_neon_pass1(input, pass1_output, 8);

122	125

123 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines	126 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines

124 // with result in pass1(pass1_output) to calculate final result in stage 7	127 // with result in pass1(pass1_output) to calculate final result in stage 7

125 // which will be saved into row_idct_output.	128 // which will be saved into row_idct_output.

126 vp9_short_idct10_16x16_add_neon_pass2(input+1,	129 vp9_idct16x16_10_add_neon_pass2(input+1,

127 row_idct_output,	130 row_idct_output,

128 pass1_output,	131 pass1_output,

129 0,	132 0,

130 dest,	133 dest,

131 dest_stride);	134 dest_stride);

132	135

133 /* Skip Parallel idct on the lower 8 rows as they are all 0s */	136 /* Skip Parallel idct on the lower 8 rows as they are all 0s */

134	137

135 /* Parallel idct on the left 8 columns */	138 /* Parallel idct on the left 8 columns */

136 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the	139 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the

137 // stage 6 result in pass1_output.	140 // stage 6 result in pass1_output.

138 vp9_short_idct16x16_add_neon_pass1(row_idct_output, pass1_output, 8);	141 vp9_idct16x16_256_add_neon_pass1(row_idct_output, pass1_output, 8);

139	142

140 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines	143 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines

141 // with result in pass1(pass1_output) to calculate final result in stage 7.	144 // with result in pass1(pass1_output) to calculate final result in stage 7.

142 // Then add the result to the destination data.	145 // Then add the result to the destination data.

143 vp9_short_idct16x16_add_neon_pass2(row_idct_output+1,	146 vp9_idct16x16_256_add_neon_pass2(row_idct_output+1,

144 row_idct_output,	147 row_idct_output,

145 pass1_output,	148 pass1_output,

146 1,	149 1,

147 dest,	150 dest,

148 dest_stride);	151 dest_stride);

149	152

150 /* Parallel idct on the right 8 columns */	153 /* Parallel idct on the right 8 columns */

151 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the	154 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the

152 // stage 6 result in pass1_output.	155 // stage 6 result in pass1_output.

153 vp9_short_idct16x16_add_neon_pass1(row_idct_output+8*16, pass1_output, 8);	156 vp9_idct16x16_256_add_neon_pass1(row_idct_output+8*16, pass1_output, 8);

154	157

155 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines	158 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines

156 // with result in pass1(pass1_output) to calculate final result in stage 7.	159 // with result in pass1(pass1_output) to calculate final result in stage 7.

157 // Then add the result to the destination data.	160 // Then add the result to the destination data.

158 vp9_short_idct16x16_add_neon_pass2(row_idct_output+8*16+1,	161 vp9_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1,

159 row_idct_output+8,	162 row_idct_output+8,

160 pass1_output,	163 pass1_output,

161 1,	164 1,

162 dest+8,	165 dest+8,

163 dest_stride);	166 dest_stride);

164	167

165 // restore d8-d15 register values.	168 // restore d8-d15 register values.

166 restore_neon_registers();	169 vp9_pop_neon(store_reg);

167	170

168 return;	171 return;

169 }	172 }

OLD	NEW

« no previous file with comments | « source/libvpx/build/make/thumb.pm ('k') | source/libvpx/vp9/common/arm/neon/vp9_idct32x32_neon.c » ('j') | no next file with comments »