Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(61)

Side by Side Diff: source/libvpx/vpx_dsp/arm/idct16x16_neon.c

Issue 1302353004: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 /* 1 /*
2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 #include "vp9/common/vp9_common.h" 11 #include "vpx_dsp/vpx_dsp_common.h"
12 12
13 void vp9_idct16x16_256_add_neon_pass1(const int16_t *input, 13 void vpx_idct16x16_256_add_neon_pass1(const int16_t *input,
14 int16_t *output, 14 int16_t *output,
15 int output_stride); 15 int output_stride);
16 void vp9_idct16x16_256_add_neon_pass2(const int16_t *src, 16 void vpx_idct16x16_256_add_neon_pass2(const int16_t *src,
17 int16_t *output, 17 int16_t *output,
18 int16_t *pass1Output, 18 int16_t *pass1Output,
19 int16_t skip_adding, 19 int16_t skip_adding,
20 uint8_t *dest, 20 uint8_t *dest,
21 int dest_stride); 21 int dest_stride);
22 void vp9_idct16x16_10_add_neon_pass1(const int16_t *input, 22 void vpx_idct16x16_10_add_neon_pass1(const int16_t *input,
23 int16_t *output, 23 int16_t *output,
24 int output_stride); 24 int output_stride);
25 void vp9_idct16x16_10_add_neon_pass2(const int16_t *src, 25 void vpx_idct16x16_10_add_neon_pass2(const int16_t *src,
26 int16_t *output, 26 int16_t *output,
27 int16_t *pass1Output, 27 int16_t *pass1Output,
28 int16_t skip_adding, 28 int16_t skip_adding,
29 uint8_t *dest, 29 uint8_t *dest,
30 int dest_stride); 30 int dest_stride);
31 31
32 #if HAVE_NEON_ASM 32 #if HAVE_NEON_ASM
33 /* For ARM NEON, d8-d15 are callee-saved registers, and need to be saved. */ 33 /* For ARM NEON, d8-d15 are callee-saved registers, and need to be saved. */
34 extern void vp9_push_neon(int64_t *store); 34 extern void vpx_push_neon(int64_t *store);
35 extern void vp9_pop_neon(int64_t *store); 35 extern void vpx_pop_neon(int64_t *store);
36 #endif // HAVE_NEON_ASM 36 #endif // HAVE_NEON_ASM
37 37
38 void vp9_idct16x16_256_add_neon(const int16_t *input, 38 void vpx_idct16x16_256_add_neon(const int16_t *input,
39 uint8_t *dest, int dest_stride) { 39 uint8_t *dest, int dest_stride) {
40 #if HAVE_NEON_ASM 40 #if HAVE_NEON_ASM
41 int64_t store_reg[8]; 41 int64_t store_reg[8];
42 #endif 42 #endif
43 int16_t pass1_output[16*16] = {0}; 43 int16_t pass1_output[16*16] = {0};
44 int16_t row_idct_output[16*16] = {0}; 44 int16_t row_idct_output[16*16] = {0};
45 45
46 #if HAVE_NEON_ASM 46 #if HAVE_NEON_ASM
47 // save d8-d15 register values. 47 // save d8-d15 register values.
48 vp9_push_neon(store_reg); 48 vpx_push_neon(store_reg);
49 #endif 49 #endif
50 50
51 /* Parallel idct on the upper 8 rows */ 51 /* Parallel idct on the upper 8 rows */
52 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the 52 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
53 // stage 6 result in pass1_output. 53 // stage 6 result in pass1_output.
54 vp9_idct16x16_256_add_neon_pass1(input, pass1_output, 8); 54 vpx_idct16x16_256_add_neon_pass1(input, pass1_output, 8);
55 55
56 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines 56 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
57 // with result in pass1(pass1_output) to calculate final result in stage 7 57 // with result in pass1(pass1_output) to calculate final result in stage 7
58 // which will be saved into row_idct_output. 58 // which will be saved into row_idct_output.
59 vp9_idct16x16_256_add_neon_pass2(input+1, 59 vpx_idct16x16_256_add_neon_pass2(input+1,
60 row_idct_output, 60 row_idct_output,
61 pass1_output, 61 pass1_output,
62 0, 62 0,
63 dest, 63 dest,
64 dest_stride); 64 dest_stride);
65 65
66 /* Parallel idct on the lower 8 rows */ 66 /* Parallel idct on the lower 8 rows */
67 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the 67 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
68 // stage 6 result in pass1_output. 68 // stage 6 result in pass1_output.
69 vp9_idct16x16_256_add_neon_pass1(input+8*16, pass1_output, 8); 69 vpx_idct16x16_256_add_neon_pass1(input+8*16, pass1_output, 8);
70 70
71 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines 71 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
72 // with result in pass1(pass1_output) to calculate final result in stage 7 72 // with result in pass1(pass1_output) to calculate final result in stage 7
73 // which will be saved into row_idct_output. 73 // which will be saved into row_idct_output.
74 vp9_idct16x16_256_add_neon_pass2(input+8*16+1, 74 vpx_idct16x16_256_add_neon_pass2(input+8*16+1,
75 row_idct_output+8, 75 row_idct_output+8,
76 pass1_output, 76 pass1_output,
77 0, 77 0,
78 dest, 78 dest,
79 dest_stride); 79 dest_stride);
80 80
81 /* Parallel idct on the left 8 columns */ 81 /* Parallel idct on the left 8 columns */
82 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the 82 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
83 // stage 6 result in pass1_output. 83 // stage 6 result in pass1_output.
84 vp9_idct16x16_256_add_neon_pass1(row_idct_output, pass1_output, 8); 84 vpx_idct16x16_256_add_neon_pass1(row_idct_output, pass1_output, 8);
85 85
86 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines 86 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
87 // with result in pass1(pass1_output) to calculate final result in stage 7. 87 // with result in pass1(pass1_output) to calculate final result in stage 7.
88 // Then add the result to the destination data. 88 // Then add the result to the destination data.
89 vp9_idct16x16_256_add_neon_pass2(row_idct_output+1, 89 vpx_idct16x16_256_add_neon_pass2(row_idct_output+1,
90 row_idct_output, 90 row_idct_output,
91 pass1_output, 91 pass1_output,
92 1, 92 1,
93 dest, 93 dest,
94 dest_stride); 94 dest_stride);
95 95
96 /* Parallel idct on the right 8 columns */ 96 /* Parallel idct on the right 8 columns */
97 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the 97 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
98 // stage 6 result in pass1_output. 98 // stage 6 result in pass1_output.
99 vp9_idct16x16_256_add_neon_pass1(row_idct_output+8*16, pass1_output, 8); 99 vpx_idct16x16_256_add_neon_pass1(row_idct_output+8*16, pass1_output, 8);
100 100
101 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines 101 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
102 // with result in pass1(pass1_output) to calculate final result in stage 7. 102 // with result in pass1(pass1_output) to calculate final result in stage 7.
103 // Then add the result to the destination data. 103 // Then add the result to the destination data.
104 vp9_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1, 104 vpx_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1,
105 row_idct_output+8, 105 row_idct_output+8,
106 pass1_output, 106 pass1_output,
107 1, 107 1,
108 dest+8, 108 dest+8,
109 dest_stride); 109 dest_stride);
110 110
111 #if HAVE_NEON_ASM 111 #if HAVE_NEON_ASM
112 // restore d8-d15 register values. 112 // restore d8-d15 register values.
113 vp9_pop_neon(store_reg); 113 vpx_pop_neon(store_reg);
114 #endif 114 #endif
115 115
116 return; 116 return;
117 } 117 }
118 118
119 void vp9_idct16x16_10_add_neon(const int16_t *input, 119 void vpx_idct16x16_10_add_neon(const int16_t *input,
120 uint8_t *dest, int dest_stride) { 120 uint8_t *dest, int dest_stride) {
121 #if HAVE_NEON_ASM 121 #if HAVE_NEON_ASM
122 int64_t store_reg[8]; 122 int64_t store_reg[8];
123 #endif 123 #endif
124 int16_t pass1_output[16*16] = {0}; 124 int16_t pass1_output[16*16] = {0};
125 int16_t row_idct_output[16*16] = {0}; 125 int16_t row_idct_output[16*16] = {0};
126 126
127 #if HAVE_NEON_ASM 127 #if HAVE_NEON_ASM
128 // save d8-d15 register values. 128 // save d8-d15 register values.
129 vp9_push_neon(store_reg); 129 vpx_push_neon(store_reg);
130 #endif 130 #endif
131 131
132 /* Parallel idct on the upper 8 rows */ 132 /* Parallel idct on the upper 8 rows */
133 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the 133 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
134 // stage 6 result in pass1_output. 134 // stage 6 result in pass1_output.
135 vp9_idct16x16_10_add_neon_pass1(input, pass1_output, 8); 135 vpx_idct16x16_10_add_neon_pass1(input, pass1_output, 8);
136 136
137 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines 137 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
138 // with result in pass1(pass1_output) to calculate final result in stage 7 138 // with result in pass1(pass1_output) to calculate final result in stage 7
139 // which will be saved into row_idct_output. 139 // which will be saved into row_idct_output.
140 vp9_idct16x16_10_add_neon_pass2(input+1, 140 vpx_idct16x16_10_add_neon_pass2(input+1,
141 row_idct_output, 141 row_idct_output,
142 pass1_output, 142 pass1_output,
143 0, 143 0,
144 dest, 144 dest,
145 dest_stride); 145 dest_stride);
146 146
147 /* Skip Parallel idct on the lower 8 rows as they are all 0s */ 147 /* Skip Parallel idct on the lower 8 rows as they are all 0s */
148 148
149 /* Parallel idct on the left 8 columns */ 149 /* Parallel idct on the left 8 columns */
150 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the 150 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
151 // stage 6 result in pass1_output. 151 // stage 6 result in pass1_output.
152 vp9_idct16x16_256_add_neon_pass1(row_idct_output, pass1_output, 8); 152 vpx_idct16x16_256_add_neon_pass1(row_idct_output, pass1_output, 8);
153 153
154 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines 154 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
155 // with result in pass1(pass1_output) to calculate final result in stage 7. 155 // with result in pass1(pass1_output) to calculate final result in stage 7.
156 // Then add the result to the destination data. 156 // Then add the result to the destination data.
157 vp9_idct16x16_256_add_neon_pass2(row_idct_output+1, 157 vpx_idct16x16_256_add_neon_pass2(row_idct_output+1,
158 row_idct_output, 158 row_idct_output,
159 pass1_output, 159 pass1_output,
160 1, 160 1,
161 dest, 161 dest,
162 dest_stride); 162 dest_stride);
163 163
164 /* Parallel idct on the right 8 columns */ 164 /* Parallel idct on the right 8 columns */
165 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the 165 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
166 // stage 6 result in pass1_output. 166 // stage 6 result in pass1_output.
167 vp9_idct16x16_256_add_neon_pass1(row_idct_output+8*16, pass1_output, 8); 167 vpx_idct16x16_256_add_neon_pass1(row_idct_output+8*16, pass1_output, 8);
168 168
169 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines 169 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
170 // with result in pass1(pass1_output) to calculate final result in stage 7. 170 // with result in pass1(pass1_output) to calculate final result in stage 7.
171 // Then add the result to the destination data. 171 // Then add the result to the destination data.
172 vp9_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1, 172 vpx_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1,
173 row_idct_output+8, 173 row_idct_output+8,
174 pass1_output, 174 pass1_output,
175 1, 175 1,
176 dest+8, 176 dest+8,
177 dest_stride); 177 dest_stride);
178 178
179 #if HAVE_NEON_ASM 179 #if HAVE_NEON_ASM
180 // restore d8-d15 register values. 180 // restore d8-d15 register values.
181 vp9_pop_neon(store_reg); 181 vpx_pop_neon(store_reg);
182 #endif 182 #endif
183 183
184 return; 184 return;
185 } 185 }
OLD | NEW
« no previous file with comments | « source/libvpx/vpx_dsp/arm/idct16x16_add_neon.c ('k') | source/libvpx/vpx_dsp/arm/idct32x32_1_add_neon.asm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698