Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(471)

Side by Side Diff: source/libvpx/vp9/common/arm/neon/vp9_idct16x16_neon.c

Issue 812033011: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD | NEW
1 /* 1 /*
2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 12 matching lines...) Expand all
23 void vp9_idct16x16_10_add_neon_pass1(const int16_t *input, 23 void vp9_idct16x16_10_add_neon_pass1(const int16_t *input,
24 int16_t *output, 24 int16_t *output,
25 int output_stride); 25 int output_stride);
26 void vp9_idct16x16_10_add_neon_pass2(const int16_t *src, 26 void vp9_idct16x16_10_add_neon_pass2(const int16_t *src,
27 int16_t *output, 27 int16_t *output,
28 int16_t *pass1Output, 28 int16_t *pass1Output,
29 int16_t skip_adding, 29 int16_t skip_adding,
30 uint8_t *dest, 30 uint8_t *dest,
31 int dest_stride); 31 int dest_stride);
32 32
33 #if HAVE_NEON_ASM
33 /* For ARM NEON, d8-d15 are callee-saved registers, and need to be saved. */ 34 /* For ARM NEON, d8-d15 are callee-saved registers, and need to be saved. */
34 extern void vp9_push_neon(int64_t *store); 35 extern void vp9_push_neon(int64_t *store);
35 extern void vp9_pop_neon(int64_t *store); 36 extern void vp9_pop_neon(int64_t *store);
37 #endif // HAVE_NEON_ASM
36 38
37 void vp9_idct16x16_256_add_neon(const int16_t *input, 39 void vp9_idct16x16_256_add_neon(const int16_t *input,
38 uint8_t *dest, int dest_stride) { 40 uint8_t *dest, int dest_stride) {
41 #if HAVE_NEON_ASM
39 int64_t store_reg[8]; 42 int64_t store_reg[8];
43 #endif
40 int16_t pass1_output[16*16] = {0}; 44 int16_t pass1_output[16*16] = {0};
41 int16_t row_idct_output[16*16] = {0}; 45 int16_t row_idct_output[16*16] = {0};
42 46
47 #if HAVE_NEON_ASM
43 // save d8-d15 register values. 48 // save d8-d15 register values.
44 vp9_push_neon(store_reg); 49 vp9_push_neon(store_reg);
50 #endif
45 51
46 /* Parallel idct on the upper 8 rows */ 52 /* Parallel idct on the upper 8 rows */
47 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the 53 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
48 // stage 6 result in pass1_output. 54 // stage 6 result in pass1_output.
49 vp9_idct16x16_256_add_neon_pass1(input, pass1_output, 8); 55 vp9_idct16x16_256_add_neon_pass1(input, pass1_output, 8);
50 56
51 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines 57 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
52 // with result in pass1(pass1_output) to calculate final result in stage 7 58 // with result in pass1(pass1_output) to calculate final result in stage 7
53 // which will be saved into row_idct_output. 59 // which will be saved into row_idct_output.
54 vp9_idct16x16_256_add_neon_pass2(input+1, 60 vp9_idct16x16_256_add_neon_pass2(input+1,
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
96 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines 102 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
97 // with result in pass1(pass1_output) to calculate final result in stage 7. 103 // with result in pass1(pass1_output) to calculate final result in stage 7.
98 // Then add the result to the destination data. 104 // Then add the result to the destination data.
99 vp9_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1, 105 vp9_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1,
100 row_idct_output+8, 106 row_idct_output+8,
101 pass1_output, 107 pass1_output,
102 1, 108 1,
103 dest+8, 109 dest+8,
104 dest_stride); 110 dest_stride);
105 111
112 #if HAVE_NEON_ASM
106 // restore d8-d15 register values. 113 // restore d8-d15 register values.
107 vp9_pop_neon(store_reg); 114 vp9_pop_neon(store_reg);
115 #endif
108 116
109 return; 117 return;
110 } 118 }
111 119
112 void vp9_idct16x16_10_add_neon(const int16_t *input, 120 void vp9_idct16x16_10_add_neon(const int16_t *input,
113 uint8_t *dest, int dest_stride) { 121 uint8_t *dest, int dest_stride) {
122 #if HAVE_NEON_ASM
114 int64_t store_reg[8]; 123 int64_t store_reg[8];
124 #endif
115 int16_t pass1_output[16*16] = {0}; 125 int16_t pass1_output[16*16] = {0};
116 int16_t row_idct_output[16*16] = {0}; 126 int16_t row_idct_output[16*16] = {0};
117 127
128 #if HAVE_NEON_ASM
118 // save d8-d15 register values. 129 // save d8-d15 register values.
119 vp9_push_neon(store_reg); 130 vp9_push_neon(store_reg);
131 #endif
120 132
121 /* Parallel idct on the upper 8 rows */ 133 /* Parallel idct on the upper 8 rows */
122 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the 134 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
123 // stage 6 result in pass1_output. 135 // stage 6 result in pass1_output.
124 vp9_idct16x16_10_add_neon_pass1(input, pass1_output, 8); 136 vp9_idct16x16_10_add_neon_pass1(input, pass1_output, 8);
125 137
126 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines 138 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
127 // with result in pass1(pass1_output) to calculate final result in stage 7 139 // with result in pass1(pass1_output) to calculate final result in stage 7
128 // which will be saved into row_idct_output. 140 // which will be saved into row_idct_output.
129 vp9_idct16x16_10_add_neon_pass2(input+1, 141 vp9_idct16x16_10_add_neon_pass2(input+1,
(...skipping 28 matching lines...) Expand all
158 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines 170 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
159 // with result in pass1(pass1_output) to calculate final result in stage 7. 171 // with result in pass1(pass1_output) to calculate final result in stage 7.
160 // Then add the result to the destination data. 172 // Then add the result to the destination data.
161 vp9_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1, 173 vp9_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1,
162 row_idct_output+8, 174 row_idct_output+8,
163 pass1_output, 175 pass1_output,
164 1, 176 1,
165 dest+8, 177 dest+8,
166 dest_stride); 178 dest_stride);
167 179
180 #if HAVE_NEON_ASM
168 // restore d8-d15 register values. 181 // restore d8-d15 register values.
169 vp9_pop_neon(store_reg); 182 vp9_pop_neon(store_reg);
183 #endif
170 184
171 return; 185 return;
172 } 186 }
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698