Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(328)

Side by Side Diff: source/libvpx/vp9/common/arm/neon/vp9_idct16x16_neon.c

Issue 54923004: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD | NEW
1 /* 1 /*
2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 #include "./vp9_rtcd.h" 11 #include "./vp9_rtcd.h"
12 #include "vp9/common/vp9_common.h" 12 #include "vp9/common/vp9_common.h"
13 13
14 extern void vp9_short_idct16x16_add_neon_pass1(int16_t *input, 14 void vp9_idct16x16_256_add_neon_pass1(const int16_t *input,
15 int16_t *output, 15 int16_t *output,
16 int output_stride); 16 int output_stride);
17 extern void vp9_short_idct16x16_add_neon_pass2(int16_t *src, 17 void vp9_idct16x16_256_add_neon_pass2(const int16_t *src,
18 int16_t *output, 18 int16_t *output,
19 int16_t *pass1Output, 19 int16_t *pass1Output,
20 int16_t skip_adding, 20 int16_t skip_adding,
21 uint8_t *dest, 21 uint8_t *dest,
22 int dest_stride); 22 int dest_stride);
23 extern void vp9_short_idct10_16x16_add_neon_pass1(int16_t *input, 23 void vp9_idct16x16_10_add_neon_pass1(const int16_t *input,
24 int16_t *output, 24 int16_t *output,
25 int output_stride); 25 int output_stride);
26 extern void vp9_short_idct10_16x16_add_neon_pass2(int16_t *src, 26 void vp9_idct16x16_10_add_neon_pass2(const int16_t *src,
27 int16_t *output, 27 int16_t *output,
28 int16_t *pass1Output, 28 int16_t *pass1Output,
29 int16_t skip_adding, 29 int16_t skip_adding,
30 uint8_t *dest, 30 uint8_t *dest,
31 int dest_stride); 31 int dest_stride);
32 extern void save_neon_registers();
33 extern void restore_neon_registers();
34 32
33 /* For ARM NEON, d8-d15 are callee-saved registers, and need to be saved. */
34 extern void vp9_push_neon(int64_t *store);
35 extern void vp9_pop_neon(int64_t *store);
35 36
36 void vp9_short_idct16x16_add_neon(int16_t *input, 37 void vp9_idct16x16_256_add_neon(const int16_t *input,
37 uint8_t *dest, int dest_stride) { 38 uint8_t *dest, int dest_stride) {
39 int64_t store_reg[8];
38 int16_t pass1_output[16*16] = {0}; 40 int16_t pass1_output[16*16] = {0};
39 int16_t row_idct_output[16*16] = {0}; 41 int16_t row_idct_output[16*16] = {0};
40 42
41 // save d8-d15 register values. 43 // save d8-d15 register values.
42 save_neon_registers(); 44 vp9_push_neon(store_reg);
43 45
44 /* Parallel idct on the upper 8 rows */ 46 /* Parallel idct on the upper 8 rows */
45 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the 47 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
46 // stage 6 result in pass1_output. 48 // stage 6 result in pass1_output.
47 vp9_short_idct16x16_add_neon_pass1(input, pass1_output, 8); 49 vp9_idct16x16_256_add_neon_pass1(input, pass1_output, 8);
48 50
49 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines 51 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
50 // with result in pass1(pass1_output) to calculate final result in stage 7 52 // with result in pass1(pass1_output) to calculate final result in stage 7
51 // which will be saved into row_idct_output. 53 // which will be saved into row_idct_output.
52 vp9_short_idct16x16_add_neon_pass2(input+1, 54 vp9_idct16x16_256_add_neon_pass2(input+1,
53 row_idct_output, 55 row_idct_output,
54 pass1_output, 56 pass1_output,
55 0, 57 0,
56 dest, 58 dest,
57 dest_stride); 59 dest_stride);
58 60
59 /* Parallel idct on the lower 8 rows */ 61 /* Parallel idct on the lower 8 rows */
60 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the 62 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
61 // stage 6 result in pass1_output. 63 // stage 6 result in pass1_output.
62 vp9_short_idct16x16_add_neon_pass1(input+8*16, pass1_output, 8); 64 vp9_idct16x16_256_add_neon_pass1(input+8*16, pass1_output, 8);
63 65
64 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines 66 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
65 // with result in pass1(pass1_output) to calculate final result in stage 7 67 // with result in pass1(pass1_output) to calculate final result in stage 7
66 // which will be saved into row_idct_output. 68 // which will be saved into row_idct_output.
67 vp9_short_idct16x16_add_neon_pass2(input+8*16+1, 69 vp9_idct16x16_256_add_neon_pass2(input+8*16+1,
68 row_idct_output+8, 70 row_idct_output+8,
69 pass1_output, 71 pass1_output,
70 0, 72 0,
71 dest, 73 dest,
72 dest_stride); 74 dest_stride);
73 75
74 /* Parallel idct on the left 8 columns */ 76 /* Parallel idct on the left 8 columns */
75 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the 77 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
76 // stage 6 result in pass1_output. 78 // stage 6 result in pass1_output.
77 vp9_short_idct16x16_add_neon_pass1(row_idct_output, pass1_output, 8); 79 vp9_idct16x16_256_add_neon_pass1(row_idct_output, pass1_output, 8);
78 80
79 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines 81 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
80 // with result in pass1(pass1_output) to calculate final result in stage 7. 82 // with result in pass1(pass1_output) to calculate final result in stage 7.
81 // Then add the result to the destination data. 83 // Then add the result to the destination data.
82 vp9_short_idct16x16_add_neon_pass2(row_idct_output+1, 84 vp9_idct16x16_256_add_neon_pass2(row_idct_output+1,
83 row_idct_output, 85 row_idct_output,
84 pass1_output, 86 pass1_output,
85 1, 87 1,
86 dest, 88 dest,
87 dest_stride); 89 dest_stride);
88 90
89 /* Parallel idct on the right 8 columns */ 91 /* Parallel idct on the right 8 columns */
90 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the 92 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
91 // stage 6 result in pass1_output. 93 // stage 6 result in pass1_output.
92 vp9_short_idct16x16_add_neon_pass1(row_idct_output+8*16, pass1_output, 8); 94 vp9_idct16x16_256_add_neon_pass1(row_idct_output+8*16, pass1_output, 8);
93 95
94 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines 96 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
95 // with result in pass1(pass1_output) to calculate final result in stage 7. 97 // with result in pass1(pass1_output) to calculate final result in stage 7.
96 // Then add the result to the destination data. 98 // Then add the result to the destination data.
97 vp9_short_idct16x16_add_neon_pass2(row_idct_output+8*16+1, 99 vp9_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1,
98 row_idct_output+8, 100 row_idct_output+8,
99 pass1_output, 101 pass1_output,
100 1, 102 1,
101 dest+8, 103 dest+8,
102 dest_stride); 104 dest_stride);
103 105
104 // restore d8-d15 register values. 106 // restore d8-d15 register values.
105 restore_neon_registers(); 107 vp9_pop_neon(store_reg);
106 108
107 return; 109 return;
108 } 110 }
109 111
110 void vp9_short_idct10_16x16_add_neon(int16_t *input, 112 void vp9_idct16x16_10_add_neon(const int16_t *input,
111 uint8_t *dest, int dest_stride) { 113 uint8_t *dest, int dest_stride) {
114 int64_t store_reg[8];
112 int16_t pass1_output[16*16] = {0}; 115 int16_t pass1_output[16*16] = {0};
113 int16_t row_idct_output[16*16] = {0}; 116 int16_t row_idct_output[16*16] = {0};
114 117
115 // save d8-d15 register values. 118 // save d8-d15 register values.
116 save_neon_registers(); 119 vp9_push_neon(store_reg);
117 120
118 /* Parallel idct on the upper 8 rows */ 121 /* Parallel idct on the upper 8 rows */
119 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the 122 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
120 // stage 6 result in pass1_output. 123 // stage 6 result in pass1_output.
121 vp9_short_idct10_16x16_add_neon_pass1(input, pass1_output, 8); 124 vp9_idct16x16_10_add_neon_pass1(input, pass1_output, 8);
122 125
123 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines 126 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
124 // with result in pass1(pass1_output) to calculate final result in stage 7 127 // with result in pass1(pass1_output) to calculate final result in stage 7
125 // which will be saved into row_idct_output. 128 // which will be saved into row_idct_output.
126 vp9_short_idct10_16x16_add_neon_pass2(input+1, 129 vp9_idct16x16_10_add_neon_pass2(input+1,
127 row_idct_output, 130 row_idct_output,
128 pass1_output, 131 pass1_output,
129 0, 132 0,
130 dest, 133 dest,
131 dest_stride); 134 dest_stride);
132 135
133 /* Skip Parallel idct on the lower 8 rows as they are all 0s */ 136 /* Skip Parallel idct on the lower 8 rows as they are all 0s */
134 137
135 /* Parallel idct on the left 8 columns */ 138 /* Parallel idct on the left 8 columns */
136 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the 139 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
137 // stage 6 result in pass1_output. 140 // stage 6 result in pass1_output.
138 vp9_short_idct16x16_add_neon_pass1(row_idct_output, pass1_output, 8); 141 vp9_idct16x16_256_add_neon_pass1(row_idct_output, pass1_output, 8);
139 142
140 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines 143 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
141 // with result in pass1(pass1_output) to calculate final result in stage 7. 144 // with result in pass1(pass1_output) to calculate final result in stage 7.
142 // Then add the result to the destination data. 145 // Then add the result to the destination data.
143 vp9_short_idct16x16_add_neon_pass2(row_idct_output+1, 146 vp9_idct16x16_256_add_neon_pass2(row_idct_output+1,
144 row_idct_output, 147 row_idct_output,
145 pass1_output, 148 pass1_output,
146 1, 149 1,
147 dest, 150 dest,
148 dest_stride); 151 dest_stride);
149 152
150 /* Parallel idct on the right 8 columns */ 153 /* Parallel idct on the right 8 columns */
151 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the 154 // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
152 // stage 6 result in pass1_output. 155 // stage 6 result in pass1_output.
153 vp9_short_idct16x16_add_neon_pass1(row_idct_output+8*16, pass1_output, 8); 156 vp9_idct16x16_256_add_neon_pass1(row_idct_output+8*16, pass1_output, 8);
154 157
155 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines 158 // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
156 // with result in pass1(pass1_output) to calculate final result in stage 7. 159 // with result in pass1(pass1_output) to calculate final result in stage 7.
157 // Then add the result to the destination data. 160 // Then add the result to the destination data.
158 vp9_short_idct16x16_add_neon_pass2(row_idct_output+8*16+1, 161 vp9_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1,
159 row_idct_output+8, 162 row_idct_output+8,
160 pass1_output, 163 pass1_output,
161 1, 164 1,
162 dest+8, 165 dest+8,
163 dest_stride); 166 dest_stride);
164 167
165 // restore d8-d15 register values. 168 // restore d8-d15 register values.
166 restore_neon_registers(); 169 vp9_pop_neon(store_reg);
167 170
168 return; 171 return;
169 } 172 }
OLD | NEW
« no previous file with comments | « source/libvpx/build/make/thumb.pm ('k') | source/libvpx/vp9/common/arm/neon/vp9_idct32x32_neon.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698