Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(262)

Side by Side Diff: source/libvpx/vp9/common/x86/vp9_asm_stubs.c

Issue 23440041: Libvpx: Pull from upstream (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 7 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 #include <assert.h> 11 #include <assert.h>
12 12
13 #include "./vpx_config.h" 13 #include "./vpx_config.h"
14 #include "./vp9_rtcd.h" 14 #include "./vp9_rtcd.h"
15 #include "vpx_ports/mem.h" 15 #include "vpx_ports/mem.h"
16
17 /////////////////////////////////////////////////////////////////////////// 16 ///////////////////////////////////////////////////////////////////////////
18 // the mmx function that does the bilinear filtering and var calculation // 17 // the mmx function that does the bilinear filtering and var calculation //
19 // in one pass // 18 // in one pass //
20 /////////////////////////////////////////////////////////////////////////// 19 ///////////////////////////////////////////////////////////////////////////
21 DECLARE_ALIGNED(16, const short, vp9_bilinear_filters_mmx[16][8]) = { 20 DECLARE_ALIGNED(16, const short, vp9_bilinear_filters_mmx[16][8]) = {
22 { 128, 128, 128, 128, 0, 0, 0, 0 }, 21 { 128, 128, 128, 128, 0, 0, 0, 0 },
23 { 120, 120, 120, 120, 8, 8, 8, 8 }, 22 { 120, 120, 120, 120, 8, 8, 8, 8 },
24 { 112, 112, 112, 112, 16, 16, 16, 16 }, 23 { 112, 112, 112, 112, 16, 16, 16, 16 },
25 { 104, 104, 104, 104, 24, 24, 24, 24 }, 24 { 104, 104, 104, 104, 24, 24, 24, 24 },
26 { 96, 96, 96, 96, 32, 32, 32, 32 }, 25 { 96, 96, 96, 96, 32, 32, 32, 32 },
27 { 88, 88, 88, 88, 40, 40, 40, 40 }, 26 { 88, 88, 88, 88, 40, 40, 40, 40 },
28 { 80, 80, 80, 80, 48, 48, 48, 48 }, 27 { 80, 80, 80, 80, 48, 48, 48, 48 },
29 { 72, 72, 72, 72, 56, 56, 56, 56 }, 28 { 72, 72, 72, 72, 56, 56, 56, 56 },
30 { 64, 64, 64, 64, 64, 64, 64, 64 }, 29 { 64, 64, 64, 64, 64, 64, 64, 64 },
31 { 56, 56, 56, 56, 72, 72, 72, 72 }, 30 { 56, 56, 56, 56, 72, 72, 72, 72 },
32 { 48, 48, 48, 48, 80, 80, 80, 80 }, 31 { 48, 48, 48, 48, 80, 80, 80, 80 },
33 { 40, 40, 40, 40, 88, 88, 88, 88 }, 32 { 40, 40, 40, 40, 88, 88, 88, 88 },
34 { 32, 32, 32, 32, 96, 96, 96, 96 }, 33 { 32, 32, 32, 32, 96, 96, 96, 96 },
35 { 24, 24, 24, 24, 104, 104, 104, 104 }, 34 { 24, 24, 24, 24, 104, 104, 104, 104 },
36 { 16, 16, 16, 16, 112, 112, 112, 112 }, 35 { 16, 16, 16, 16, 112, 112, 112, 112 },
37 { 8, 8, 8, 8, 120, 120, 120, 120 } 36 { 8, 8, 8, 8, 120, 120, 120, 120 }
38 }; 37 };
39 38
40
41 int num_func_entry = 0;
42 #if HAVE_SSSE3 39 #if HAVE_SSSE3
43 void vp9_filter_block1d16_v8_ssse3(const unsigned char *src_ptr, 40 void vp9_filter_block1d16_v8_ssse3(const unsigned char *src_ptr,
44 const unsigned int src_pitch, 41 const unsigned int src_pitch,
45 unsigned char *output_ptr, 42 unsigned char *output_ptr,
46 unsigned int out_pitch, 43 unsigned int out_pitch,
47 unsigned int output_height, 44 unsigned int output_height,
48 const short *filter); 45 const short *filter);
49 46
50 void vp9_filter_block1d16_v8_intrin_ssse3(const unsigned char *src_ptr,
51 const unsigned int src_pitch,
52 unsigned char *output_ptr,
53 unsigned int out_pitch,
54 unsigned int output_height,
55 const short *filter);
56
57 void vp9_filter_block1d16_h8_ssse3(const unsigned char *src_ptr, 47 void vp9_filter_block1d16_h8_ssse3(const unsigned char *src_ptr,
58 const unsigned int src_pitch, 48 const unsigned int src_pitch,
59 unsigned char *output_ptr, 49 unsigned char *output_ptr,
60 unsigned int out_pitch, 50 unsigned int out_pitch,
61 unsigned int output_height, 51 unsigned int output_height,
62 const short *filter); 52 const short *filter);
63 53
64 void vp9_filter_block1d16_h8_intrin_ssse3(const unsigned char *src_ptr,
65 const unsigned int src_pitch,
66 unsigned char *output_ptr,
67 unsigned int out_pitch,
68 unsigned int output_height,
69 const short *filter);
70
71 void vp9_filter_block1d8_v8_ssse3(const unsigned char *src_ptr, 54 void vp9_filter_block1d8_v8_ssse3(const unsigned char *src_ptr,
72 const unsigned int src_pitch, 55 const unsigned int src_pitch,
73 unsigned char *output_ptr, 56 unsigned char *output_ptr,
74 unsigned int out_pitch, 57 unsigned int out_pitch,
75 unsigned int output_height, 58 unsigned int output_height,
76 const short *filter); 59 const short *filter);
77 60
78 void vp9_filter_block1d8_v8_intrin_ssse3(const unsigned char *src_ptr,
79 const unsigned int src_pitch,
80 unsigned char *output_ptr,
81 unsigned int out_pitch,
82 unsigned int output_height,
83 const short *filter);
84
85 void vp9_filter_block1d8_h8_ssse3(const unsigned char *src_ptr, 61 void vp9_filter_block1d8_h8_ssse3(const unsigned char *src_ptr,
86 const unsigned int src_pitch, 62 const unsigned int src_pitch,
87 unsigned char *output_ptr, 63 unsigned char *output_ptr,
88 unsigned int out_pitch, 64 unsigned int out_pitch,
89 unsigned int output_height, 65 unsigned int output_height,
90 const short *filter); 66 const short *filter);
91 67
92 void vp9_filter_block1d8_h8_intrin_ssse3(const unsigned char *src_ptr,
93 const unsigned int src_pitch,
94 unsigned char *output_ptr,
95 unsigned int out_pitch,
96 unsigned int output_height,
97 const short *filter);
98
99 void vp9_filter_block1d4_v8_ssse3(const unsigned char *src_ptr, 68 void vp9_filter_block1d4_v8_ssse3(const unsigned char *src_ptr,
100 const unsigned int src_pitch, 69 const unsigned int src_pitch,
101 unsigned char *output_ptr, 70 unsigned char *output_ptr,
102 unsigned int out_pitch, 71 unsigned int out_pitch,
103 unsigned int output_height, 72 unsigned int output_height,
104 const short *filter); 73 const short *filter);
105 74
106 void vp9_filter_block1d4_v8_intrin_ssse3(const unsigned char *src_ptr,
107 const unsigned int src_pitch,
108 unsigned char *output_ptr,
109 unsigned int out_pitch,
110 unsigned int output_height,
111 const short *filter);
112
113
114 void vp9_filter_block1d4_h8_ssse3(const unsigned char *src_ptr, 75 void vp9_filter_block1d4_h8_ssse3(const unsigned char *src_ptr,
115 const unsigned int src_pitch, 76 const unsigned int src_pitch,
116 unsigned char *output_ptr, 77 unsigned char *output_ptr,
117 unsigned int out_pitch, 78 unsigned int out_pitch,
118 unsigned int output_height, 79 unsigned int output_height,
119 const short *filter); 80 const short *filter);
120 81
121 void vp9_filter_block1d4_h8_intrin_ssse3(const unsigned char *src_ptr,
122 const unsigned int src_pitch,
123 unsigned char *output_ptr,
124 unsigned int out_pitch,
125 unsigned int output_height,
126 const short *filter);
127
128 void vp9_filter_block1d16_v8_avg_ssse3(const unsigned char *src_ptr, 82 void vp9_filter_block1d16_v8_avg_ssse3(const unsigned char *src_ptr,
129 const unsigned int src_pitch, 83 const unsigned int src_pitch,
130 unsigned char *output_ptr, 84 unsigned char *output_ptr,
131 unsigned int out_pitch, 85 unsigned int out_pitch,
132 unsigned int output_height, 86 unsigned int output_height,
133 const short *filter); 87 const short *filter);
134 88
135 void vp9_filter_block1d16_h8_avg_ssse3(const unsigned char *src_ptr, 89 void vp9_filter_block1d16_h8_avg_ssse3(const unsigned char *src_ptr,
136 const unsigned int src_pitch, 90 const unsigned int src_pitch,
137 unsigned char *output_ptr, 91 unsigned char *output_ptr,
(...skipping 30 matching lines...) Expand all
168 const short *filter); 122 const short *filter);
169 123
170 void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, 124 void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
171 uint8_t *dst, ptrdiff_t dst_stride, 125 uint8_t *dst, ptrdiff_t dst_stride,
172 const int16_t *filter_x, int x_step_q4, 126 const int16_t *filter_x, int x_step_q4,
173 const int16_t *filter_y, int y_step_q4, 127 const int16_t *filter_y, int y_step_q4,
174 int w, int h) { 128 int w, int h) {
175 /* Ensure the filter can be compressed to int16_t. */ 129 /* Ensure the filter can be compressed to int16_t. */
176 if (x_step_q4 == 16 && filter_x[3] != 128) { 130 if (x_step_q4 == 16 && filter_x[3] != 128) {
177 while (w >= 16) { 131 while (w >= 16) {
178 vp9_filter_block1d16_h8_intrin_ssse3(src, src_stride, 132 vp9_filter_block1d16_h8_ssse3(src, src_stride,
179 dst, dst_stride, 133 dst, dst_stride,
180 h, filter_x); 134 h, filter_x);
181 src += 16; 135 src += 16;
182 dst += 16; 136 dst += 16;
183 w -= 16; 137 w -= 16;
184 } 138 }
185 while (w >= 8) { 139 while (w >= 8) {
186 vp9_filter_block1d8_h8_intrin_ssse3(src, src_stride, 140 vp9_filter_block1d8_h8_ssse3(src, src_stride,
187 dst, dst_stride, 141 dst, dst_stride,
188 h, filter_x); 142 h, filter_x);
189 src += 8; 143 src += 8;
190 dst += 8; 144 dst += 8;
191 w -= 8; 145 w -= 8;
192 } 146 }
193 while (w >= 4) { 147 while (w >= 4) {
194 vp9_filter_block1d4_h8_intrin_ssse3(src, src_stride, 148 vp9_filter_block1d4_h8_ssse3(src, src_stride,
195 dst, dst_stride, 149 dst, dst_stride,
196 h, filter_x); 150 h, filter_x);
197 src += 4; 151 src += 4;
198 dst += 4; 152 dst += 4;
199 w -= 4; 153 w -= 4;
200 } 154 }
201 } 155 }
202 if (w) { 156 if (w) {
203 vp9_convolve8_horiz_c(src, src_stride, dst, dst_stride, 157 vp9_convolve8_horiz_c(src, src_stride, dst, dst_stride,
204 filter_x, x_step_q4, filter_y, y_step_q4, 158 filter_x, x_step_q4, filter_y, y_step_q4,
205 w, h); 159 w, h);
206 } 160 }
207 } 161 }
208 162
209 void vp9_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, 163 void vp9_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
210 uint8_t *dst, ptrdiff_t dst_stride, 164 uint8_t *dst, ptrdiff_t dst_stride,
211 const int16_t *filter_x, int x_step_q4, 165 const int16_t *filter_x, int x_step_q4,
212 const int16_t *filter_y, int y_step_q4, 166 const int16_t *filter_y, int y_step_q4,
213 int w, int h) { 167 int w, int h) {
214 if (y_step_q4 == 16 && filter_y[3] != 128) { 168 if (y_step_q4 == 16 && filter_y[3] != 128) {
215 while (w >= 16) { 169 while (w >= 16) {
216 vp9_filter_block1d16_v8_intrin_ssse3(src - src_stride * 3, src_stride, 170 vp9_filter_block1d16_v8_ssse3(src - src_stride * 3, src_stride,
217 dst, dst_stride, 171 dst, dst_stride,
218 h, filter_y); 172 h, filter_y);
219 src += 16; 173 src += 16;
220 dst += 16; 174 dst += 16;
221 w -= 16; 175 w -= 16;
222 } 176 }
223 while (w >= 8) { 177 while (w >= 8) {
224 vp9_filter_block1d8_v8_intrin_ssse3(src - src_stride * 3, src_stride, 178 vp9_filter_block1d8_v8_ssse3(src - src_stride * 3, src_stride,
225 dst, dst_stride, 179 dst, dst_stride,
226 h, filter_y); 180 h, filter_y);
227 src += 8; 181 src += 8;
228 dst += 8; 182 dst += 8;
229 w -= 8; 183 w -= 8;
230 } 184 }
231 while (w >= 4) { 185 while (w >= 4) {
232 vp9_filter_block1d4_v8_intrin_ssse3(src - src_stride * 3, src_stride, 186 vp9_filter_block1d4_v8_ssse3(src - src_stride * 3, src_stride,
233 dst, dst_stride, 187 dst, dst_stride,
234 h, filter_y); 188 h, filter_y);
235 src += 4; 189 src += 4;
236 dst += 4; 190 dst += 4;
237 w -= 4; 191 w -= 4;
238 } 192 }
239 } 193 }
240 if (w) { 194 if (w) {
241 vp9_convolve8_vert_c(src, src_stride, dst, dst_stride, 195 vp9_convolve8_vert_c(src, src_stride, dst, dst_stride,
242 filter_x, x_step_q4, filter_y, y_step_q4, 196 filter_x, x_step_q4, filter_y, y_step_q4,
243 w, h); 197 w, h);
244 } 198 }
245 } 199 }
246 200
247 void vp9_convolve8_avg_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, 201 void vp9_convolve8_avg_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
248 uint8_t *dst, ptrdiff_t dst_stride, 202 uint8_t *dst, ptrdiff_t dst_stride,
249 const int16_t *filter_x, int x_step_q4, 203 const int16_t *filter_x, int x_step_q4,
250 const int16_t *filter_y, int y_step_q4, 204 const int16_t *filter_y, int y_step_q4,
251 int w, int h) { 205 int w, int h) {
252 if (x_step_q4 == 16 && filter_x[3] != 128) { 206 if (x_step_q4 == 16 && filter_x[3] != 128) {
253 while (w >= 16) { 207 while (w >= 16) {
254 vp9_filter_block1d16_h8_avg_ssse3(src, src_stride, 208 vp9_filter_block1d16_h8_avg_ssse3(src, src_stride,
255 dst, dst_stride, 209 dst, dst_stride,
256 h, filter_x); 210 h, filter_x);
257 src += 16; 211 src += 16;
258 dst += 16; 212 dst += 16;
259 w -= 16; 213 w -= 16;
260 } 214 }
261 while (w >= 8) { 215 while (w >= 8) {
262 vp9_filter_block1d8_h8_avg_ssse3(src, src_stride, 216 vp9_filter_block1d8_h8_avg_ssse3(src, src_stride,
263 dst, dst_stride, 217 dst, dst_stride,
264 h, filter_x); 218 h, filter_x);
265 src += 8; 219 src += 8;
266 dst += 8; 220 dst += 8;
267 w -= 8; 221 w -= 8;
268 } 222 }
269 while (w >= 4) { 223 while (w >= 4) {
270 vp9_filter_block1d4_h8_avg_ssse3(src, src_stride, 224 vp9_filter_block1d4_h8_avg_ssse3(src, src_stride,
271 dst, dst_stride, 225 dst, dst_stride,
272 h, filter_x); 226 h, filter_x);
273 src += 4; 227 src += 4;
274 dst += 4; 228 dst += 4;
275 w -= 4; 229 w -= 4;
276 } 230 }
277 } 231 }
278 if (w) { 232 if (w) {
279 vp9_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, 233 vp9_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
280 filter_x, x_step_q4, filter_y, y_step_q4, 234 filter_x, x_step_q4, filter_y, y_step_q4,
281 w, h); 235 w, h);
282 } 236 }
283 } 237 }
284 238
285 void vp9_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, 239 void vp9_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
286 uint8_t *dst, ptrdiff_t dst_stride, 240 uint8_t *dst, ptrdiff_t dst_stride,
287 const int16_t *filter_x, int x_step_q4, 241 const int16_t *filter_x, int x_step_q4,
288 const int16_t *filter_y, int y_step_q4, 242 const int16_t *filter_y, int y_step_q4,
289 int w, int h) { 243 int w, int h) {
290 if (y_step_q4 == 16 && filter_y[3] != 128) { 244 if (y_step_q4 == 16 && filter_y[3] != 128) {
291 while (w >= 16) { 245 while (w >= 16) {
292 vp9_filter_block1d16_v8_avg_ssse3(src - src_stride * 3, src_stride, 246 vp9_filter_block1d16_v8_avg_ssse3(src - src_stride * 3, src_stride,
293 dst, dst_stride, 247 dst, dst_stride,
294 h, filter_y); 248 h, filter_y);
295 src += 16; 249 src += 16;
296 dst += 16; 250 dst += 16;
297 w -= 16; 251 w -= 16;
298 } 252 }
299 while (w >= 8) { 253 while (w >= 8) {
300 vp9_filter_block1d8_v8_avg_ssse3(src - src_stride * 3, src_stride, 254 vp9_filter_block1d8_v8_avg_ssse3(src - src_stride * 3, src_stride,
301 dst, dst_stride, 255 dst, dst_stride,
302 h, filter_y); 256 h, filter_y);
303 src += 8; 257 src += 8;
304 dst += 8; 258 dst += 8;
305 w -= 8; 259 w -= 8;
306 } 260 }
307 while (w >= 4) { 261 while (w >= 4) {
308 vp9_filter_block1d4_v8_avg_ssse3(src - src_stride * 3, src_stride, 262 vp9_filter_block1d4_v8_avg_ssse3(src - src_stride * 3, src_stride,
309 dst, dst_stride, 263 dst, dst_stride,
310 h, filter_y); 264 h, filter_y);
311 src += 4; 265 src += 4;
312 dst += 4; 266 dst += 4;
313 w -= 4; 267 w -= 4;
314 } 268 }
315 } 269 }
316 if (w) { 270 if (w) {
317 vp9_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, 271 vp9_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
318 filter_x, x_step_q4, filter_y, y_step_q4, 272 filter_x, x_step_q4, filter_y, y_step_q4,
319 w, h); 273 w, h);
320 } 274 }
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
356 w, h + 7); 310 w, h + 7);
357 vp9_convolve8_avg_vert_ssse3(fdata2 + 3 * 64, 64, dst, dst_stride, 311 vp9_convolve8_avg_vert_ssse3(fdata2 + 3 * 64, 64, dst, dst_stride,
358 filter_x, x_step_q4, filter_y, y_step_q4, 312 filter_x, x_step_q4, filter_y, y_step_q4,
359 w, h); 313 w, h);
360 } else { 314 } else {
361 vp9_convolve8_avg_c(src, src_stride, dst, dst_stride, 315 vp9_convolve8_avg_c(src, src_stride, dst, dst_stride,
362 filter_x, x_step_q4, filter_y, y_step_q4, w, h); 316 filter_x, x_step_q4, filter_y, y_step_q4, w, h);
363 } 317 }
364 } 318 }
365 #endif 319 #endif
OLDNEW
« no previous file with comments | « source/libvpx/test/cpu_speed_test.cc ('k') | source/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698