Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(42)

Side by Side Diff: source/scale_gcc.cc

Issue 1525033005: change scale down by 4 to use rounding. (Closed)
Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: scale by 4 uses ssse3 now. Created 5 years ago.
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/scale_any.cc ('k') | source/scale_win.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 /* 1 /*
2 * Copyright 2013 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2013 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 268 matching lines...) Expand 10 before | Expand all | Expand 10 after
279 : "+r"(src_ptr), // %0 279 : "+r"(src_ptr), // %0
280 "+r"(dst_ptr), // %1 280 "+r"(dst_ptr), // %1
281 "+r"(dst_width) // %2 281 "+r"(dst_width) // %2
282 : "r"((intptr_t)(src_stride)) // %3 282 : "r"((intptr_t)(src_stride)) // %3
283 : "memory", "cc", NACL_R14 283 : "memory", "cc", NACL_R14
284 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" 284 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
285 ); 285 );
286 } 286 }
287 #endif // HAS_SCALEROWDOWN2_AVX2 287 #endif // HAS_SCALEROWDOWN2_AVX2
288 288
289 void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, 289 void ScaleRowDown4_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
290 uint8* dst_ptr, int dst_width) { 290 uint8* dst_ptr, int dst_width) {
291 asm volatile ( 291 asm volatile (
292 "pcmpeqb %%xmm5,%%xmm5 \n" 292 "pcmpeqb %%xmm5,%%xmm5 \n"
293 "psrld $0x18,%%xmm5 \n" 293 "psrld $0x18,%%xmm5 \n"
294 "pslld $0x10,%%xmm5 \n" 294 "pslld $0x10,%%xmm5 \n"
295 295
296 LABELALIGN 296 LABELALIGN
297 "1: \n" 297 "1: \n"
298 "movdqu " MEMACCESS(0) ",%%xmm0 \n" 298 "movdqu " MEMACCESS(0) ",%%xmm0 \n"
299 "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" 299 "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
300 "lea " MEMLEA(0x20,0) ",%0 \n" 300 "lea " MEMLEA(0x20,0) ",%0 \n"
301 "pand %%xmm5,%%xmm0 \n" 301 "pand %%xmm5,%%xmm0 \n"
302 "pand %%xmm5,%%xmm1 \n" 302 "pand %%xmm5,%%xmm1 \n"
303 "packuswb %%xmm1,%%xmm0 \n" 303 "packuswb %%xmm1,%%xmm0 \n"
304 "psrlw $0x8,%%xmm0 \n" 304 "psrlw $0x8,%%xmm0 \n"
305 "packuswb %%xmm0,%%xmm0 \n" 305 "packuswb %%xmm0,%%xmm0 \n"
306 "movq %%xmm0," MEMACCESS(1) " \n" 306 "movq %%xmm0," MEMACCESS(1) " \n"
307 "lea " MEMLEA(0x8,1) ",%1 \n" 307 "lea " MEMLEA(0x8,1) ",%1 \n"
308 "sub $0x8,%2 \n" 308 "sub $0x8,%2 \n"
309 "jg 1b \n" 309 "jg 1b \n"
310 : "+r"(src_ptr), // %0 310 : "+r"(src_ptr), // %0
311 "+r"(dst_ptr), // %1 311 "+r"(dst_ptr), // %1
312 "+r"(dst_width) // %2 312 "+r"(dst_width) // %2
313 :: "memory", "cc", "xmm0", "xmm1", "xmm5" 313 :: "memory", "cc", "xmm0", "xmm1", "xmm5"
314 ); 314 );
315 } 315 }
316 316
317 void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, 317 void ScaleRowDown4Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
318 uint8* dst_ptr, int dst_width) { 318 uint8* dst_ptr, int dst_width) {
319 intptr_t stridex3 = 0; 319 intptr_t stridex3 = 0;
320 asm volatile ( 320 asm volatile (
321 "pcmpeqb %%xmm7,%%xmm7 \n" 321 "pcmpeqb %%xmm4,%%xmm4 \n"
322 "psrlw $0x8,%%xmm7 \n" 322 "psrlw $0xf,%%xmm4 \n"
323 "movdqa %%xmm4,%%xmm5 \n"
324 "packuswb %%xmm4,%%xmm4 \n"
325 "psllw $0x3,%%xmm5 \n"
323 "lea " MEMLEA4(0x00,4,4,2) ",%3 \n" 326 "lea " MEMLEA4(0x00,4,4,2) ",%3 \n"
324 327
325 LABELALIGN 328 LABELALIGN
326 "1: \n" 329 "1: \n"
327 "movdqu " MEMACCESS(0) ",%%xmm0 \n" 330 "movdqu " MEMACCESS(0) ",%%xmm0 \n"
328 "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" 331 "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
329 MEMOPREG(movdqu,0x00,0,4,1,xmm2) // movdqu (%0,%4,1),%%xmm2 332 MEMOPREG(movdqu,0x00,0,4,1,xmm2) // movdqu (%0,%4,1),%%xmm2
330 MEMOPREG(movdqu,0x10,0,4,1,xmm3) // movdqu 0x10(%0,%4,1),%%xmm3 333 MEMOPREG(movdqu,0x10,0,4,1,xmm3) // movdqu 0x10(%0,%4,1),%%xmm3
331 "pavgb %%xmm2,%%xmm0 \n" 334 "pmaddubsw %%xmm4,%%xmm0 \n"
332 "pavgb %%xmm3,%%xmm1 \n" 335 "pmaddubsw %%xmm4,%%xmm1 \n"
336 "pmaddubsw %%xmm4,%%xmm2 \n"
337 "pmaddubsw %%xmm4,%%xmm3 \n"
338 "paddw %%xmm2,%%xmm0 \n"
339 "paddw %%xmm3,%%xmm1 \n"
333 MEMOPREG(movdqu,0x00,0,4,2,xmm2) // movdqu (%0,%4,2),%%xmm2 340 MEMOPREG(movdqu,0x00,0,4,2,xmm2) // movdqu (%0,%4,2),%%xmm2
334 MEMOPREG(movdqu,0x10,0,4,2,xmm3) // movdqu 0x10(%0,%4,2),%%xmm3 341 MEMOPREG(movdqu,0x10,0,4,2,xmm3) // movdqu 0x10(%0,%4,2),%%xmm3
335 MEMOPREG(movdqu,0x00,0,3,1,xmm4) // movdqu (%0,%3,1),%%xmm4 342 "pmaddubsw %%xmm4,%%xmm2 \n"
336 MEMOPREG(movdqu,0x10,0,3,1,xmm5) // movdqu 0x10(%0,%3,1),%%xmm5 343 "pmaddubsw %%xmm4,%%xmm3 \n"
344 "paddw %%xmm2,%%xmm0 \n"
345 "paddw %%xmm3,%%xmm1 \n"
346 MEMOPREG(movdqu,0x00,0,3,1,xmm2) // movdqu (%0,%3,1),%%xmm2
347 MEMOPREG(movdqu,0x10,0,3,1,xmm3) // movdqu 0x10(%0,%3,1),%%xmm3
337 "lea " MEMLEA(0x20,0) ",%0 \n" 348 "lea " MEMLEA(0x20,0) ",%0 \n"
338 "pavgb %%xmm4,%%xmm2 \n" 349 "pmaddubsw %%xmm4,%%xmm2 \n"
339 "pavgb %%xmm2,%%xmm0 \n" 350 "pmaddubsw %%xmm4,%%xmm3 \n"
340 "pavgb %%xmm5,%%xmm3 \n" 351 "paddw %%xmm2,%%xmm0 \n"
341 "pavgb %%xmm3,%%xmm1 \n" 352 "paddw %%xmm3,%%xmm1 \n"
342 "movdqa %%xmm0,%%xmm2 \n" 353 "phaddw %%xmm1,%%xmm0 \n"
343 "psrlw $0x8,%%xmm0 \n" 354 "paddw %%xmm5,%%xmm0 \n"
344 "movdqa %%xmm1,%%xmm3 \n" 355 "psrlw $0x4,%%xmm0 \n"
345 "psrlw $0x8,%%xmm1 \n" 356 "packuswb %%xmm0,%%xmm0 \n"
346 "pand %%xmm7,%%xmm2 \n"
347 "pand %%xmm7,%%xmm3 \n"
348 "pavgw %%xmm2,%%xmm0 \n"
349 "pavgw %%xmm3,%%xmm1 \n"
350 "packuswb %%xmm1,%%xmm0 \n"
351 "movdqa %%xmm0,%%xmm2 \n"
352 "psrlw $0x8,%%xmm0 \n"
353 "pand %%xmm7,%%xmm2 \n"
354 "pavgw %%xmm2,%%xmm0 \n"
355 "packuswb %%xmm0,%%xmm0 \n"
356 "movq %%xmm0," MEMACCESS(1) " \n" 357 "movq %%xmm0," MEMACCESS(1) " \n"
357 "lea " MEMLEA(0x8,1) ",%1 \n" 358 "lea " MEMLEA(0x8,1) ",%1 \n"
358 "sub $0x8,%2 \n" 359 "sub $0x8,%2 \n"
359 "jg 1b \n" 360 "jg 1b \n"
360 : "+r"(src_ptr), // %0 361 : "+r"(src_ptr), // %0
361 "+r"(dst_ptr), // %1 362 "+r"(dst_ptr), // %1
362 "+r"(dst_width), // %2 363 "+r"(dst_width), // %2
363 "+r"(stridex3) // %3 364 "+r"(stridex3) // %3
364 : "r"((intptr_t)(src_stride)) // %4 365 : "r"((intptr_t)(src_stride)) // %4
365 : "memory", "cc", NACL_R14 366 : "memory", "cc", NACL_R14
366 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm7" 367 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
367 ); 368 );
368 } 369 }
369 370
370 void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, 371 void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
371 uint8* dst_ptr, int dst_width) { 372 uint8* dst_ptr, int dst_width) {
372 asm volatile ( 373 asm volatile (
373 "movdqa %0,%%xmm3 \n" 374 "movdqa %0,%%xmm3 \n"
374 "movdqa %1,%%xmm4 \n" 375 "movdqa %1,%%xmm4 \n"
375 "movdqa %2,%%xmm5 \n" 376 "movdqa %2,%%xmm5 \n"
376 : 377 :
(...skipping 818 matching lines...) Expand 10 before | Expand all | Expand 10 after
1195 ); 1196 );
1196 return num; 1197 return num;
1197 } 1198 }
1198 1199
1199 #endif // defined(__x86_64__) || defined(__i386__) 1200 #endif // defined(__x86_64__) || defined(__i386__)
1200 1201
1201 #ifdef __cplusplus 1202 #ifdef __cplusplus
1202 } // extern "C" 1203 } // extern "C"
1203 } // namespace libyuv 1204 } // namespace libyuv
1204 #endif 1205 #endif
OLD | NEW
« no previous file with comments | « source/scale_any.cc ('k') | source/scale_win.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698