Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(532)

Side by Side Diff: patched-ffmpeg-mt/libavcodec/ppc/dsputil_altivec.c

Issue 789004: ffmpeg roll of source to mar 9 version... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/ffmpeg/
Patch Set: '' Created 10 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2002 Brian Foley 2 * Copyright (c) 2002 Brian Foley
3 * Copyright (c) 2002 Dieter Shirley 3 * Copyright (c) 2002 Dieter Shirley
4 * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org> 4 * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
5 * 5 *
6 * This file is part of FFmpeg. 6 * This file is part of FFmpeg.
7 * 7 *
8 * FFmpeg is free software; you can redistribute it and/or 8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public 9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either 10 * License as published by the Free Software Foundation; either
(...skipping 10 matching lines...) Expand all
21 */ 21 */
22 22
23 #include "config.h" 23 #include "config.h"
24 #if HAVE_ALTIVEC_H 24 #if HAVE_ALTIVEC_H
25 #include <altivec.h> 25 #include <altivec.h>
26 #endif 26 #endif
27 #include "libavcodec/dsputil.h" 27 #include "libavcodec/dsputil.h"
28 #include "dsputil_ppc.h" 28 #include "dsputil_ppc.h"
29 #include "util_altivec.h" 29 #include "util_altivec.h"
30 #include "types_altivec.h" 30 #include "types_altivec.h"
31 #include "dsputil_altivec.h"
31 32
32 int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) 33 static int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
33 { 34 {
34 int i; 35 int i;
35 int s; 36 int s;
36 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0 ); 37 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0 );
37 vector unsigned char *tv; 38 vector unsigned char *tv;
38 vector unsigned char pix1v, pix2v, pix2iv, avgv, t5; 39 vector unsigned char pix1v, pix2v, pix2iv, avgv, t5;
39 vector unsigned int sad; 40 vector unsigned int sad;
40 vector signed int sumdiffs; 41 vector signed int sumdiffs;
41 42
42 s = 0; 43 s = 0;
(...skipping 24 matching lines...) Expand all
67 pix2 += line_size; 68 pix2 += line_size;
68 } 69 }
69 /* Sum up the four partial sums, and put the result into s */ 70 /* Sum up the four partial sums, and put the result into s */
70 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); 71 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
71 sumdiffs = vec_splat(sumdiffs, 3); 72 sumdiffs = vec_splat(sumdiffs, 3);
72 vec_ste(sumdiffs, 0, &s); 73 vec_ste(sumdiffs, 0, &s);
73 74
74 return s; 75 return s;
75 } 76 }
76 77
77 int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) 78 static int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
78 { 79 {
79 int i; 80 int i;
80 int s; 81 int s;
81 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0 ); 82 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0 );
82 vector unsigned char *tv; 83 vector unsigned char *tv;
83 vector unsigned char pix1v, pix2v, pix3v, avgv, t5; 84 vector unsigned char pix1v, pix2v, pix3v, avgv, t5;
84 vector unsigned int sad; 85 vector unsigned int sad;
85 vector signed int sumdiffs; 86 vector signed int sumdiffs;
86 uint8_t *pix3 = pix2 + line_size; 87 uint8_t *pix3 = pix2 + line_size;
87 88
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
123 124
124 } 125 }
125 126
126 /* Sum up the four partial sums, and put the result into s */ 127 /* Sum up the four partial sums, and put the result into s */
127 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); 128 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
128 sumdiffs = vec_splat(sumdiffs, 3); 129 sumdiffs = vec_splat(sumdiffs, 3);
129 vec_ste(sumdiffs, 0, &s); 130 vec_ste(sumdiffs, 0, &s);
130 return s; 131 return s;
131 } 132 }
132 133
133 int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) 134 static int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
134 { 135 {
135 int i; 136 int i;
136 int s; 137 int s;
137 uint8_t *pix3 = pix2 + line_size; 138 uint8_t *pix3 = pix2 + line_size;
138 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0 ); 139 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0 );
139 const vector unsigned short two = (const vector unsigned short)vec_splat_u16 (2); 140 const vector unsigned short two = (const vector unsigned short)vec_splat_u16 (2);
140 vector unsigned char *tv, avgv, t5; 141 vector unsigned char *tv, avgv, t5;
141 vector unsigned char pix1v, pix2v, pix3v, pix2iv, pix3iv; 142 vector unsigned char pix1v, pix2v, pix3v, pix2iv, pix3iv;
142 vector unsigned short pix2lv, pix2hv, pix2ilv, pix2ihv; 143 vector unsigned short pix2lv, pix2hv, pix2ilv, pix2ihv;
143 vector unsigned short pix3lv, pix3hv, pix3ilv, pix3ihv; 144 vector unsigned short pix3lv, pix3hv, pix3ilv, pix3ihv;
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after
218 t2 = t4; 219 t2 = t4;
219 } 220 }
220 /* Sum up the four partial sums, and put the result into s */ 221 /* Sum up the four partial sums, and put the result into s */
221 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); 222 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
222 sumdiffs = vec_splat(sumdiffs, 3); 223 sumdiffs = vec_splat(sumdiffs, 3);
223 vec_ste(sumdiffs, 0, &s); 224 vec_ste(sumdiffs, 0, &s);
224 225
225 return s; 226 return s;
226 } 227 }
227 228
228 int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) 229 static int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
229 { 230 {
230 int i; 231 int i;
231 int s; 232 int s;
232 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0) ; 233 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0) ;
233 vector unsigned char perm1, perm2, *pix1v, *pix2v; 234 vector unsigned char perm1, perm2, *pix1v, *pix2v;
234 vector unsigned char t1, t2, t3,t4, t5; 235 vector unsigned char t1, t2, t3,t4, t5;
235 vector unsigned int sad; 236 vector unsigned int sad;
236 vector signed int sumdiffs; 237 vector signed int sumdiffs;
237 238
238 sad = (vector unsigned int)vec_splat_u32(0); 239 sad = (vector unsigned int)vec_splat_u32(0);
(...skipping 21 matching lines...) Expand all
260 } 261 }
261 262
262 /* Sum up the four partial sums, and put the result into s */ 263 /* Sum up the four partial sums, and put the result into s */
263 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); 264 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
264 sumdiffs = vec_splat(sumdiffs, 3); 265 sumdiffs = vec_splat(sumdiffs, 3);
265 vec_ste(sumdiffs, 0, &s); 266 vec_ste(sumdiffs, 0, &s);
266 267
267 return s; 268 return s;
268 } 269 }
269 270
270 int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) 271 static int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
271 { 272 {
272 int i; 273 int i;
273 int s; 274 int s;
274 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0) ; 275 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0) ;
275 vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v; 276 vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v;
276 vector unsigned char t1, t2, t3,t4, t5; 277 vector unsigned char t1, t2, t3,t4, t5;
277 vector unsigned int sad; 278 vector unsigned int sad;
278 vector signed int sumdiffs; 279 vector signed int sumdiffs;
279 280
280 sad = (vector unsigned int)vec_splat_u32(0); 281 sad = (vector unsigned int)vec_splat_u32(0);
(...skipping 24 matching lines...) Expand all
305 } 306 }
306 307
307 /* Sum up the four partial sums, and put the result into s */ 308 /* Sum up the four partial sums, and put the result into s */
308 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); 309 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
309 sumdiffs = vec_splat(sumdiffs, 3); 310 sumdiffs = vec_splat(sumdiffs, 3);
310 vec_ste(sumdiffs, 0, &s); 311 vec_ste(sumdiffs, 0, &s);
311 312
312 return s; 313 return s;
313 } 314 }
314 315
315 int pix_norm1_altivec(uint8_t *pix, int line_size) 316 static int pix_norm1_altivec(uint8_t *pix, int line_size)
316 { 317 {
317 int i; 318 int i;
318 int s; 319 int s;
319 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0) ; 320 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0) ;
320 vector unsigned char *tv; 321 vector unsigned char *tv;
321 vector unsigned char pixv; 322 vector unsigned char pixv;
322 vector unsigned int sv; 323 vector unsigned int sv;
323 vector signed int sum; 324 vector signed int sum;
324 325
325 sv = (vector unsigned int)vec_splat_u32(0); 326 sv = (vector unsigned int)vec_splat_u32(0);
(...skipping 15 matching lines...) Expand all
341 vec_ste(sum, 0, &s); 342 vec_ste(sum, 0, &s);
342 343
343 return s; 344 return s;
344 } 345 }
345 346
346 /** 347 /**
347 * Sum of Squared Errors for a 8x8 block. 348 * Sum of Squared Errors for a 8x8 block.
348 * AltiVec-enhanced. 349 * AltiVec-enhanced.
349 * It's the sad8_altivec code above w/ squaring added. 350 * It's the sad8_altivec code above w/ squaring added.
350 */ 351 */
351 int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) 352 static int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
352 { 353 {
353 int i; 354 int i;
354 int s; 355 int s;
355 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0) ; 356 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0) ;
356 vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v; 357 vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v;
357 vector unsigned char t1, t2, t3,t4, t5; 358 vector unsigned char t1, t2, t3,t4, t5;
358 vector unsigned int sum; 359 vector unsigned int sum;
359 vector signed int sumsqr; 360 vector signed int sumsqr;
360 361
361 sum = (vector unsigned int)vec_splat_u32(0); 362 sum = (vector unsigned int)vec_splat_u32(0);
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
395 vec_ste(sumsqr, 0, &s); 396 vec_ste(sumsqr, 0, &s);
396 397
397 return s; 398 return s;
398 } 399 }
399 400
400 /** 401 /**
401 * Sum of Squared Errors for a 16x16 block. 402 * Sum of Squared Errors for a 16x16 block.
402 * AltiVec-enhanced. 403 * AltiVec-enhanced.
403 * It's the sad16_altivec code above w/ squaring added. 404 * It's the sad16_altivec code above w/ squaring added.
404 */ 405 */
405 int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) 406 static int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
406 { 407 {
407 int i; 408 int i;
408 int s; 409 int s;
409 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0) ; 410 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0) ;
410 vector unsigned char perm1, perm2, *pix1v, *pix2v; 411 vector unsigned char perm1, perm2, *pix1v, *pix2v;
411 vector unsigned char t1, t2, t3,t4, t5; 412 vector unsigned char t1, t2, t3,t4, t5;
412 vector unsigned int sum; 413 vector unsigned int sum;
413 vector signed int sumsqr; 414 vector signed int sumsqr;
414 415
415 sum = (vector unsigned int)vec_splat_u32(0); 416 sum = (vector unsigned int)vec_splat_u32(0);
(...skipping 23 matching lines...) Expand all
439 } 440 }
440 441
441 /* Sum up the four partial sums, and put the result into s */ 442 /* Sum up the four partial sums, and put the result into s */
442 sumsqr = vec_sums((vector signed int) sum, (vector signed int) zero); 443 sumsqr = vec_sums((vector signed int) sum, (vector signed int) zero);
443 sumsqr = vec_splat(sumsqr, 3); 444 sumsqr = vec_splat(sumsqr, 3);
444 vec_ste(sumsqr, 0, &s); 445 vec_ste(sumsqr, 0, &s);
445 446
446 return s; 447 return s;
447 } 448 }
448 449
449 int pix_sum_altivec(uint8_t * pix, int line_size) 450 static int pix_sum_altivec(uint8_t * pix, int line_size)
450 { 451 {
451 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0) ; 452 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0) ;
452 vector unsigned char perm, *pixv; 453 vector unsigned char perm, *pixv;
453 vector unsigned char t1; 454 vector unsigned char t1;
454 vector unsigned int sad; 455 vector unsigned int sad;
455 vector signed int sumdiffs; 456 vector signed int sumdiffs;
456 457
457 int i; 458 int i;
458 int s; 459 int s;
459 460
(...skipping 12 matching lines...) Expand all
472 } 473 }
473 474
474 /* Sum up the four partial sums, and put the result into s */ 475 /* Sum up the four partial sums, and put the result into s */
475 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); 476 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
476 sumdiffs = vec_splat(sumdiffs, 3); 477 sumdiffs = vec_splat(sumdiffs, 3);
477 vec_ste(sumdiffs, 0, &s); 478 vec_ste(sumdiffs, 0, &s);
478 479
479 return s; 480 return s;
480 } 481 }
481 482
482 void get_pixels_altivec(DCTELEM *restrict block, const uint8_t *pixels, int line_size) 483 static void get_pixels_altivec(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
483 { 484 {
484 int i; 485 int i;
485 vector unsigned char perm, bytes, *pixv; 486 vector unsigned char perm, bytes, *pixv;
486 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0 ); 487 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0 );
487 vector signed short shorts; 488 vector signed short shorts;
488 489
489 for (i = 0; i < 8; i++) { 490 for (i = 0; i < 8; i++) {
490 // Read potentially unaligned pixels. 491 // Read potentially unaligned pixels.
491 // We're reading 16 pixels, and actually only want 8, 492 // We're reading 16 pixels, and actually only want 8,
492 // but we simply ignore the extras. 493 // but we simply ignore the extras.
493 perm = vec_lvsl(0, pixels); 494 perm = vec_lvsl(0, pixels);
494 pixv = (vector unsigned char *) pixels; 495 pixv = (vector unsigned char *) pixels;
495 bytes = vec_perm(pixv[0], pixv[1], perm); 496 bytes = vec_perm(pixv[0], pixv[1], perm);
496 497
497 // convert the bytes into shorts 498 // convert the bytes into shorts
498 shorts = (vector signed short)vec_mergeh(zero, bytes); 499 shorts = (vector signed short)vec_mergeh(zero, bytes);
499 500
500 // save the data to the block, we assume the block is 16-byte aligned 501 // save the data to the block, we assume the block is 16-byte aligned
501 vec_st(shorts, i*16, (vector signed short*)block); 502 vec_st(shorts, i*16, (vector signed short*)block);
502 503
503 pixels += line_size; 504 pixels += line_size;
504 } 505 }
505 } 506 }
506 507
507 void diff_pixels_altivec(DCTELEM *restrict block, const uint8_t *s1, 508 static void diff_pixels_altivec(DCTELEM *restrict block, const uint8_t *s1,
508 const uint8_t *s2, int stride) 509 const uint8_t *s2, int stride)
509 { 510 {
510 int i; 511 int i;
511 vector unsigned char perm, bytes, *pixv; 512 vector unsigned char perm, bytes, *pixv;
512 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0 ); 513 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0 );
513 vector signed short shorts1, shorts2; 514 vector signed short shorts1, shorts2;
514 515
515 for (i = 0; i < 4; i++) { 516 for (i = 0; i < 4; i++) {
516 // Read potentially unaligned pixels 517 // Read potentially unaligned pixels
517 // We're reading 16 pixels, and actually only want 8, 518 // We're reading 16 pixels, and actually only want 8,
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after
582 vec_st(zero_s16v, 16, block); 583 vec_st(zero_s16v, 16, block);
583 vec_st(zero_s16v, 32, block); 584 vec_st(zero_s16v, 32, block);
584 vec_st(zero_s16v, 48, block); 585 vec_st(zero_s16v, 48, block);
585 vec_st(zero_s16v, 64, block); 586 vec_st(zero_s16v, 64, block);
586 vec_st(zero_s16v, 80, block); 587 vec_st(zero_s16v, 80, block);
587 vec_st(zero_s16v, 96, block); 588 vec_st(zero_s16v, 96, block);
588 vec_st(zero_s16v, 112, block); 589 vec_st(zero_s16v, 112, block);
589 } 590 }
590 591
591 592
592 void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w) { 593 static void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w) {
593 register int i; 594 register int i;
594 register vector unsigned char vdst, vsrc; 595 register vector unsigned char vdst, vsrc;
595 596
596 /* dst and src are 16 bytes-aligned (guaranteed) */ 597 /* dst and src are 16 bytes-aligned (guaranteed) */
597 for (i = 0 ; (i + 15) < w ; i+=16) { 598 for (i = 0 ; (i + 15) < w ; i+=16) {
598 vdst = vec_ld(i, (unsigned char*)dst); 599 vdst = vec_ld(i, (unsigned char*)dst);
599 vsrc = vec_ld(i, (unsigned char*)src); 600 vsrc = vec_ld(i, (unsigned char*)src);
600 vdst = vec_add(vsrc, vdst); 601 vdst = vec_add(vsrc, vdst);
601 vec_st(vdst, i, (unsigned char*)dst); 602 vec_st(vdst, i, (unsigned char*)dst);
602 } 603 }
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after
680 blockv = vec_avg(blockv,pixelsv); 681 blockv = vec_avg(blockv,pixelsv);
681 vec_st(blockv, 0, (unsigned char*)block); 682 vec_st(blockv, 0, (unsigned char*)block);
682 pixels+=line_size; 683 pixels+=line_size;
683 block +=line_size; 684 block +=line_size;
684 } 685 }
685 686
686 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels16_num, 1); 687 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels16_num, 1);
687 } 688 }
688 689
689 /* next one assumes that ((line_size % 8) == 0) */ 690 /* next one assumes that ((line_size % 8) == 0) */
690 void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) 691 static void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h)
691 { 692 {
692 POWERPC_PERF_DECLARE(altivec_avg_pixels8_num, 1); 693 POWERPC_PERF_DECLARE(altivec_avg_pixels8_num, 1);
693 register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv; 694 register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv;
694 int i; 695 int i;
695 696
696 POWERPC_PERF_START_COUNT(altivec_avg_pixels8_num, 1); 697 POWERPC_PERF_START_COUNT(altivec_avg_pixels8_num, 1);
697 698
698 for (i = 0; i < h; i++) { 699 for (i = 0; i < h; i++) {
699 /* block is 8 bytes-aligned, so we're either in the 700 /* block is 8 bytes-aligned, so we're either in the
700 left block (16 bytes-aligned) or in the right block (not) */ 701 left block (16 bytes-aligned) or in the right block (not) */
(...skipping 15 matching lines...) Expand all
716 vec_st(blockv, 0, block); 717 vec_st(blockv, 0, block);
717 718
718 pixels += line_size; 719 pixels += line_size;
719 block += line_size; 720 block += line_size;
720 } 721 }
721 722
722 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_num, 1); 723 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_num, 1);
723 } 724 }
724 725
725 /* next one assumes that ((line_size % 8) == 0) */ 726 /* next one assumes that ((line_size % 8) == 0) */
726 void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) 727 static void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
727 { 728 {
728 POWERPC_PERF_DECLARE(altivec_put_pixels8_xy2_num, 1); 729 POWERPC_PERF_DECLARE(altivec_put_pixels8_xy2_num, 1);
729 register int i; 730 register int i;
730 register vector unsigned char pixelsv1, pixelsv2, pixelsavg; 731 register vector unsigned char pixelsv1, pixelsv2, pixelsavg;
731 register vector unsigned char blockv, temp1, temp2; 732 register vector unsigned char blockv, temp1, temp2;
732 register vector unsigned short pixelssum1, pixelssum2, temp3; 733 register vector unsigned short pixelssum1, pixelssum2, temp3;
733 register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); 734 register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
734 register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); 735 register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);
735 736
736 temp1 = vec_ld(0, pixels); 737 temp1 = vec_ld(0, pixels);
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
779 vec_st(blockv, 0, block); 780 vec_st(blockv, 0, block);
780 781
781 block += line_size; 782 block += line_size;
782 pixels += line_size; 783 pixels += line_size;
783 } 784 }
784 785
785 POWERPC_PERF_STOP_COUNT(altivec_put_pixels8_xy2_num, 1); 786 POWERPC_PERF_STOP_COUNT(altivec_put_pixels8_xy2_num, 1);
786 } 787 }
787 788
788 /* next one assumes that ((line_size % 8) == 0) */ 789 /* next one assumes that ((line_size % 8) == 0) */
789 void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) 790 static void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
790 { 791 {
791 POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels8_xy2_num, 1); 792 POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels8_xy2_num, 1);
792 register int i; 793 register int i;
793 register vector unsigned char pixelsv1, pixelsv2, pixelsavg; 794 register vector unsigned char pixelsv1, pixelsv2, pixelsavg;
794 register vector unsigned char blockv, temp1, temp2; 795 register vector unsigned char blockv, temp1, temp2;
795 register vector unsigned short pixelssum1, pixelssum2, temp3; 796 register vector unsigned short pixelssum1, pixelssum2, temp3;
796 register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); 797 register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
797 register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1); 798 register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1);
798 register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); 799 register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);
799 800
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after
843 vec_st(blockv, 0, block); 844 vec_st(blockv, 0, block);
844 845
845 block += line_size; 846 block += line_size;
846 pixels += line_size; 847 pixels += line_size;
847 } 848 }
848 849
849 POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); 850 POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
850 } 851 }
851 852
852 /* next one assumes that ((line_size % 16) == 0) */ 853 /* next one assumes that ((line_size % 16) == 0) */
853 void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) 854 static void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h)
854 { 855 {
855 POWERPC_PERF_DECLARE(altivec_put_pixels16_xy2_num, 1); 856 POWERPC_PERF_DECLARE(altivec_put_pixels16_xy2_num, 1);
856 register int i; 857 register int i;
857 register vector unsigned char pixelsv1, pixelsv2, pixelsv3, pixelsv4; 858 register vector unsigned char pixelsv1, pixelsv2, pixelsv3, pixelsv4;
858 register vector unsigned char blockv, temp1, temp2; 859 register vector unsigned char blockv, temp1, temp2;
859 register vector unsigned short temp3, temp4, 860 register vector unsigned short temp3, temp4,
860 pixelssum1, pixelssum2, pixelssum3, pixelssum4; 861 pixelssum1, pixelssum2, pixelssum3, pixelssum4;
861 register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); 862 register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
862 register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); 863 register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);
863 864
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after
916 vec_st(blockv, 0, block); 917 vec_st(blockv, 0, block);
917 918
918 block += line_size; 919 block += line_size;
919 pixels += line_size; 920 pixels += line_size;
920 } 921 }
921 922
922 POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_xy2_num, 1); 923 POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_xy2_num, 1);
923 } 924 }
924 925
925 /* next one assumes that ((line_size % 16) == 0) */ 926 /* next one assumes that ((line_size % 16) == 0) */
926 void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) 927 static void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h)
927 { 928 {
928 POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels16_xy2_num, 1); 929 POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels16_xy2_num, 1);
929 register int i; 930 register int i;
930 register vector unsigned char pixelsv1, pixelsv2, pixelsv3, pixelsv4; 931 register vector unsigned char pixelsv1, pixelsv2, pixelsv3, pixelsv4;
931 register vector unsigned char blockv, temp1, temp2; 932 register vector unsigned char blockv, temp1, temp2;
932 register vector unsigned short temp3, temp4, 933 register vector unsigned short temp3, temp4,
933 pixelssum1, pixelssum2, pixelssum3, pixelssum4; 934 pixelssum1, pixelssum2, pixelssum3, pixelssum4;
934 register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); 935 register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
935 register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1); 936 register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1);
936 register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); 937 register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after
989 990
990 vec_st(blockv, 0, block); 991 vec_st(blockv, 0, block);
991 992
992 block += line_size; 993 block += line_size;
993 pixels += line_size; 994 pixels += line_size;
994 } 995 }
995 996
996 POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); 997 POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
997 } 998 }
998 999
999 int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){ 1000 static int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
1000 POWERPC_PERF_DECLARE(altivec_hadamard8_diff8x8_num, 1); 1001 POWERPC_PERF_DECLARE(altivec_hadamard8_diff8x8_num, 1);
1001 int sum; 1002 int sum;
1002 register const vector unsigned char vzero = 1003 register const vector unsigned char vzero =
1003 (const vector unsigned char)vec_splat_u8(0); 1004 (const vector unsigned char)vec_splat_u8(0);
1004 register vector signed short temp0, temp1, temp2, temp3, temp4, 1005 register vector signed short temp0, temp1, temp2, temp3, temp4,
1005 temp5, temp6, temp7; 1006 temp5, temp6, temp7;
1006 POWERPC_PERF_START_COUNT(altivec_hadamard8_diff8x8_num, 1); 1007 POWERPC_PERF_START_COUNT(altivec_hadamard8_diff8x8_num, 1);
1007 { 1008 {
1008 register const vector signed short vprod1 =(const vector signed short) 1009 register const vector signed short vprod1 =(const vector signed short)
1009 { 1,-1, 1,-1, 1,-1, 1,-1 }; 1010 { 1,-1, 1,-1, 1,-1, 1,-1 };
(...skipping 300 matching lines...) Expand 10 before | Expand all | Expand 10 after
1310 vsum = vec_sum4s(vec_abs(line5CS), vsum); 1311 vsum = vec_sum4s(vec_abs(line5CS), vsum);
1311 vsum = vec_sum4s(vec_abs(line6CS), vsum); 1312 vsum = vec_sum4s(vec_abs(line6CS), vsum);
1312 vsum = vec_sum4s(vec_abs(line7CS), vsum); 1313 vsum = vec_sum4s(vec_abs(line7CS), vsum);
1313 vsum = vec_sums(vsum, (vector signed int)vzero); 1314 vsum = vec_sums(vsum, (vector signed int)vzero);
1314 vsum = vec_splat(vsum, 3); 1315 vsum = vec_splat(vsum, 3);
1315 vec_ste(vsum, 0, &sum); 1316 vec_ste(vsum, 0, &sum);
1316 } 1317 }
1317 return sum; 1318 return sum;
1318 } 1319 }
1319 1320
1320 int hadamard8_diff16_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){ 1321 static int hadamard8_diff16_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
1321 POWERPC_PERF_DECLARE(altivec_hadamard8_diff16_num, 1); 1322 POWERPC_PERF_DECLARE(altivec_hadamard8_diff16_num, 1);
1322 int score; 1323 int score;
1323 POWERPC_PERF_START_COUNT(altivec_hadamard8_diff16_num, 1); 1324 POWERPC_PERF_START_COUNT(altivec_hadamard8_diff16_num, 1);
1324 score = hadamard8_diff16x8_altivec(s, dst, src, stride, 8); 1325 score = hadamard8_diff16x8_altivec(s, dst, src, stride, 8);
1325 if (h==16) { 1326 if (h==16) {
1326 dst += 8*stride; 1327 dst += 8*stride;
1327 src += 8*stride; 1328 src += 8*stride;
1328 score += hadamard8_diff16x8_altivec(s, dst, src, stride, 8); 1329 score += hadamard8_diff16x8_altivec(s, dst, src, stride, 8);
1329 } 1330 }
1330 POWERPC_PERF_STOP_COUNT(altivec_hadamard8_diff16_num, 1); 1331 POWERPC_PERF_STOP_COUNT(altivec_hadamard8_diff16_num, 1);
(...skipping 17 matching lines...) Expand all
1348 t0 = (vector bool int)vec_and(a, t1); 1349 t0 = (vector bool int)vec_and(a, t1);
1349 t1 = (vector bool int)vec_andc(a, t1); 1350 t1 = (vector bool int)vec_andc(a, t1);
1350 a = vec_sub(m, (vector float)t1); 1351 a = vec_sub(m, (vector float)t1);
1351 m = vec_add(m, (vector float)t0); 1352 m = vec_add(m, (vector float)t0);
1352 vec_stl(a, 0, ang+i); 1353 vec_stl(a, 0, ang+i);
1353 vec_stl(m, 0, mag+i); 1354 vec_stl(m, 0, mag+i);
1354 } 1355 }
1355 } 1356 }
1356 1357
1357 /* next one assumes that ((line_size % 8) == 0) */ 1358 /* next one assumes that ((line_size % 8) == 0) */
1358 void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) 1359 static void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
1359 { 1360 {
1360 POWERPC_PERF_DECLARE(altivec_avg_pixels8_xy2_num, 1); 1361 POWERPC_PERF_DECLARE(altivec_avg_pixels8_xy2_num, 1);
1361 register int i; 1362 register int i;
1362 register vector unsigned char pixelsv1, pixelsv2, pixelsavg; 1363 register vector unsigned char pixelsv1, pixelsv2, pixelsavg;
1363 register vector unsigned char blockv, temp1, temp2, blocktemp; 1364 register vector unsigned char blockv, temp1, temp2, blocktemp;
1364 register vector unsigned short pixelssum1, pixelssum2, temp3; 1365 register vector unsigned short pixelssum1, pixelssum2, temp3;
1365 1366
1366 register const vector unsigned char vczero = (const vector unsigned char) 1367 register const vector unsigned char vczero = (const vector unsigned char)
1367 vec_splat_u8(0); 1368 vec_splat_u8(0);
1368 register const vector unsigned short vctwo = (const vector unsigned short) 1369 register const vector unsigned short vctwo = (const vector unsigned short)
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after
1447 c->put_pixels_tab[1][3] = put_pixels8_xy2_altivec; 1448 c->put_pixels_tab[1][3] = put_pixels8_xy2_altivec;
1448 c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_altivec; 1449 c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_altivec;
1449 c->put_pixels_tab[0][3] = put_pixels16_xy2_altivec; 1450 c->put_pixels_tab[0][3] = put_pixels16_xy2_altivec;
1450 c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_altivec; 1451 c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_altivec;
1451 1452
1452 c->hadamard8_diff[0] = hadamard8_diff16_altivec; 1453 c->hadamard8_diff[0] = hadamard8_diff16_altivec;
1453 c->hadamard8_diff[1] = hadamard8_diff8x8_altivec; 1454 c->hadamard8_diff[1] = hadamard8_diff8x8_altivec;
1454 if (CONFIG_VORBIS_DECODER) 1455 if (CONFIG_VORBIS_DECODER)
1455 c->vorbis_inverse_coupling = vorbis_inverse_coupling_altivec; 1456 c->vorbis_inverse_coupling = vorbis_inverse_coupling_altivec;
1456 } 1457 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698