OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2002 Brian Foley | 2 * Copyright (c) 2002 Brian Foley |
3 * Copyright (c) 2002 Dieter Shirley | 3 * Copyright (c) 2002 Dieter Shirley |
4 * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org> | 4 * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org> |
5 * | 5 * |
6 * This file is part of FFmpeg. | 6 * This file is part of FFmpeg. |
7 * | 7 * |
8 * FFmpeg is free software; you can redistribute it and/or | 8 * FFmpeg is free software; you can redistribute it and/or |
9 * modify it under the terms of the GNU Lesser General Public | 9 * modify it under the terms of the GNU Lesser General Public |
10 * License as published by the Free Software Foundation; either | 10 * License as published by the Free Software Foundation; either |
(...skipping 10 matching lines...) Expand all Loading... |
21 */ | 21 */ |
22 | 22 |
23 #include "config.h" | 23 #include "config.h" |
24 #if HAVE_ALTIVEC_H | 24 #if HAVE_ALTIVEC_H |
25 #include <altivec.h> | 25 #include <altivec.h> |
26 #endif | 26 #endif |
27 #include "libavcodec/dsputil.h" | 27 #include "libavcodec/dsputil.h" |
28 #include "dsputil_ppc.h" | 28 #include "dsputil_ppc.h" |
29 #include "util_altivec.h" | 29 #include "util_altivec.h" |
30 #include "types_altivec.h" | 30 #include "types_altivec.h" |
| 31 #include "dsputil_altivec.h" |
31 | 32 |
32 int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h
) | 33 static int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size
, int h) |
33 { | 34 { |
34 int i; | 35 int i; |
35 int s; | 36 int s; |
36 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0
); | 37 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0
); |
37 vector unsigned char *tv; | 38 vector unsigned char *tv; |
38 vector unsigned char pix1v, pix2v, pix2iv, avgv, t5; | 39 vector unsigned char pix1v, pix2v, pix2iv, avgv, t5; |
39 vector unsigned int sad; | 40 vector unsigned int sad; |
40 vector signed int sumdiffs; | 41 vector signed int sumdiffs; |
41 | 42 |
42 s = 0; | 43 s = 0; |
(...skipping 24 matching lines...) Expand all Loading... |
67 pix2 += line_size; | 68 pix2 += line_size; |
68 } | 69 } |
69 /* Sum up the four partial sums, and put the result into s */ | 70 /* Sum up the four partial sums, and put the result into s */ |
70 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); | 71 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); |
71 sumdiffs = vec_splat(sumdiffs, 3); | 72 sumdiffs = vec_splat(sumdiffs, 3); |
72 vec_ste(sumdiffs, 0, &s); | 73 vec_ste(sumdiffs, 0, &s); |
73 | 74 |
74 return s; | 75 return s; |
75 } | 76 } |
76 | 77 |
77 int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h
) | 78 static int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size
, int h) |
78 { | 79 { |
79 int i; | 80 int i; |
80 int s; | 81 int s; |
81 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0
); | 82 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0
); |
82 vector unsigned char *tv; | 83 vector unsigned char *tv; |
83 vector unsigned char pix1v, pix2v, pix3v, avgv, t5; | 84 vector unsigned char pix1v, pix2v, pix3v, avgv, t5; |
84 vector unsigned int sad; | 85 vector unsigned int sad; |
85 vector signed int sumdiffs; | 86 vector signed int sumdiffs; |
86 uint8_t *pix3 = pix2 + line_size; | 87 uint8_t *pix3 = pix2 + line_size; |
87 | 88 |
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
123 | 124 |
124 } | 125 } |
125 | 126 |
126 /* Sum up the four partial sums, and put the result into s */ | 127 /* Sum up the four partial sums, and put the result into s */ |
127 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); | 128 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); |
128 sumdiffs = vec_splat(sumdiffs, 3); | 129 sumdiffs = vec_splat(sumdiffs, 3); |
129 vec_ste(sumdiffs, 0, &s); | 130 vec_ste(sumdiffs, 0, &s); |
130 return s; | 131 return s; |
131 } | 132 } |
132 | 133 |
133 int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int
h) | 134 static int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_siz
e, int h) |
134 { | 135 { |
135 int i; | 136 int i; |
136 int s; | 137 int s; |
137 uint8_t *pix3 = pix2 + line_size; | 138 uint8_t *pix3 = pix2 + line_size; |
138 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0
); | 139 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0
); |
139 const vector unsigned short two = (const vector unsigned short)vec_splat_u16
(2); | 140 const vector unsigned short two = (const vector unsigned short)vec_splat_u16
(2); |
140 vector unsigned char *tv, avgv, t5; | 141 vector unsigned char *tv, avgv, t5; |
141 vector unsigned char pix1v, pix2v, pix3v, pix2iv, pix3iv; | 142 vector unsigned char pix1v, pix2v, pix3v, pix2iv, pix3iv; |
142 vector unsigned short pix2lv, pix2hv, pix2ilv, pix2ihv; | 143 vector unsigned short pix2lv, pix2hv, pix2ilv, pix2ihv; |
143 vector unsigned short pix3lv, pix3hv, pix3ilv, pix3ihv; | 144 vector unsigned short pix3lv, pix3hv, pix3ilv, pix3ihv; |
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
218 t2 = t4; | 219 t2 = t4; |
219 } | 220 } |
220 /* Sum up the four partial sums, and put the result into s */ | 221 /* Sum up the four partial sums, and put the result into s */ |
221 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); | 222 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); |
222 sumdiffs = vec_splat(sumdiffs, 3); | 223 sumdiffs = vec_splat(sumdiffs, 3); |
223 vec_ste(sumdiffs, 0, &s); | 224 vec_ste(sumdiffs, 0, &s); |
224 | 225 |
225 return s; | 226 return s; |
226 } | 227 } |
227 | 228 |
228 int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | 229 static int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, i
nt h) |
229 { | 230 { |
230 int i; | 231 int i; |
231 int s; | 232 int s; |
232 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0)
; | 233 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0)
; |
233 vector unsigned char perm1, perm2, *pix1v, *pix2v; | 234 vector unsigned char perm1, perm2, *pix1v, *pix2v; |
234 vector unsigned char t1, t2, t3,t4, t5; | 235 vector unsigned char t1, t2, t3,t4, t5; |
235 vector unsigned int sad; | 236 vector unsigned int sad; |
236 vector signed int sumdiffs; | 237 vector signed int sumdiffs; |
237 | 238 |
238 sad = (vector unsigned int)vec_splat_u32(0); | 239 sad = (vector unsigned int)vec_splat_u32(0); |
(...skipping 21 matching lines...) Expand all Loading... |
260 } | 261 } |
261 | 262 |
262 /* Sum up the four partial sums, and put the result into s */ | 263 /* Sum up the four partial sums, and put the result into s */ |
263 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); | 264 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); |
264 sumdiffs = vec_splat(sumdiffs, 3); | 265 sumdiffs = vec_splat(sumdiffs, 3); |
265 vec_ste(sumdiffs, 0, &s); | 266 vec_ste(sumdiffs, 0, &s); |
266 | 267 |
267 return s; | 268 return s; |
268 } | 269 } |
269 | 270 |
270 int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | 271 static int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, in
t h) |
271 { | 272 { |
272 int i; | 273 int i; |
273 int s; | 274 int s; |
274 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0)
; | 275 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0)
; |
275 vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v; | 276 vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v; |
276 vector unsigned char t1, t2, t3,t4, t5; | 277 vector unsigned char t1, t2, t3,t4, t5; |
277 vector unsigned int sad; | 278 vector unsigned int sad; |
278 vector signed int sumdiffs; | 279 vector signed int sumdiffs; |
279 | 280 |
280 sad = (vector unsigned int)vec_splat_u32(0); | 281 sad = (vector unsigned int)vec_splat_u32(0); |
(...skipping 24 matching lines...) Expand all Loading... |
305 } | 306 } |
306 | 307 |
307 /* Sum up the four partial sums, and put the result into s */ | 308 /* Sum up the four partial sums, and put the result into s */ |
308 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); | 309 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); |
309 sumdiffs = vec_splat(sumdiffs, 3); | 310 sumdiffs = vec_splat(sumdiffs, 3); |
310 vec_ste(sumdiffs, 0, &s); | 311 vec_ste(sumdiffs, 0, &s); |
311 | 312 |
312 return s; | 313 return s; |
313 } | 314 } |
314 | 315 |
315 int pix_norm1_altivec(uint8_t *pix, int line_size) | 316 static int pix_norm1_altivec(uint8_t *pix, int line_size) |
316 { | 317 { |
317 int i; | 318 int i; |
318 int s; | 319 int s; |
319 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0)
; | 320 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0)
; |
320 vector unsigned char *tv; | 321 vector unsigned char *tv; |
321 vector unsigned char pixv; | 322 vector unsigned char pixv; |
322 vector unsigned int sv; | 323 vector unsigned int sv; |
323 vector signed int sum; | 324 vector signed int sum; |
324 | 325 |
325 sv = (vector unsigned int)vec_splat_u32(0); | 326 sv = (vector unsigned int)vec_splat_u32(0); |
(...skipping 15 matching lines...) Expand all Loading... |
341 vec_ste(sum, 0, &s); | 342 vec_ste(sum, 0, &s); |
342 | 343 |
343 return s; | 344 return s; |
344 } | 345 } |
345 | 346 |
346 /** | 347 /** |
347 * Sum of Squared Errors for a 8x8 block. | 348 * Sum of Squared Errors for a 8x8 block. |
348 * AltiVec-enhanced. | 349 * AltiVec-enhanced. |
349 * It's the sad8_altivec code above w/ squaring added. | 350 * It's the sad8_altivec code above w/ squaring added. |
350 */ | 351 */ |
351 int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | 352 static int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, in
t h) |
352 { | 353 { |
353 int i; | 354 int i; |
354 int s; | 355 int s; |
355 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0)
; | 356 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0)
; |
356 vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v; | 357 vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v; |
357 vector unsigned char t1, t2, t3,t4, t5; | 358 vector unsigned char t1, t2, t3,t4, t5; |
358 vector unsigned int sum; | 359 vector unsigned int sum; |
359 vector signed int sumsqr; | 360 vector signed int sumsqr; |
360 | 361 |
361 sum = (vector unsigned int)vec_splat_u32(0); | 362 sum = (vector unsigned int)vec_splat_u32(0); |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
395 vec_ste(sumsqr, 0, &s); | 396 vec_ste(sumsqr, 0, &s); |
396 | 397 |
397 return s; | 398 return s; |
398 } | 399 } |
399 | 400 |
400 /** | 401 /** |
401 * Sum of Squared Errors for a 16x16 block. | 402 * Sum of Squared Errors for a 16x16 block. |
402 * AltiVec-enhanced. | 403 * AltiVec-enhanced. |
403 * It's the sad16_altivec code above w/ squaring added. | 404 * It's the sad16_altivec code above w/ squaring added. |
404 */ | 405 */ |
405 int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | 406 static int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, i
nt h) |
406 { | 407 { |
407 int i; | 408 int i; |
408 int s; | 409 int s; |
409 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0)
; | 410 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0)
; |
410 vector unsigned char perm1, perm2, *pix1v, *pix2v; | 411 vector unsigned char perm1, perm2, *pix1v, *pix2v; |
411 vector unsigned char t1, t2, t3,t4, t5; | 412 vector unsigned char t1, t2, t3,t4, t5; |
412 vector unsigned int sum; | 413 vector unsigned int sum; |
413 vector signed int sumsqr; | 414 vector signed int sumsqr; |
414 | 415 |
415 sum = (vector unsigned int)vec_splat_u32(0); | 416 sum = (vector unsigned int)vec_splat_u32(0); |
(...skipping 23 matching lines...) Expand all Loading... |
439 } | 440 } |
440 | 441 |
441 /* Sum up the four partial sums, and put the result into s */ | 442 /* Sum up the four partial sums, and put the result into s */ |
442 sumsqr = vec_sums((vector signed int) sum, (vector signed int) zero); | 443 sumsqr = vec_sums((vector signed int) sum, (vector signed int) zero); |
443 sumsqr = vec_splat(sumsqr, 3); | 444 sumsqr = vec_splat(sumsqr, 3); |
444 vec_ste(sumsqr, 0, &s); | 445 vec_ste(sumsqr, 0, &s); |
445 | 446 |
446 return s; | 447 return s; |
447 } | 448 } |
448 | 449 |
449 int pix_sum_altivec(uint8_t * pix, int line_size) | 450 static int pix_sum_altivec(uint8_t * pix, int line_size) |
450 { | 451 { |
451 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0)
; | 452 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0)
; |
452 vector unsigned char perm, *pixv; | 453 vector unsigned char perm, *pixv; |
453 vector unsigned char t1; | 454 vector unsigned char t1; |
454 vector unsigned int sad; | 455 vector unsigned int sad; |
455 vector signed int sumdiffs; | 456 vector signed int sumdiffs; |
456 | 457 |
457 int i; | 458 int i; |
458 int s; | 459 int s; |
459 | 460 |
(...skipping 12 matching lines...) Expand all Loading... |
472 } | 473 } |
473 | 474 |
474 /* Sum up the four partial sums, and put the result into s */ | 475 /* Sum up the four partial sums, and put the result into s */ |
475 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); | 476 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); |
476 sumdiffs = vec_splat(sumdiffs, 3); | 477 sumdiffs = vec_splat(sumdiffs, 3); |
477 vec_ste(sumdiffs, 0, &s); | 478 vec_ste(sumdiffs, 0, &s); |
478 | 479 |
479 return s; | 480 return s; |
480 } | 481 } |
481 | 482 |
482 void get_pixels_altivec(DCTELEM *restrict block, const uint8_t *pixels, int line
_size) | 483 static void get_pixels_altivec(DCTELEM *restrict block, const uint8_t *pixels, i
nt line_size) |
483 { | 484 { |
484 int i; | 485 int i; |
485 vector unsigned char perm, bytes, *pixv; | 486 vector unsigned char perm, bytes, *pixv; |
486 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0
); | 487 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0
); |
487 vector signed short shorts; | 488 vector signed short shorts; |
488 | 489 |
489 for (i = 0; i < 8; i++) { | 490 for (i = 0; i < 8; i++) { |
490 // Read potentially unaligned pixels. | 491 // Read potentially unaligned pixels. |
491 // We're reading 16 pixels, and actually only want 8, | 492 // We're reading 16 pixels, and actually only want 8, |
492 // but we simply ignore the extras. | 493 // but we simply ignore the extras. |
493 perm = vec_lvsl(0, pixels); | 494 perm = vec_lvsl(0, pixels); |
494 pixv = (vector unsigned char *) pixels; | 495 pixv = (vector unsigned char *) pixels; |
495 bytes = vec_perm(pixv[0], pixv[1], perm); | 496 bytes = vec_perm(pixv[0], pixv[1], perm); |
496 | 497 |
497 // convert the bytes into shorts | 498 // convert the bytes into shorts |
498 shorts = (vector signed short)vec_mergeh(zero, bytes); | 499 shorts = (vector signed short)vec_mergeh(zero, bytes); |
499 | 500 |
500 // save the data to the block, we assume the block is 16-byte aligned | 501 // save the data to the block, we assume the block is 16-byte aligned |
501 vec_st(shorts, i*16, (vector signed short*)block); | 502 vec_st(shorts, i*16, (vector signed short*)block); |
502 | 503 |
503 pixels += line_size; | 504 pixels += line_size; |
504 } | 505 } |
505 } | 506 } |
506 | 507 |
507 void diff_pixels_altivec(DCTELEM *restrict block, const uint8_t *s1, | 508 static void diff_pixels_altivec(DCTELEM *restrict block, const uint8_t *s1, |
508 const uint8_t *s2, int stride) | 509 const uint8_t *s2, int stride) |
509 { | 510 { |
510 int i; | 511 int i; |
511 vector unsigned char perm, bytes, *pixv; | 512 vector unsigned char perm, bytes, *pixv; |
512 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0
); | 513 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0
); |
513 vector signed short shorts1, shorts2; | 514 vector signed short shorts1, shorts2; |
514 | 515 |
515 for (i = 0; i < 4; i++) { | 516 for (i = 0; i < 4; i++) { |
516 // Read potentially unaligned pixels | 517 // Read potentially unaligned pixels |
517 // We're reading 16 pixels, and actually only want 8, | 518 // We're reading 16 pixels, and actually only want 8, |
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
582 vec_st(zero_s16v, 16, block); | 583 vec_st(zero_s16v, 16, block); |
583 vec_st(zero_s16v, 32, block); | 584 vec_st(zero_s16v, 32, block); |
584 vec_st(zero_s16v, 48, block); | 585 vec_st(zero_s16v, 48, block); |
585 vec_st(zero_s16v, 64, block); | 586 vec_st(zero_s16v, 64, block); |
586 vec_st(zero_s16v, 80, block); | 587 vec_st(zero_s16v, 80, block); |
587 vec_st(zero_s16v, 96, block); | 588 vec_st(zero_s16v, 96, block); |
588 vec_st(zero_s16v, 112, block); | 589 vec_st(zero_s16v, 112, block); |
589 } | 590 } |
590 | 591 |
591 | 592 |
592 void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w) { | 593 static void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w) { |
593 register int i; | 594 register int i; |
594 register vector unsigned char vdst, vsrc; | 595 register vector unsigned char vdst, vsrc; |
595 | 596 |
596 /* dst and src are 16 bytes-aligned (guaranteed) */ | 597 /* dst and src are 16 bytes-aligned (guaranteed) */ |
597 for (i = 0 ; (i + 15) < w ; i+=16) { | 598 for (i = 0 ; (i + 15) < w ; i+=16) { |
598 vdst = vec_ld(i, (unsigned char*)dst); | 599 vdst = vec_ld(i, (unsigned char*)dst); |
599 vsrc = vec_ld(i, (unsigned char*)src); | 600 vsrc = vec_ld(i, (unsigned char*)src); |
600 vdst = vec_add(vsrc, vdst); | 601 vdst = vec_add(vsrc, vdst); |
601 vec_st(vdst, i, (unsigned char*)dst); | 602 vec_st(vdst, i, (unsigned char*)dst); |
602 } | 603 } |
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
680 blockv = vec_avg(blockv,pixelsv); | 681 blockv = vec_avg(blockv,pixelsv); |
681 vec_st(blockv, 0, (unsigned char*)block); | 682 vec_st(blockv, 0, (unsigned char*)block); |
682 pixels+=line_size; | 683 pixels+=line_size; |
683 block +=line_size; | 684 block +=line_size; |
684 } | 685 } |
685 | 686 |
686 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels16_num, 1); | 687 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels16_num, 1); |
687 } | 688 } |
688 | 689 |
689 /* next one assumes that ((line_size % 8) == 0) */ | 690 /* next one assumes that ((line_size % 8) == 0) */ |
690 void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, int line_size,
int h) | 691 static void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, int lin
e_size, int h) |
691 { | 692 { |
692 POWERPC_PERF_DECLARE(altivec_avg_pixels8_num, 1); | 693 POWERPC_PERF_DECLARE(altivec_avg_pixels8_num, 1); |
693 register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv; | 694 register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv; |
694 int i; | 695 int i; |
695 | 696 |
696 POWERPC_PERF_START_COUNT(altivec_avg_pixels8_num, 1); | 697 POWERPC_PERF_START_COUNT(altivec_avg_pixels8_num, 1); |
697 | 698 |
698 for (i = 0; i < h; i++) { | 699 for (i = 0; i < h; i++) { |
699 /* block is 8 bytes-aligned, so we're either in the | 700 /* block is 8 bytes-aligned, so we're either in the |
700 left block (16 bytes-aligned) or in the right block (not) */ | 701 left block (16 bytes-aligned) or in the right block (not) */ |
(...skipping 15 matching lines...) Expand all Loading... |
716 vec_st(blockv, 0, block); | 717 vec_st(blockv, 0, block); |
717 | 718 |
718 pixels += line_size; | 719 pixels += line_size; |
719 block += line_size; | 720 block += line_size; |
720 } | 721 } |
721 | 722 |
722 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_num, 1); | 723 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_num, 1); |
723 } | 724 } |
724 | 725 |
725 /* next one assumes that ((line_size % 8) == 0) */ | 726 /* next one assumes that ((line_size % 8) == 0) */ |
726 void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_siz
e, int h) | 727 static void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int l
ine_size, int h) |
727 { | 728 { |
728 POWERPC_PERF_DECLARE(altivec_put_pixels8_xy2_num, 1); | 729 POWERPC_PERF_DECLARE(altivec_put_pixels8_xy2_num, 1); |
729 register int i; | 730 register int i; |
730 register vector unsigned char pixelsv1, pixelsv2, pixelsavg; | 731 register vector unsigned char pixelsv1, pixelsv2, pixelsavg; |
731 register vector unsigned char blockv, temp1, temp2; | 732 register vector unsigned char blockv, temp1, temp2; |
732 register vector unsigned short pixelssum1, pixelssum2, temp3; | 733 register vector unsigned short pixelssum1, pixelssum2, temp3; |
733 register const vector unsigned char vczero = (const vector unsigned char)vec
_splat_u8(0); | 734 register const vector unsigned char vczero = (const vector unsigned char)vec
_splat_u8(0); |
734 register const vector unsigned short vctwo = (const vector unsigned short)ve
c_splat_u16(2); | 735 register const vector unsigned short vctwo = (const vector unsigned short)ve
c_splat_u16(2); |
735 | 736 |
736 temp1 = vec_ld(0, pixels); | 737 temp1 = vec_ld(0, pixels); |
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
779 vec_st(blockv, 0, block); | 780 vec_st(blockv, 0, block); |
780 | 781 |
781 block += line_size; | 782 block += line_size; |
782 pixels += line_size; | 783 pixels += line_size; |
783 } | 784 } |
784 | 785 |
785 POWERPC_PERF_STOP_COUNT(altivec_put_pixels8_xy2_num, 1); | 786 POWERPC_PERF_STOP_COUNT(altivec_put_pixels8_xy2_num, 1); |
786 } | 787 } |
787 | 788 |
788 /* next one assumes that ((line_size % 8) == 0) */ | 789 /* next one assumes that ((line_size % 8) == 0) */ |
789 void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int l
ine_size, int h) | 790 static void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels
, int line_size, int h) |
790 { | 791 { |
791 POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels8_xy2_num, 1); | 792 POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels8_xy2_num, 1); |
792 register int i; | 793 register int i; |
793 register vector unsigned char pixelsv1, pixelsv2, pixelsavg; | 794 register vector unsigned char pixelsv1, pixelsv2, pixelsavg; |
794 register vector unsigned char blockv, temp1, temp2; | 795 register vector unsigned char blockv, temp1, temp2; |
795 register vector unsigned short pixelssum1, pixelssum2, temp3; | 796 register vector unsigned short pixelssum1, pixelssum2, temp3; |
796 register const vector unsigned char vczero = (const vector unsigned char)vec
_splat_u8(0); | 797 register const vector unsigned char vczero = (const vector unsigned char)vec
_splat_u8(0); |
797 register const vector unsigned short vcone = (const vector unsigned short)ve
c_splat_u16(1); | 798 register const vector unsigned short vcone = (const vector unsigned short)ve
c_splat_u16(1); |
798 register const vector unsigned short vctwo = (const vector unsigned short)ve
c_splat_u16(2); | 799 register const vector unsigned short vctwo = (const vector unsigned short)ve
c_splat_u16(2); |
799 | 800 |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
843 vec_st(blockv, 0, block); | 844 vec_st(blockv, 0, block); |
844 | 845 |
845 block += line_size; | 846 block += line_size; |
846 pixels += line_size; | 847 pixels += line_size; |
847 } | 848 } |
848 | 849 |
849 POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); | 850 POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); |
850 } | 851 } |
851 | 852 |
852 /* next one assumes that ((line_size % 16) == 0) */ | 853 /* next one assumes that ((line_size % 16) == 0) */ |
853 void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_
size, int h) | 854 static void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, in
t line_size, int h) |
854 { | 855 { |
855 POWERPC_PERF_DECLARE(altivec_put_pixels16_xy2_num, 1); | 856 POWERPC_PERF_DECLARE(altivec_put_pixels16_xy2_num, 1); |
856 register int i; | 857 register int i; |
857 register vector unsigned char pixelsv1, pixelsv2, pixelsv3, pixelsv4; | 858 register vector unsigned char pixelsv1, pixelsv2, pixelsv3, pixelsv4; |
858 register vector unsigned char blockv, temp1, temp2; | 859 register vector unsigned char blockv, temp1, temp2; |
859 register vector unsigned short temp3, temp4, | 860 register vector unsigned short temp3, temp4, |
860 pixelssum1, pixelssum2, pixelssum3, pixelssum4; | 861 pixelssum1, pixelssum2, pixelssum3, pixelssum4; |
861 register const vector unsigned char vczero = (const vector unsigned char)vec
_splat_u8(0); | 862 register const vector unsigned char vczero = (const vector unsigned char)vec
_splat_u8(0); |
862 register const vector unsigned short vctwo = (const vector unsigned short)ve
c_splat_u16(2); | 863 register const vector unsigned short vctwo = (const vector unsigned short)ve
c_splat_u16(2); |
863 | 864 |
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
916 vec_st(blockv, 0, block); | 917 vec_st(blockv, 0, block); |
917 | 918 |
918 block += line_size; | 919 block += line_size; |
919 pixels += line_size; | 920 pixels += line_size; |
920 } | 921 } |
921 | 922 |
922 POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_xy2_num, 1); | 923 POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_xy2_num, 1); |
923 } | 924 } |
924 | 925 |
925 /* next one assumes that ((line_size % 16) == 0) */ | 926 /* next one assumes that ((line_size % 16) == 0) */ |
926 void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, in
t line_size, int h) | 927 static void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pix
els, int line_size, int h) |
927 { | 928 { |
928 POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels16_xy2_num, 1); | 929 POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels16_xy2_num, 1); |
929 register int i; | 930 register int i; |
930 register vector unsigned char pixelsv1, pixelsv2, pixelsv3, pixelsv4; | 931 register vector unsigned char pixelsv1, pixelsv2, pixelsv3, pixelsv4; |
931 register vector unsigned char blockv, temp1, temp2; | 932 register vector unsigned char blockv, temp1, temp2; |
932 register vector unsigned short temp3, temp4, | 933 register vector unsigned short temp3, temp4, |
933 pixelssum1, pixelssum2, pixelssum3, pixelssum4; | 934 pixelssum1, pixelssum2, pixelssum3, pixelssum4; |
934 register const vector unsigned char vczero = (const vector unsigned char)vec
_splat_u8(0); | 935 register const vector unsigned char vczero = (const vector unsigned char)vec
_splat_u8(0); |
935 register const vector unsigned short vcone = (const vector unsigned short)ve
c_splat_u16(1); | 936 register const vector unsigned short vcone = (const vector unsigned short)ve
c_splat_u16(1); |
936 register const vector unsigned short vctwo = (const vector unsigned short)ve
c_splat_u16(2); | 937 register const vector unsigned short vctwo = (const vector unsigned short)ve
c_splat_u16(2); |
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
989 | 990 |
990 vec_st(blockv, 0, block); | 991 vec_st(blockv, 0, block); |
991 | 992 |
992 block += line_size; | 993 block += line_size; |
993 pixels += line_size; | 994 pixels += line_size; |
994 } | 995 } |
995 | 996 |
996 POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); | 997 POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); |
997 } | 998 } |
998 | 999 |
999 int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t
*src, int stride, int h){ | 1000 static int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, u
int8_t *src, int stride, int h){ |
1000 POWERPC_PERF_DECLARE(altivec_hadamard8_diff8x8_num, 1); | 1001 POWERPC_PERF_DECLARE(altivec_hadamard8_diff8x8_num, 1); |
1001 int sum; | 1002 int sum; |
1002 register const vector unsigned char vzero = | 1003 register const vector unsigned char vzero = |
1003 (const vector unsigned char)vec_splat_u8(0); | 1004 (const vector unsigned char)vec_splat_u8(0); |
1004 register vector signed short temp0, temp1, temp2, temp3, temp4, | 1005 register vector signed short temp0, temp1, temp2, temp3, temp4, |
1005 temp5, temp6, temp7; | 1006 temp5, temp6, temp7; |
1006 POWERPC_PERF_START_COUNT(altivec_hadamard8_diff8x8_num, 1); | 1007 POWERPC_PERF_START_COUNT(altivec_hadamard8_diff8x8_num, 1); |
1007 { | 1008 { |
1008 register const vector signed short vprod1 =(const vector signed short) | 1009 register const vector signed short vprod1 =(const vector signed short) |
1009 { 1,-1, 1,-1, 1,-1, 1,-1 }; | 1010 { 1,-1, 1,-1, 1,-1, 1,-1 }; |
(...skipping 300 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1310 vsum = vec_sum4s(vec_abs(line5CS), vsum); | 1311 vsum = vec_sum4s(vec_abs(line5CS), vsum); |
1311 vsum = vec_sum4s(vec_abs(line6CS), vsum); | 1312 vsum = vec_sum4s(vec_abs(line6CS), vsum); |
1312 vsum = vec_sum4s(vec_abs(line7CS), vsum); | 1313 vsum = vec_sum4s(vec_abs(line7CS), vsum); |
1313 vsum = vec_sums(vsum, (vector signed int)vzero); | 1314 vsum = vec_sums(vsum, (vector signed int)vzero); |
1314 vsum = vec_splat(vsum, 3); | 1315 vsum = vec_splat(vsum, 3); |
1315 vec_ste(vsum, 0, &sum); | 1316 vec_ste(vsum, 0, &sum); |
1316 } | 1317 } |
1317 return sum; | 1318 return sum; |
1318 } | 1319 } |
1319 | 1320 |
1320 int hadamard8_diff16_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *
src, int stride, int h){ | 1321 static int hadamard8_diff16_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, ui
nt8_t *src, int stride, int h){ |
1321 POWERPC_PERF_DECLARE(altivec_hadamard8_diff16_num, 1); | 1322 POWERPC_PERF_DECLARE(altivec_hadamard8_diff16_num, 1); |
1322 int score; | 1323 int score; |
1323 POWERPC_PERF_START_COUNT(altivec_hadamard8_diff16_num, 1); | 1324 POWERPC_PERF_START_COUNT(altivec_hadamard8_diff16_num, 1); |
1324 score = hadamard8_diff16x8_altivec(s, dst, src, stride, 8); | 1325 score = hadamard8_diff16x8_altivec(s, dst, src, stride, 8); |
1325 if (h==16) { | 1326 if (h==16) { |
1326 dst += 8*stride; | 1327 dst += 8*stride; |
1327 src += 8*stride; | 1328 src += 8*stride; |
1328 score += hadamard8_diff16x8_altivec(s, dst, src, stride, 8); | 1329 score += hadamard8_diff16x8_altivec(s, dst, src, stride, 8); |
1329 } | 1330 } |
1330 POWERPC_PERF_STOP_COUNT(altivec_hadamard8_diff16_num, 1); | 1331 POWERPC_PERF_STOP_COUNT(altivec_hadamard8_diff16_num, 1); |
(...skipping 17 matching lines...) Expand all Loading... |
1348 t0 = (vector bool int)vec_and(a, t1); | 1349 t0 = (vector bool int)vec_and(a, t1); |
1349 t1 = (vector bool int)vec_andc(a, t1); | 1350 t1 = (vector bool int)vec_andc(a, t1); |
1350 a = vec_sub(m, (vector float)t1); | 1351 a = vec_sub(m, (vector float)t1); |
1351 m = vec_add(m, (vector float)t0); | 1352 m = vec_add(m, (vector float)t0); |
1352 vec_stl(a, 0, ang+i); | 1353 vec_stl(a, 0, ang+i); |
1353 vec_stl(m, 0, mag+i); | 1354 vec_stl(m, 0, mag+i); |
1354 } | 1355 } |
1355 } | 1356 } |
1356 | 1357 |
1357 /* next one assumes that ((line_size % 8) == 0) */ | 1358 /* next one assumes that ((line_size % 8) == 0) */ |
1358 void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_siz
e, int h) | 1359 static void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int l
ine_size, int h) |
1359 { | 1360 { |
1360 POWERPC_PERF_DECLARE(altivec_avg_pixels8_xy2_num, 1); | 1361 POWERPC_PERF_DECLARE(altivec_avg_pixels8_xy2_num, 1); |
1361 register int i; | 1362 register int i; |
1362 register vector unsigned char pixelsv1, pixelsv2, pixelsavg; | 1363 register vector unsigned char pixelsv1, pixelsv2, pixelsavg; |
1363 register vector unsigned char blockv, temp1, temp2, blocktemp; | 1364 register vector unsigned char blockv, temp1, temp2, blocktemp; |
1364 register vector unsigned short pixelssum1, pixelssum2, temp3; | 1365 register vector unsigned short pixelssum1, pixelssum2, temp3; |
1365 | 1366 |
1366 register const vector unsigned char vczero = (const vector unsigned char) | 1367 register const vector unsigned char vczero = (const vector unsigned char) |
1367 vec_splat_u8(0); | 1368 vec_splat_u8(0); |
1368 register const vector unsigned short vctwo = (const vector unsigned short) | 1369 register const vector unsigned short vctwo = (const vector unsigned short) |
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1447 c->put_pixels_tab[1][3] = put_pixels8_xy2_altivec; | 1448 c->put_pixels_tab[1][3] = put_pixels8_xy2_altivec; |
1448 c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_altivec; | 1449 c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_altivec; |
1449 c->put_pixels_tab[0][3] = put_pixels16_xy2_altivec; | 1450 c->put_pixels_tab[0][3] = put_pixels16_xy2_altivec; |
1450 c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_altivec; | 1451 c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_altivec; |
1451 | 1452 |
1452 c->hadamard8_diff[0] = hadamard8_diff16_altivec; | 1453 c->hadamard8_diff[0] = hadamard8_diff16_altivec; |
1453 c->hadamard8_diff[1] = hadamard8_diff8x8_altivec; | 1454 c->hadamard8_diff[1] = hadamard8_diff8x8_altivec; |
1454 if (CONFIG_VORBIS_DECODER) | 1455 if (CONFIG_VORBIS_DECODER) |
1455 c->vorbis_inverse_coupling = vorbis_inverse_coupling_altivec; | 1456 c->vorbis_inverse_coupling = vorbis_inverse_coupling_altivec; |
1456 } | 1457 } |
OLD | NEW |