OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2002 Dieter Shirley | 2 * Copyright (c) 2002 Dieter Shirley |
3 * | 3 * |
4 * dct_unquantize_h263_altivec: | 4 * dct_unquantize_h263_altivec: |
5 * Copyright (c) 2003 Romain Dolbeau <romain@dolbeau.org> | 5 * Copyright (c) 2003 Romain Dolbeau <romain@dolbeau.org> |
6 * | 6 * |
7 * This file is part of FFmpeg. | 7 * This file is part of FFmpeg. |
8 * | 8 * |
9 * FFmpeg is free software; you can redistribute it and/or | 9 * FFmpeg is free software; you can redistribute it and/or |
10 * modify it under the terms of the GNU Lesser General Public | 10 * modify it under the terms of the GNU Lesser General Public |
(...skipping 11 matching lines...) Expand all Loading... |
22 */ | 22 */ |
23 | 23 |
24 #include <stdlib.h> | 24 #include <stdlib.h> |
25 #include <stdio.h> | 25 #include <stdio.h> |
26 #include "libavcodec/dsputil.h" | 26 #include "libavcodec/dsputil.h" |
27 #include "libavcodec/mpegvideo.h" | 27 #include "libavcodec/mpegvideo.h" |
28 | 28 |
29 #include "dsputil_ppc.h" | 29 #include "dsputil_ppc.h" |
30 #include "util_altivec.h" | 30 #include "util_altivec.h" |
31 #include "types_altivec.h" | 31 #include "types_altivec.h" |
| 32 #include "dsputil_altivec.h" |
32 | 33 |
33 // Swaps two variables (used for altivec registers) | 34 // Swaps two variables (used for altivec registers) |
// The temporary is declared with __typeof__(a), so it takes the type of the
// first argument; both arguments must be assignable. Each argument is expanded
// more than once and is not parenthesized, so pass plain variable names rather
// than expressions with side effects. The do { ... } while (0) wrapper lets
// the macro be used as a single statement.
34 #define SWAP(a,b) \ | 35 #define SWAP(a,b) \ |
35 do { \ | 36 do { \ |
36 __typeof__(a) swap_temp=a; \ | 37 __typeof__(a) swap_temp=a; \ |
37 a=b; \ | 38 a=b; \ |
38 b=swap_temp; \ | 39 b=swap_temp; \ |
39 } while (0) | 40 } while (0) |
40 | 41 |
41 // transposes a matrix consisting of four vectors with four elements each | 42 // transposes a matrix consisting of four vectors with four elements each |
(...skipping 19 matching lines...) Expand all Loading... |
61 __typeof__(vec)* _load_addr = (__typeof__(vec)*)(address); \ | 62 __typeof__(vec)* _load_addr = (__typeof__(vec)*)(address); \ |
62 vector unsigned char _perm_vec = vec_lvsl(0,(address)); \ | 63 vector unsigned char _perm_vec = vec_lvsl(0,(address)); \ |
63 vec = vec_ld(0, _load_addr); \ | 64 vec = vec_ld(0, _load_addr); \ |
64 vec = vec_perm(vec, vec, _perm_vec); \ | 65 vec = vec_perm(vec, vec, _perm_vec); \ |
65 vec = vec_splat(vec, 0); \ | 66 vec = vec_splat(vec, 0); \ |
66 } | 67 } |
67 | 68 |
68 | 69 |
// Expands to a brace-enclosed list repeating `a` four times; used in this file
// to initialise a four-element vector (e.g. the `zero` vector float constant).
69 #define FOUROF(a) {a,a,a,a} | 70 #define FOUROF(a) {a,a,a,a} |
70 | 71 |
71 int dct_quantize_altivec(MpegEncContext* s, | 72 static int dct_quantize_altivec(MpegEncContext* s, |
72 DCTELEM* data, int n, | 73 DCTELEM* data, int n, |
73 int qscale, int* overflow) | 74 int qscale, int* overflow) |
74 { | 75 { |
75 int lastNonZero; | 76 int lastNonZero; |
76 vector float row0, row1, row2, row3, row4, row5, row6, row7; | 77 vector float row0, row1, row2, row3, row4, row5, row6, row7; |
77 vector float alt0, alt1, alt2, alt3, alt4, alt5, alt6, alt7; | 78 vector float alt0, alt1, alt2, alt3, alt4, alt5, alt6, alt7; |
78 const vector float zero = (const vector float)FOUROF(0.); | 79 const vector float zero = (const vector float)FOUROF(0.); |
79 // used after quantize step | 80 // used after quantize step |
80 int oldBaseValue = 0; | 81 int oldBaseValue = 0; |
81 | 82 |
(...skipping 386 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
468 (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)) { | 469 (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)) { |
469 ff_block_permute(data, s->dsp.idct_permutation, | 470 ff_block_permute(data, s->dsp.idct_permutation, |
470 s->intra_scantable.scantable, lastNonZero); | 471 s->intra_scantable.scantable, lastNonZero); |
471 } | 472 } |
472 | 473 |
473 return lastNonZero; | 474 return lastNonZero; |
474 } | 475 } |
475 | 476 |
476 /* AltiVec version of dct_unquantize_h263 | 477 /* AltiVec version of dct_unquantize_h263 |
477 this code assumes `block' is 16 bytes-aligned */ | 478 this code assumes `block' is 16 bytes-aligned */ |
478 void dct_unquantize_h263_altivec(MpegEncContext *s, | 479 static void dct_unquantize_h263_altivec(MpegEncContext *s, |
479 DCTELEM *block, int n, int qscale) | 480 DCTELEM *block, int n, int qscale) |
480 { | 481 { |
481 POWERPC_PERF_DECLARE(altivec_dct_unquantize_h263_num, 1); | 482 POWERPC_PERF_DECLARE(altivec_dct_unquantize_h263_num, 1); |
482 int i, level, qmul, qadd; | 483 int i, level, qmul, qadd; |
483 int nCoeffs; | 484 int nCoeffs; |
484 | 485 |
485 assert(s->block_last_index[n]>=0); | 486 assert(s->block_last_index[n]>=0); |
486 | 487 |
487 POWERPC_PERF_START_COUNT(altivec_dct_unquantize_h263_num, 1); | 488 POWERPC_PERF_START_COUNT(altivec_dct_unquantize_h263_num, 1); |
488 | 489 |
(...skipping 10 matching lines...) Expand all Loading... |
499 qadd = 0; | 500 qadd = 0; |
500 i = 1; | 501 i = 1; |
501 nCoeffs= 63; //does not always use zigzag table | 502 nCoeffs= 63; //does not always use zigzag table |
502 } else { | 503 } else { |
503 i = 0; | 504 i = 0; |
504 nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; | 505 nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; |
505 } | 506 } |
506 | 507 |
507 { | 508 { |
508 register const vector signed short vczero = (const vector signed short)v
ec_splat_s16(0); | 509 register const vector signed short vczero = (const vector signed short)v
ec_splat_s16(0); |
509 DECLARE_ALIGNED_16(short, qmul8) = qmul; | 510 DECLARE_ALIGNED(16, short, qmul8) = qmul; |
510 DECLARE_ALIGNED_16(short, qadd8) = qadd; | 511 DECLARE_ALIGNED(16, short, qadd8) = qadd; |
511 register vector signed short blockv, qmulv, qaddv, nqaddv, temp1; | 512 register vector signed short blockv, qmulv, qaddv, nqaddv, temp1; |
512 register vector bool short blockv_null, blockv_neg; | 513 register vector bool short blockv_null, blockv_neg; |
513 register short backup_0 = block[0]; | 514 register short backup_0 = block[0]; |
514 register int j = 0; | 515 register int j = 0; |
515 | 516 |
516 qmulv = vec_splat((vec_s16)vec_lde(0, &qmul8), 0); | 517 qmulv = vec_splat((vec_s16)vec_lde(0, &qmul8), 0); |
517 qaddv = vec_splat((vec_s16)vec_lde(0, &qadd8), 0); | 518 qaddv = vec_splat((vec_s16)vec_lde(0, &qadd8), 0); |
518 nqaddv = vec_sub(vczero, qaddv); | 519 nqaddv = vec_sub(vczero, qaddv); |
519 | 520 |
520 #if 0 // block *is* 16 bytes-aligned, it seems. | 521 #if 0 // block *is* 16 bytes-aligned, it seems. |
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
565 | 566 |
566 if (i == 1) { | 567 if (i == 1) { |
567 // cheat. this avoid special-casing the first iteration | 568 // cheat. this avoid special-casing the first iteration |
568 block[0] = backup_0; | 569 block[0] = backup_0; |
569 } | 570 } |
570 } | 571 } |
571 POWERPC_PERF_STOP_COUNT(altivec_dct_unquantize_h263_num, nCoeffs == 63); | 572 POWERPC_PERF_STOP_COUNT(altivec_dct_unquantize_h263_num, nCoeffs == 63); |
572 } | 573 } |
573 | 574 |
574 | 575 |
575 void idct_put_altivec(uint8_t *dest, int line_size, int16_t *block); | |
576 void idct_add_altivec(uint8_t *dest, int line_size, int16_t *block); | |
577 | |
578 void MPV_common_init_altivec(MpegEncContext *s) | 576 void MPV_common_init_altivec(MpegEncContext *s) |
579 { | 577 { |
580 if ((mm_flags & FF_MM_ALTIVEC) == 0) return; | 578 if ((mm_flags & FF_MM_ALTIVEC) == 0) return; |
581 | 579 |
582 if (s->avctx->lowres==0) { | 580 if (s->avctx->lowres==0) { |
583 if ((s->avctx->idct_algo == FF_IDCT_AUTO) || | 581 if ((s->avctx->idct_algo == FF_IDCT_AUTO) || |
584 (s->avctx->idct_algo == FF_IDCT_ALTIVEC)) { | 582 (s->avctx->idct_algo == FF_IDCT_ALTIVEC)) { |
585 s->dsp.idct_put = idct_put_altivec; | 583 s->dsp.idct_put = idct_put_altivec; |
586 s->dsp.idct_add = idct_add_altivec; | 584 s->dsp.idct_add = idct_add_altivec; |
587 s->dsp.idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; | 585 s->dsp.idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; |
(...skipping 17 matching lines...) Expand all Loading... |
605 | 603 |
606 if ((s->avctx->dct_algo == FF_DCT_AUTO) || | 604 if ((s->avctx->dct_algo == FF_DCT_AUTO) || |
607 (s->avctx->dct_algo == FF_DCT_ALTIVEC)) { | 605 (s->avctx->dct_algo == FF_DCT_ALTIVEC)) { |
608 #if 0 /* seems to cause trouble under some circumstances */ | 606 #if 0 /* seems to cause trouble under some circumstances */ |
609 s->dct_quantize = dct_quantize_altivec; | 607 s->dct_quantize = dct_quantize_altivec; |
610 #endif | 608 #endif |
611 s->dct_unquantize_h263_intra = dct_unquantize_h263_altivec; | 609 s->dct_unquantize_h263_intra = dct_unquantize_h263_altivec; |
612 s->dct_unquantize_h263_inter = dct_unquantize_h263_altivec; | 610 s->dct_unquantize_h263_inter = dct_unquantize_h263_altivec; |
613 } | 611 } |
614 } | 612 } |
OLD | NEW |