| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2002 Dieter Shirley | 2 * Copyright (c) 2002 Dieter Shirley |
| 3 * | 3 * |
| 4 * dct_unquantize_h263_altivec: | 4 * dct_unquantize_h263_altivec: |
| 5 * Copyright (c) 2003 Romain Dolbeau <romain@dolbeau.org> | 5 * Copyright (c) 2003 Romain Dolbeau <romain@dolbeau.org> |
| 6 * | 6 * |
| 7 * This file is part of FFmpeg. | 7 * This file is part of FFmpeg. |
| 8 * | 8 * |
| 9 * FFmpeg is free software; you can redistribute it and/or | 9 * FFmpeg is free software; you can redistribute it and/or |
| 10 * modify it under the terms of the GNU Lesser General Public | 10 * modify it under the terms of the GNU Lesser General Public |
| (...skipping 11 matching lines...) Expand all Loading... |
| 22 */ | 22 */ |
| 23 | 23 |
| 24 #include <stdlib.h> | 24 #include <stdlib.h> |
| 25 #include <stdio.h> | 25 #include <stdio.h> |
| 26 #include "libavcodec/dsputil.h" | 26 #include "libavcodec/dsputil.h" |
| 27 #include "libavcodec/mpegvideo.h" | 27 #include "libavcodec/mpegvideo.h" |
| 28 | 28 |
| 29 #include "dsputil_ppc.h" | 29 #include "dsputil_ppc.h" |
| 30 #include "util_altivec.h" | 30 #include "util_altivec.h" |
| 31 #include "types_altivec.h" | 31 #include "types_altivec.h" |
| 32 #include "dsputil_altivec.h" |
| 32 | 33 |
| 33 // Swaps the values of two lvalues of the same type (used for AltiVec registers); a and b are expanded more than once and unparenthesized, so pass plain side-effect-free variables | 34 // Swaps the values of two lvalues of the same type (used for AltiVec registers); a and b are expanded more than once and unparenthesized, so pass plain side-effect-free variables |
| 34 #define SWAP(a,b) \ | 35 #define SWAP(a,b) \ |
| 35 do { \ | 36 do { \ |
| 36 __typeof__(a) swap_temp=a; \ | 37 __typeof__(a) swap_temp=a; \ |
| 37 a=b; \ | 38 a=b; \ |
| 38 b=swap_temp; \ | 39 b=swap_temp; \ |
| 39 } while (0) | 40 } while (0) |
| 40 | 41 |
| 41 // transposes a matrix consisting of four vectors with four elements each | 42 // transposes a matrix consisting of four vectors with four elements each |
| (...skipping 19 matching lines...) Expand all Loading... |
| 61 __typeof__(vec)* _load_addr = (__typeof__(vec)*)(address); \ | 62 __typeof__(vec)* _load_addr = (__typeof__(vec)*)(address); \ |
| 62 vector unsigned char _perm_vec = vec_lvsl(0,(address)); \ | 63 vector unsigned char _perm_vec = vec_lvsl(0,(address)); \ |
| 63 vec = vec_ld(0, _load_addr); \ | 64 vec = vec_ld(0, _load_addr); \ |
| 64 vec = vec_perm(vec, vec, _perm_vec); \ | 65 vec = vec_perm(vec, vec, _perm_vec); \ |
| 65 vec = vec_splat(vec, 0); \ | 66 vec = vec_splat(vec, 0); \ |
| 66 } | 67 } |
| 67 | 68 |
| 68 | 69 |
| 69 #define FOUROF(a) {a,a,a,a} | 70 #define FOUROF(a) {a,a,a,a} |
| 70 | 71 |
| 71 int dct_quantize_altivec(MpegEncContext* s, | 72 static int dct_quantize_altivec(MpegEncContext* s, |
| 72 DCTELEM* data, int n, | 73 DCTELEM* data, int n, |
| 73 int qscale, int* overflow) | 74 int qscale, int* overflow) |
| 74 { | 75 { |
| 75 int lastNonZero; | 76 int lastNonZero; |
| 76 vector float row0, row1, row2, row3, row4, row5, row6, row7; | 77 vector float row0, row1, row2, row3, row4, row5, row6, row7; |
| 77 vector float alt0, alt1, alt2, alt3, alt4, alt5, alt6, alt7; | 78 vector float alt0, alt1, alt2, alt3, alt4, alt5, alt6, alt7; |
| 78 const vector float zero = (const vector float)FOUROF(0.); | 79 const vector float zero = (const vector float)FOUROF(0.); |
| 79 // used after quantize step | 80 // used after quantize step |
| 80 int oldBaseValue = 0; | 81 int oldBaseValue = 0; |
| 81 | 82 |
| (...skipping 386 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 468 (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)) { | 469 (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)) { |
| 469 ff_block_permute(data, s->dsp.idct_permutation, | 470 ff_block_permute(data, s->dsp.idct_permutation, |
| 470 s->intra_scantable.scantable, lastNonZero); | 471 s->intra_scantable.scantable, lastNonZero); |
| 471 } | 472 } |
| 472 | 473 |
| 473 return lastNonZero; | 474 return lastNonZero; |
| 474 } | 475 } |
| 475 | 476 |
| 476 /* AltiVec version of dct_unquantize_h263 | 477 /* AltiVec version of dct_unquantize_h263 |
| 477 this code assumes `block' is 16-byte aligned */ | 478 this code assumes `block' is 16-byte aligned */ |
| 478 void dct_unquantize_h263_altivec(MpegEncContext *s, | 479 static void dct_unquantize_h263_altivec(MpegEncContext *s, |
| 479 DCTELEM *block, int n, int qscale) | 480 DCTELEM *block, int n, int qscale) |
| 480 { | 481 { |
| 481 POWERPC_PERF_DECLARE(altivec_dct_unquantize_h263_num, 1); | 482 POWERPC_PERF_DECLARE(altivec_dct_unquantize_h263_num, 1); |
| 482 int i, level, qmul, qadd; | 483 int i, level, qmul, qadd; |
| 483 int nCoeffs; | 484 int nCoeffs; |
| 484 | 485 |
| 485 assert(s->block_last_index[n]>=0); | 486 assert(s->block_last_index[n]>=0); |
| 486 | 487 |
| 487 POWERPC_PERF_START_COUNT(altivec_dct_unquantize_h263_num, 1); | 488 POWERPC_PERF_START_COUNT(altivec_dct_unquantize_h263_num, 1); |
| 488 | 489 |
| (...skipping 10 matching lines...) Expand all Loading... |
| 499 qadd = 0; | 500 qadd = 0; |
| 500 i = 1; | 501 i = 1; |
| 501 nCoeffs= 63; //does not always use zigzag table | 502 nCoeffs= 63; //does not always use zigzag table |
| 502 } else { | 503 } else { |
| 503 i = 0; | 504 i = 0; |
| 504 nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; | 505 nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; |
| 505 } | 506 } |
| 506 | 507 |
| 507 { | 508 { |
| 508 register const vector signed short vczero = (const vector signed short)v
ec_splat_s16(0); | 509 register const vector signed short vczero = (const vector signed short)v
ec_splat_s16(0); |
| 509 DECLARE_ALIGNED_16(short, qmul8) = qmul; | 510 DECLARE_ALIGNED(16, short, qmul8) = qmul; |
| 510 DECLARE_ALIGNED_16(short, qadd8) = qadd; | 511 DECLARE_ALIGNED(16, short, qadd8) = qadd; |
| 511 register vector signed short blockv, qmulv, qaddv, nqaddv, temp1; | 512 register vector signed short blockv, qmulv, qaddv, nqaddv, temp1; |
| 512 register vector bool short blockv_null, blockv_neg; | 513 register vector bool short blockv_null, blockv_neg; |
| 513 register short backup_0 = block[0]; | 514 register short backup_0 = block[0]; |
| 514 register int j = 0; | 515 register int j = 0; |
| 515 | 516 |
| 516 qmulv = vec_splat((vec_s16)vec_lde(0, &qmul8), 0); | 517 qmulv = vec_splat((vec_s16)vec_lde(0, &qmul8), 0); |
| 517 qaddv = vec_splat((vec_s16)vec_lde(0, &qadd8), 0); | 518 qaddv = vec_splat((vec_s16)vec_lde(0, &qadd8), 0); |
| 518 nqaddv = vec_sub(vczero, qaddv); | 519 nqaddv = vec_sub(vczero, qaddv); |
| 519 | 520 |
| 520 #if 0 // block *is* 16 bytes-aligned, it seems. | 521 #if 0 // block *is* 16 bytes-aligned, it seems. |
| (...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 565 | 566 |
| 566 if (i == 1) { | 567 if (i == 1) { |
| 567 // cheat. this avoid special-casing the first iteration | 568 // cheat. this avoid special-casing the first iteration |
| 568 block[0] = backup_0; | 569 block[0] = backup_0; |
| 569 } | 570 } |
| 570 } | 571 } |
| 571 POWERPC_PERF_STOP_COUNT(altivec_dct_unquantize_h263_num, nCoeffs == 63); | 572 POWERPC_PERF_STOP_COUNT(altivec_dct_unquantize_h263_num, nCoeffs == 63); |
| 572 } | 573 } |
| 573 | 574 |
| 574 | 575 |
| 575 void idct_put_altivec(uint8_t *dest, int line_size, int16_t *block); | |
| 576 void idct_add_altivec(uint8_t *dest, int line_size, int16_t *block); | |
| 577 | |
| 578 void MPV_common_init_altivec(MpegEncContext *s) | 576 void MPV_common_init_altivec(MpegEncContext *s) |
| 579 { | 577 { |
| 580 if ((mm_flags & FF_MM_ALTIVEC) == 0) return; | 578 if ((mm_flags & FF_MM_ALTIVEC) == 0) return; |
| 581 | 579 |
| 582 if (s->avctx->lowres==0) { | 580 if (s->avctx->lowres==0) { |
| 583 if ((s->avctx->idct_algo == FF_IDCT_AUTO) || | 581 if ((s->avctx->idct_algo == FF_IDCT_AUTO) || |
| 584 (s->avctx->idct_algo == FF_IDCT_ALTIVEC)) { | 582 (s->avctx->idct_algo == FF_IDCT_ALTIVEC)) { |
| 585 s->dsp.idct_put = idct_put_altivec; | 583 s->dsp.idct_put = idct_put_altivec; |
| 586 s->dsp.idct_add = idct_add_altivec; | 584 s->dsp.idct_add = idct_add_altivec; |
| 587 s->dsp.idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; | 585 s->dsp.idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; |
| (...skipping 17 matching lines...) Expand all Loading... |
| 605 | 603 |
| 606 if ((s->avctx->dct_algo == FF_DCT_AUTO) || | 604 if ((s->avctx->dct_algo == FF_DCT_AUTO) || |
| 607 (s->avctx->dct_algo == FF_DCT_ALTIVEC)) { | 605 (s->avctx->dct_algo == FF_DCT_ALTIVEC)) { |
| 608 #if 0 /* seems to cause trouble under some circumstances */ | 606 #if 0 /* seems to cause trouble under some circumstances */ |
| 609 s->dct_quantize = dct_quantize_altivec; | 607 s->dct_quantize = dct_quantize_altivec; |
| 610 #endif | 608 #endif |
| 611 s->dct_unquantize_h263_intra = dct_unquantize_h263_altivec; | 609 s->dct_unquantize_h263_intra = dct_unquantize_h263_altivec; |
| 612 s->dct_unquantize_h263_inter = dct_unquantize_h263_altivec; | 610 s->dct_unquantize_h263_inter = dct_unquantize_h263_altivec; |
| 613 } | 611 } |
| 614 } | 612 } |
| OLD | NEW |