| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org> | 2 * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org> |
| 3 * | 3 * |
| 4 * This file is part of FFmpeg. | 4 * This file is part of FFmpeg. |
| 5 * | 5 * |
| 6 * FFmpeg is free software; you can redistribute it and/or | 6 * FFmpeg is free software; you can redistribute it and/or |
| 7 * modify it under the terms of the GNU Lesser General Public | 7 * modify it under the terms of the GNU Lesser General Public |
| 8 * License as published by the Free Software Foundation; either | 8 * License as published by the Free Software Foundation; either |
| 9 * version 2.1 of the License, or (at your option) any later version. | 9 * version 2.1 of the License, or (at your option) any later version. |
| 10 * | 10 * |
| (...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 72 #undef PREFIX_h264_qpel16_v_lowpass_num | 72 #undef PREFIX_h264_qpel16_v_lowpass_num |
| 73 #undef PREFIX_h264_qpel16_hv_lowpass_altivec | 73 #undef PREFIX_h264_qpel16_hv_lowpass_altivec |
| 74 #undef PREFIX_h264_qpel16_hv_lowpass_num | 74 #undef PREFIX_h264_qpel16_hv_lowpass_num |
| 75 | 75 |
/*
 * H264_MC(OPNAME, SIZE, CODETYPE)
 *
 * Expands to sixteen static functions named
 *     OPNAME ## h264_qpel ## SIZE ## _mcXY_ ## CODETYPE (dst, src, stride)
 * for X and Y each in 0..3.
 * NOTE(review): XY presumably encodes the horizontal/vertical quarter-sample
 * offset of the H.264 luma interpolation; that is not stated in this file
 * section -- confirm against the dsputil function tables.
 *
 * How each variant is built, as visible in the bodies below:
 *   mc00             forwards to OPNAME ## pixels ## SIZE ## _ ## CODETYPE
 *                    with height SIZE.
 *   mc20, mc02, mc22 call the OPNAME h_lowpass / v_lowpass / hv_lowpass
 *                    helper directly on dst.
 *   mc10, mc30       h_lowpass into a temporary, then _l2_ of that temporary
 *                    with src (mc10) or src+1 (mc30).
 *   mc01, mc03       v_lowpass into a temporary, then _l2_ of that temporary
 *                    with src (mc01) or src+stride (mc03).
 *   mc11/31/13/33    _l2_ of halfH and halfV; halfH is filtered from src
 *                    (mc11, mc31) or src+stride (mc13, mc33), halfV from src
 *                    (mc11, mc13) or src+1 (mc31, mc33).
 *   mc21, mc23       _l2_ of halfH (from src or src+stride) and halfHV.
 *   mc12, mc32       _l2_ of halfV (from src or src+1) and halfHV.
 *
 * Intermediate blocks (half, halfH, halfV, halfHV) are always written with the
 * put_ helpers, regardless of OPNAME, into 16-byte-aligned uint8_t arrays of
 * SIZE*SIZE elements using SIZE as their row stride; OPNAME is applied only
 * by the final call that writes dst. The hv_lowpass helpers additionally get
 * a 16-byte-aligned int16_t scratch array "tmp" of SIZE*(SIZE+8) elements.
 *
 * Every line except the first ends in a backslash, so the blank line that
 * follows the last "}\" terminates the macro definition.
 *
 * The only difference between the OLD and NEW columns in this macro is the
 * spelling of the alignment helper:
 *     DECLARE_ALIGNED_16(type, name)  ->  DECLARE_ALIGNED(16, type, name)
 */
| 76 #define H264_MC(OPNAME, SIZE, CODETYPE) \ | 76 #define H264_MC(OPNAME, SIZE, CODETYPE) \ |
| 77 static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## CODETYPE (uint8_t *dst, uin
t8_t *src, int stride){\ | 77 static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## CODETYPE (uint8_t *dst, uin
t8_t *src, int stride){\ |
| 78 OPNAME ## pixels ## SIZE ## _ ## CODETYPE(dst, src, stride, SIZE);\ | 78 OPNAME ## pixels ## SIZE ## _ ## CODETYPE(dst, src, stride, SIZE);\ |
| 79 }\ | 79 }\ |
| 80 \ | 80 \ |
| 81 static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## CODETYPE(uint8_t *dst, uint
8_t *src, int stride){ \ | 81 static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## CODETYPE(uint8_t *dst, uint
8_t *src, int stride){ \ |
| 82 DECLARE_ALIGNED_16(uint8_t, half)[SIZE*SIZE];\ | 82 DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\ |
| 83 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\ | 83 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\ |
| 84 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride,
SIZE);\ | 84 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride,
SIZE);\ |
| 85 }\ | 85 }\ |
| 86 \ | 86 \ |
| 87 static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## CODETYPE(uint8_t *dst, uint
8_t *src, int stride){\ | 87 static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## CODETYPE(uint8_t *dst, uint
8_t *src, int stride){\ |
| 88 OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(dst, src, stride, str
ide);\ | 88 OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(dst, src, stride, str
ide);\ |
| 89 }\ | 89 }\ |
| 90 \ | 90 \ |
| 91 static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## CODETYPE(uint8_t *dst, uint
8_t *src, int stride){\ | 91 static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## CODETYPE(uint8_t *dst, uint
8_t *src, int stride){\ |
| 92 DECLARE_ALIGNED_16(uint8_t, half)[SIZE*SIZE];\ | 92 DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\ |
| 93 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\ | 93 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\ |
| 94 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+1, half, stride, strid
e, SIZE);\ | 94 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+1, half, stride, strid
e, SIZE);\ |
| 95 }\ | 95 }\ |
| 96 \ | 96 \ |
| 97 static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## CODETYPE(uint8_t *dst, uint
8_t *src, int stride){\ | 97 static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## CODETYPE(uint8_t *dst, uint
8_t *src, int stride){\ |
| 98 DECLARE_ALIGNED_16(uint8_t, half)[SIZE*SIZE];\ | 98 DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\ |
| 99 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\ | 99 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\ |
| 100 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride,
SIZE);\ | 100 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride,
SIZE);\ |
| 101 }\ | 101 }\ |
| 102 \ | 102 \ |
| 103 static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## CODETYPE(uint8_t *dst, uint
8_t *src, int stride){\ | 103 static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## CODETYPE(uint8_t *dst, uint
8_t *src, int stride){\ |
| 104 OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(dst, src, stride, str
ide);\ | 104 OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(dst, src, stride, str
ide);\ |
| 105 }\ | 105 }\ |
| 106 \ | 106 \ |
| 107 static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## CODETYPE(uint8_t *dst, uint
8_t *src, int stride){\ | 107 static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## CODETYPE(uint8_t *dst, uint
8_t *src, int stride){\ |
| 108 DECLARE_ALIGNED_16(uint8_t, half)[SIZE*SIZE];\ | 108 DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\ |
| 109 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\ | 109 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\ |
| 110 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+stride, half, stride,
stride, SIZE);\ | 110 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+stride, half, stride,
stride, SIZE);\ |
| 111 }\ | 111 }\ |
| 112 \ | 112 \ |
| 113 static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## CODETYPE(uint8_t *dst, uint
8_t *src, int stride){\ | 113 static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## CODETYPE(uint8_t *dst, uint
8_t *src, int stride){\ |
| 114 DECLARE_ALIGNED_16(uint8_t, halfH)[SIZE*SIZE];\ | 114 DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\ |
| 115 DECLARE_ALIGNED_16(uint8_t, halfV)[SIZE*SIZE];\ | 115 DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\ |
| 116 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\ | 116 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\ |
| 117 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\ | 117 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\ |
| 118 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE
, SIZE);\ | 118 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE
, SIZE);\ |
| 119 }\ | 119 }\ |
| 120 \ | 120 \ |
| 121 static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## CODETYPE(uint8_t *dst, uint
8_t *src, int stride){\ | 121 static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## CODETYPE(uint8_t *dst, uint
8_t *src, int stride){\ |
| 122 DECLARE_ALIGNED_16(uint8_t, halfH)[SIZE*SIZE];\ | 122 DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\ |
| 123 DECLARE_ALIGNED_16(uint8_t, halfV)[SIZE*SIZE];\ | 123 DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\ |
| 124 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\ | 124 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\ |
| 125 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride)
;\ | 125 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride)
;\ |
| 126 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE
, SIZE);\ | 126 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE
, SIZE);\ |
| 127 }\ | 127 }\ |
| 128 \ | 128 \ |
| 129 static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## CODETYPE(uint8_t *dst, uint
8_t *src, int stride){\ | 129 static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## CODETYPE(uint8_t *dst, uint
8_t *src, int stride){\ |
| 130 DECLARE_ALIGNED_16(uint8_t, halfH)[SIZE*SIZE];\ | 130 DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\ |
| 131 DECLARE_ALIGNED_16(uint8_t, halfV)[SIZE*SIZE];\ | 131 DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\ |
| 132 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE,
stride);\ | 132 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE,
stride);\ |
| 133 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\ | 133 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\ |
| 134 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE
, SIZE);\ | 134 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE
, SIZE);\ |
| 135 }\ | 135 }\ |
| 136 \ | 136 \ |
| 137 static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## CODETYPE(uint8_t *dst, uint
8_t *src, int stride){\ | 137 static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## CODETYPE(uint8_t *dst, uint
8_t *src, int stride){\ |
| 138 DECLARE_ALIGNED_16(uint8_t, halfH)[SIZE*SIZE];\ | 138 DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\ |
| 139 DECLARE_ALIGNED_16(uint8_t, halfV)[SIZE*SIZE];\ | 139 DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\ |
| 140 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE,
stride);\ | 140 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE,
stride);\ |
| 141 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride)
;\ | 141 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride)
;\ |
| 142 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE
, SIZE);\ | 142 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE
, SIZE);\ |
| 143 }\ | 143 }\ |
| 144 \ | 144 \ |
| 145 static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## CODETYPE(uint8_t *dst, uint
8_t *src, int stride){\ | 145 static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## CODETYPE(uint8_t *dst, uint
8_t *src, int stride){\ |
| 146 DECLARE_ALIGNED_16(int16_t, tmp)[SIZE*(SIZE+8)];\ | 146 DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\ |
| 147 OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(dst, tmp, src, strid
e, SIZE, stride);\ | 147 OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(dst, tmp, src, strid
e, SIZE, stride);\ |
| 148 }\ | 148 }\ |
| 149 \ | 149 \ |
| 150 static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## CODETYPE(uint8_t *dst, uint
8_t *src, int stride){\ | 150 static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## CODETYPE(uint8_t *dst, uint
8_t *src, int stride){\ |
| 151 DECLARE_ALIGNED_16(uint8_t, halfH)[SIZE*SIZE];\ | 151 DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\ |
| 152 DECLARE_ALIGNED_16(uint8_t, halfHV)[SIZE*SIZE];\ | 152 DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\ |
| 153 DECLARE_ALIGNED_16(int16_t, tmp)[SIZE*(SIZE+8)];\ | 153 DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\ |
| 154 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\ | 154 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\ |
| 155 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SI
ZE, stride);\ | 155 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SI
ZE, stride);\ |
| 156 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZ
E, SIZE);\ | 156 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZ
E, SIZE);\ |
| 157 }\ | 157 }\ |
| 158 \ | 158 \ |
| 159 static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## CODETYPE(uint8_t *dst, uint
8_t *src, int stride){\ | 159 static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## CODETYPE(uint8_t *dst, uint
8_t *src, int stride){\ |
| 160 DECLARE_ALIGNED_16(uint8_t, halfH)[SIZE*SIZE];\ | 160 DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\ |
| 161 DECLARE_ALIGNED_16(uint8_t, halfHV)[SIZE*SIZE];\ | 161 DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\ |
| 162 DECLARE_ALIGNED_16(int16_t, tmp)[SIZE*(SIZE+8)];\ | 162 DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\ |
| 163 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE,
stride);\ | 163 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE,
stride);\ |
| 164 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SI
ZE, stride);\ | 164 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SI
ZE, stride);\ |
| 165 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZ
E, SIZE);\ | 165 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZ
E, SIZE);\ |
| 166 }\ | 166 }\ |
| 167 \ | 167 \ |
| 168 static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## CODETYPE(uint8_t *dst, uint
8_t *src, int stride){\ | 168 static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## CODETYPE(uint8_t *dst, uint
8_t *src, int stride){\ |
| 169 DECLARE_ALIGNED_16(uint8_t, halfV)[SIZE*SIZE];\ | 169 DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\ |
| 170 DECLARE_ALIGNED_16(uint8_t, halfHV)[SIZE*SIZE];\ | 170 DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\ |
| 171 DECLARE_ALIGNED_16(int16_t, tmp)[SIZE*(SIZE+8)];\ | 171 DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\ |
| 172 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\ | 172 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\ |
| 173 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SI
ZE, stride);\ | 173 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SI
ZE, stride);\ |
| 174 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZ
E, SIZE);\ | 174 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZ
E, SIZE);\ |
| 175 }\ | 175 }\ |
| 176 \ | 176 \ |
| 177 static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, uint
8_t *src, int stride){\ | 177 static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, uint
8_t *src, int stride){\ |
| 178 DECLARE_ALIGNED_16(uint8_t, halfV)[SIZE*SIZE];\ | 178 DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\ |
| 179 DECLARE_ALIGNED_16(uint8_t, halfHV)[SIZE*SIZE];\ | 179 DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\ |
| 180 DECLARE_ALIGNED_16(int16_t, tmp)[SIZE*(SIZE+8)];\ | 180 DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\ |
| 181 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride)
;\ | 181 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride)
;\ |
| 182 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SI
ZE, stride);\ | 182 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SI
ZE, stride);\ |
| 183 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZ
E, SIZE);\ | 183 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZ
E, SIZE);\ |
| 184 }\ | 184 }\ |
| 185 | 185 |
| 186 static inline void put_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1, | 186 static inline void put_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1, |
| 187 const uint8_t * src2, int dst_stride, | 187 const uint8_t * src2, int dst_stride, |
| 188 int src_stride1, int h) | 188 int src_stride1, int h) |
| 189 { | 189 { |
| 190 int i; | 190 int i; |
| (...skipping 233 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 424 vec_u8 bodyv = vec_perm( idstsum8, idstsum8, perm_stv );\ | 424 vec_u8 bodyv = vec_perm( idstsum8, idstsum8, perm_stv );\ |
| 425 vec_u8 edgelv = vec_perm( sel, zero_u8v, perm_stv ); \ | 425 vec_u8 edgelv = vec_perm( sel, zero_u8v, perm_stv ); \ |
| 426 lv = vec_sel( lv, bodyv, edgelv ); \ | 426 lv = vec_sel( lv, bodyv, edgelv ); \ |
| 427 vec_st( lv, 7, dest ); \ | 427 vec_st( lv, 7, dest ); \ |
| 428 hv = vec_ld( 0, dest ); \ | 428 hv = vec_ld( 0, dest ); \ |
| 429 edgehv = vec_perm( zero_u8v, sel, perm_stv ); \ | 429 edgehv = vec_perm( zero_u8v, sel, perm_stv ); \ |
| 430 hv = vec_sel( hv, bodyv, edgehv ); \ | 430 hv = vec_sel( hv, bodyv, edgehv ); \ |
| 431 vec_st( hv, 0, dest ); \ | 431 vec_st( hv, 0, dest ); \ |
| 432 } | 432 } |
| 433 | 433 |
| 434 void ff_h264_idct8_add_altivec( uint8_t *dst, DCTELEM *dct, int stride ) { | 434 static void ff_h264_idct8_add_altivec( uint8_t *dst, DCTELEM *dct, int stride )
{ |
| 435 vec_s16 s0, s1, s2, s3, s4, s5, s6, s7; | 435 vec_s16 s0, s1, s2, s3, s4, s5, s6, s7; |
| 436 vec_s16 d0, d1, d2, d3, d4, d5, d6, d7; | 436 vec_s16 d0, d1, d2, d3, d4, d5, d6, d7; |
| 437 vec_s16 idct0, idct1, idct2, idct3, idct4, idct5, idct6, idct7; | 437 vec_s16 idct0, idct1, idct2, idct3, idct4, idct5, idct6, idct7; |
| 438 | 438 |
| 439 vec_u8 perm_ldv = vec_lvsl(0, dst); | 439 vec_u8 perm_ldv = vec_lvsl(0, dst); |
| 440 vec_u8 perm_stv = vec_lvsr(8, dst); | 440 vec_u8 perm_stv = vec_lvsr(8, dst); |
| 441 | 441 |
| 442 const vec_u16 onev = vec_splat_u16(1); | 442 const vec_u16 onev = vec_splat_u16(1); |
| 443 const vec_u16 twov = vec_splat_u16(2); | 443 const vec_u16 twov = vec_splat_u16(2); |
| 444 const vec_u16 sixv = vec_splat_u16(6); | 444 const vec_u16 sixv = vec_splat_u16(6); |
| (...skipping 28 matching lines...) Expand all Loading... |
| 473 ALTIVEC_STORE_SUM_CLIP(&dst[5*stride], idct5, perm_ldv, perm_stv, sel); | 473 ALTIVEC_STORE_SUM_CLIP(&dst[5*stride], idct5, perm_ldv, perm_stv, sel); |
| 474 ALTIVEC_STORE_SUM_CLIP(&dst[6*stride], idct6, perm_ldv, perm_stv, sel); | 474 ALTIVEC_STORE_SUM_CLIP(&dst[6*stride], idct6, perm_ldv, perm_stv, sel); |
| 475 ALTIVEC_STORE_SUM_CLIP(&dst[7*stride], idct7, perm_ldv, perm_stv, sel); | 475 ALTIVEC_STORE_SUM_CLIP(&dst[7*stride], idct7, perm_ldv, perm_stv, sel); |
| 476 } | 476 } |
| 477 | 477 |
| 478 static av_always_inline void h264_idct_dc_add_internal(uint8_t *dst, DCTELEM *bl
ock, int stride, int size) | 478 static av_always_inline void h264_idct_dc_add_internal(uint8_t *dst, DCTELEM *bl
ock, int stride, int size) |
| 479 { | 479 { |
| 480 vec_s16 dc16; | 480 vec_s16 dc16; |
| 481 vec_u8 dcplus, dcminus, v0, v1, v2, v3, aligner; | 481 vec_u8 dcplus, dcminus, v0, v1, v2, v3, aligner; |
| 482 LOAD_ZERO; | 482 LOAD_ZERO; |
| 483 DECLARE_ALIGNED_16(int, dc); | 483 DECLARE_ALIGNED(16, int, dc); |
| 484 int i; | 484 int i; |
| 485 | 485 |
| 486 dc = (block[0] + 32) >> 6; | 486 dc = (block[0] + 32) >> 6; |
| 487 dc16 = vec_splat((vec_s16) vec_lde(0, &dc), 1); | 487 dc16 = vec_splat((vec_s16) vec_lde(0, &dc), 1); |
| 488 | 488 |
| 489 if (size == 4) | 489 if (size == 4) |
| 490 dc16 = vec_sld(dc16, zero_s16v, 8); | 490 dc16 = vec_sld(dc16, zero_s16v, 8); |
| 491 dcplus = vec_packsu(dc16, zero_s16v); | 491 dcplus = vec_packsu(dc16, zero_s16v); |
| 492 dcminus = vec_packsu(vec_sub(zero_s16v, dc16), zero_s16v); | 492 dcminus = vec_packsu(vec_sub(zero_s16v, dc16), zero_s16v); |
| 493 | 493 |
| (...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 583 \ | 583 \ |
| 584 r0 = vec_mergeh(r4, r6); /*all set 0*/ \ | 584 r0 = vec_mergeh(r4, r6); /*all set 0*/ \ |
| 585 r1 = vec_mergel(r4, r6); /*all set 1*/ \ | 585 r1 = vec_mergel(r4, r6); /*all set 1*/ \ |
| 586 r2 = vec_mergeh(r5, r7); /*all set 2*/ \ | 586 r2 = vec_mergeh(r5, r7); /*all set 2*/ \ |
| 587 r3 = vec_mergel(r5, r7); /*all set 3*/ \ | 587 r3 = vec_mergel(r5, r7); /*all set 3*/ \ |
| 588 } | 588 } |
| 589 | 589 |
| 590 static inline void write16x4(uint8_t *dst, int dst_stride, | 590 static inline void write16x4(uint8_t *dst, int dst_stride, |
| 591 register vec_u8 r0, register vec_u8 r1, | 591 register vec_u8 r0, register vec_u8 r1, |
| 592 register vec_u8 r2, register vec_u8 r3) { | 592 register vec_u8 r2, register vec_u8 r3) { |
| 593 DECLARE_ALIGNED_16(unsigned char, result)[64]; | 593 DECLARE_ALIGNED(16, unsigned char, result)[64]; |
| 594 uint32_t *src_int = (uint32_t *)result, *dst_int = (uint32_t *)dst; | 594 uint32_t *src_int = (uint32_t *)result, *dst_int = (uint32_t *)dst; |
| 595 int int_dst_stride = dst_stride/4; | 595 int int_dst_stride = dst_stride/4; |
| 596 | 596 |
| 597 vec_st(r0, 0, result); | 597 vec_st(r0, 0, result); |
| 598 vec_st(r1, 16, result); | 598 vec_st(r1, 16, result); |
| 599 vec_st(r2, 32, result); | 599 vec_st(r2, 32, result); |
| 600 vec_st(r3, 48, result); | 600 vec_st(r3, 48, result); |
| 601 /* FIXME: there has to be a better way!!!! */ | 601 /* FIXME: there has to be a better way!!!! */ |
| 602 *dst_int = *src_int; | 602 *dst_int = *src_int; |
| 603 *(dst_int+ int_dst_stride) = *(src_int + 1); | 603 *(dst_int+ int_dst_stride) = *(src_int + 1); |
| (...skipping 159 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 763 delta = vec_subs(stage2, vec160); /* d */
\ | 763 delta = vec_subs(stage2, vec160); /* d */
\ |
| 764 deltaneg = vec_min(tc0masked, deltaneg);
\ | 764 deltaneg = vec_min(tc0masked, deltaneg);
\ |
| 765 delta = vec_min(tc0masked, delta);
\ | 765 delta = vec_min(tc0masked, delta);
\ |
| 766 p0 = vec_subs(p0, deltaneg);
\ | 766 p0 = vec_subs(p0, deltaneg);
\ |
| 767 q0 = vec_subs(q0, delta);
\ | 767 q0 = vec_subs(q0, delta);
\ |
| 768 p0 = vec_adds(p0, delta);
\ | 768 p0 = vec_adds(p0, delta);
\ |
| 769 q0 = vec_adds(q0, deltaneg);
\ | 769 q0 = vec_adds(q0, deltaneg);
\ |
| 770 } | 770 } |
| 771 | 771 |
| 772 #define h264_loop_filter_luma_altivec(p2, p1, p0, q0, q1, q2, alpha, beta, tc0)
{ \ | 772 #define h264_loop_filter_luma_altivec(p2, p1, p0, q0, q1, q2, alpha, beta, tc0)
{ \ |
| 773 DECLARE_ALIGNED_16(unsigned char, temp)[16];
\ | 773 DECLARE_ALIGNED(16, unsigned char, temp)[16];
\ |
| 774 register vec_u8 alphavec;
\ | 774 register vec_u8 alphavec;
\ |
| 775 register vec_u8 betavec;
\ | 775 register vec_u8 betavec;
\ |
| 776 register vec_u8 mask;
\ | 776 register vec_u8 mask;
\ |
| 777 register vec_u8 p1mask;
\ | 777 register vec_u8 p1mask;
\ |
| 778 register vec_u8 q1mask;
\ | 778 register vec_u8 q1mask;
\ |
| 779 register vector signed char tc0vec;
\ | 779 register vector signed char tc0vec;
\ |
| 780 register vec_u8 finaltc0;
\ | 780 register vec_u8 finaltc0;
\ |
| 781 register vec_u8 tc0masked;
\ | 781 register vec_u8 tc0masked;
\ |
| 782 register vec_u8 newp1;
\ | 782 register vec_u8 newp1;
\ |
| 783 register vec_u8 newq1;
\ | 783 register vec_u8 newq1;
\ |
| (...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 843 write16x4(pix-2, stride, line1, line2, line3, line4); | 843 write16x4(pix-2, stride, line1, line2, line3, line4); |
| 844 } | 844 } |
| 845 | 845 |
| 846 static av_always_inline | 846 static av_always_inline |
| 847 void weight_h264_WxH_altivec(uint8_t *block, int stride, int log2_denom, int wei
ght, int offset, int w, int h) | 847 void weight_h264_WxH_altivec(uint8_t *block, int stride, int log2_denom, int wei
ght, int offset, int w, int h) |
| 848 { | 848 { |
| 849 int y, aligned; | 849 int y, aligned; |
| 850 vec_u8 vblock; | 850 vec_u8 vblock; |
| 851 vec_s16 vtemp, vweight, voffset, v0, v1; | 851 vec_s16 vtemp, vweight, voffset, v0, v1; |
| 852 vec_u16 vlog2_denom; | 852 vec_u16 vlog2_denom; |
| 853 DECLARE_ALIGNED_16(int32_t, temp)[4]; | 853 DECLARE_ALIGNED(16, int32_t, temp)[4]; |
| 854 LOAD_ZERO; | 854 LOAD_ZERO; |
| 855 | 855 |
| 856 offset <<= log2_denom; | 856 offset <<= log2_denom; |
| 857 if(log2_denom) offset += 1<<(log2_denom-1); | 857 if(log2_denom) offset += 1<<(log2_denom-1); |
| 858 temp[0] = log2_denom; | 858 temp[0] = log2_denom; |
| 859 temp[1] = weight; | 859 temp[1] = weight; |
| 860 temp[2] = offset; | 860 temp[2] = offset; |
| 861 | 861 |
| 862 vtemp = (vec_s16)vec_ld(0, temp); | 862 vtemp = (vec_s16)vec_ld(0, temp); |
| 863 vlog2_denom = (vec_u16)vec_splat(vtemp, 1); | 863 vlog2_denom = (vec_u16)vec_splat(vtemp, 1); |
| (...skipping 25 matching lines...) Expand all Loading... |
| 889 } | 889 } |
| 890 | 890 |
| 891 static av_always_inline | 891 static av_always_inline |
| 892 void biweight_h264_WxH_altivec(uint8_t *dst, uint8_t *src, int stride, int log2_
denom, | 892 void biweight_h264_WxH_altivec(uint8_t *dst, uint8_t *src, int stride, int log2_
denom, |
| 893 int weightd, int weights, int offset, int w, int
h) | 893 int weightd, int weights, int offset, int w, int
h) |
| 894 { | 894 { |
| 895 int y, dst_aligned, src_aligned; | 895 int y, dst_aligned, src_aligned; |
| 896 vec_u8 vsrc, vdst; | 896 vec_u8 vsrc, vdst; |
| 897 vec_s16 vtemp, vweights, vweightd, voffset, v0, v1, v2, v3; | 897 vec_s16 vtemp, vweights, vweightd, voffset, v0, v1, v2, v3; |
| 898 vec_u16 vlog2_denom; | 898 vec_u16 vlog2_denom; |
| 899 DECLARE_ALIGNED_16(int32_t, temp)[4]; | 899 DECLARE_ALIGNED(16, int32_t, temp)[4]; |
| 900 LOAD_ZERO; | 900 LOAD_ZERO; |
| 901 | 901 |
| 902 offset = ((offset + 1) | 1) << log2_denom; | 902 offset = ((offset + 1) | 1) << log2_denom; |
| 903 temp[0] = log2_denom+1; | 903 temp[0] = log2_denom+1; |
| 904 temp[1] = weights; | 904 temp[1] = weights; |
| 905 temp[2] = weightd; | 905 temp[2] = weightd; |
| 906 temp[3] = offset; | 906 temp[3] = offset; |
| 907 | 907 |
| 908 vtemp = (vec_s16)vec_ld(0, temp); | 908 vtemp = (vec_s16)vec_ld(0, temp); |
| 909 vlog2_denom = (vec_u16)vec_splat(vtemp, 1); | 909 vlog2_denom = (vec_u16)vec_splat(vtemp, 1); |
| (...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1012 c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels8x16_altivec; | 1012 c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels8x16_altivec; |
| 1013 c->weight_h264_pixels_tab[3] = ff_weight_h264_pixels8x8_altivec; | 1013 c->weight_h264_pixels_tab[3] = ff_weight_h264_pixels8x8_altivec; |
| 1014 c->weight_h264_pixels_tab[4] = ff_weight_h264_pixels8x4_altivec; | 1014 c->weight_h264_pixels_tab[4] = ff_weight_h264_pixels8x4_altivec; |
| 1015 c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels16x16_altivec; | 1015 c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels16x16_altivec; |
| 1016 c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels16x8_altivec; | 1016 c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels16x8_altivec; |
| 1017 c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels8x16_altivec; | 1017 c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels8x16_altivec; |
| 1018 c->biweight_h264_pixels_tab[3] = ff_biweight_h264_pixels8x8_altivec; | 1018 c->biweight_h264_pixels_tab[3] = ff_biweight_h264_pixels8x8_altivec; |
| 1019 c->biweight_h264_pixels_tab[4] = ff_biweight_h264_pixels8x4_altivec; | 1019 c->biweight_h264_pixels_tab[4] = ff_biweight_h264_pixels8x4_altivec; |
| 1020 } | 1020 } |
| 1021 } | 1021 } |
| OLD | NEW |