OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 198 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
209 in[0] = _mm_add_epi16(u[0], u[1]); | 209 in[0] = _mm_add_epi16(u[0], u[1]); |
210 in[1] = _mm_sub_epi16(u[0], u[1]); | 210 in[1] = _mm_sub_epi16(u[0], u[1]); |
211 in[1] = _mm_shuffle_epi32(in[1], 0x4E); | 211 in[1] = _mm_shuffle_epi32(in[1], 0x4E); |
212 } | 212 } |
213 | 213 |
214 static void iadst4_sse2(__m128i *in) { | 214 static void iadst4_sse2(__m128i *in) { |
215 const __m128i k__sinpi_p01_p04 = pair_set_epi16(sinpi_1_9, sinpi_4_9); | 215 const __m128i k__sinpi_p01_p04 = pair_set_epi16(sinpi_1_9, sinpi_4_9); |
216 const __m128i k__sinpi_p03_p02 = pair_set_epi16(sinpi_3_9, sinpi_2_9); | 216 const __m128i k__sinpi_p03_p02 = pair_set_epi16(sinpi_3_9, sinpi_2_9); |
217 const __m128i k__sinpi_p02_m01 = pair_set_epi16(sinpi_2_9, -sinpi_1_9); | 217 const __m128i k__sinpi_p02_m01 = pair_set_epi16(sinpi_2_9, -sinpi_1_9); |
218 const __m128i k__sinpi_p03_m04 = pair_set_epi16(sinpi_3_9, -sinpi_4_9); | 218 const __m128i k__sinpi_p03_m04 = pair_set_epi16(sinpi_3_9, -sinpi_4_9); |
219 const __m128i k__sinpi_p03_p03 = _mm_set1_epi16(sinpi_3_9); | 219 const __m128i k__sinpi_p03_p03 = _mm_set1_epi16((int16_t)sinpi_3_9); |
220 const __m128i kZero = _mm_set1_epi16(0); | 220 const __m128i kZero = _mm_set1_epi16(0); |
221 const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); | 221 const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); |
222 __m128i u[8], v[8], in7; | 222 __m128i u[8], v[8], in7; |
223 | 223 |
224 transpose_4x4(in); | 224 transpose_4x4(in); |
225 in7 = _mm_srli_si128(in[1], 8); | 225 in7 = _mm_srli_si128(in[1], 8); |
226 in7 = _mm_add_epi16(in7, in[0]); | 226 in7 = _mm_add_epi16(in7, in[0]); |
227 in7 = _mm_sub_epi16(in7, in[1]); | 227 in7 = _mm_sub_epi16(in7, in[1]); |
228 | 228 |
229 u[0] = _mm_unpacklo_epi16(in[0], in[1]); | 229 u[0] = _mm_unpacklo_epi16(in[0], in[1]); |
(...skipping 404 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
634 const __m128i k__cospi_p10_p22 = pair_set_epi16(cospi_10_64, cospi_22_64); | 634 const __m128i k__cospi_p10_p22 = pair_set_epi16(cospi_10_64, cospi_22_64); |
635 const __m128i k__cospi_p22_m10 = pair_set_epi16(cospi_22_64, -cospi_10_64); | 635 const __m128i k__cospi_p22_m10 = pair_set_epi16(cospi_22_64, -cospi_10_64); |
636 const __m128i k__cospi_p18_p14 = pair_set_epi16(cospi_18_64, cospi_14_64); | 636 const __m128i k__cospi_p18_p14 = pair_set_epi16(cospi_18_64, cospi_14_64); |
637 const __m128i k__cospi_p14_m18 = pair_set_epi16(cospi_14_64, -cospi_18_64); | 637 const __m128i k__cospi_p14_m18 = pair_set_epi16(cospi_14_64, -cospi_18_64); |
638 const __m128i k__cospi_p26_p06 = pair_set_epi16(cospi_26_64, cospi_6_64); | 638 const __m128i k__cospi_p26_p06 = pair_set_epi16(cospi_26_64, cospi_6_64); |
639 const __m128i k__cospi_p06_m26 = pair_set_epi16(cospi_6_64, -cospi_26_64); | 639 const __m128i k__cospi_p06_m26 = pair_set_epi16(cospi_6_64, -cospi_26_64); |
640 const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); | 640 const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); |
641 const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64); | 641 const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64); |
642 const __m128i k__cospi_m24_p08 = pair_set_epi16(-cospi_24_64, cospi_8_64); | 642 const __m128i k__cospi_m24_p08 = pair_set_epi16(-cospi_24_64, cospi_8_64); |
643 const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); | 643 const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); |
644 const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); | 644 const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64); |
645 const __m128i k__const_0 = _mm_set1_epi16(0); | 645 const __m128i k__const_0 = _mm_set1_epi16(0); |
646 const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); | 646 const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); |
647 | 647 |
648 __m128i u0, u1, u2, u3, u4, u5, u6, u7, u8, u9, u10, u11, u12, u13, u14, u15; | 648 __m128i u0, u1, u2, u3, u4, u5, u6, u7, u8, u9, u10, u11, u12, u13, u14, u15; |
649 __m128i v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15; | 649 __m128i v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15; |
650 __m128i w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15; | 650 __m128i w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15; |
651 __m128i s0, s1, s2, s3, s4, s5, s6, s7; | 651 __m128i s0, s1, s2, s3, s4, s5, s6, s7; |
652 __m128i in0, in1, in2, in3, in4, in5, in6, in7; | 652 __m128i in0, in1, in2, in3, in4, in5, in6, in7; |
653 | 653 |
654 // transpose | 654 // transpose |
(...skipping 868 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1523 const __m128i k__cospi_p03_m29 = pair_set_epi16(cospi_3_64, -cospi_29_64); | 1523 const __m128i k__cospi_p03_m29 = pair_set_epi16(cospi_3_64, -cospi_29_64); |
1524 const __m128i k__cospi_p04_p28 = pair_set_epi16(cospi_4_64, cospi_28_64); | 1524 const __m128i k__cospi_p04_p28 = pair_set_epi16(cospi_4_64, cospi_28_64); |
1525 const __m128i k__cospi_p28_m04 = pair_set_epi16(cospi_28_64, -cospi_4_64); | 1525 const __m128i k__cospi_p28_m04 = pair_set_epi16(cospi_28_64, -cospi_4_64); |
1526 const __m128i k__cospi_p20_p12 = pair_set_epi16(cospi_20_64, cospi_12_64); | 1526 const __m128i k__cospi_p20_p12 = pair_set_epi16(cospi_20_64, cospi_12_64); |
1527 const __m128i k__cospi_p12_m20 = pair_set_epi16(cospi_12_64, -cospi_20_64); | 1527 const __m128i k__cospi_p12_m20 = pair_set_epi16(cospi_12_64, -cospi_20_64); |
1528 const __m128i k__cospi_m28_p04 = pair_set_epi16(-cospi_28_64, cospi_4_64); | 1528 const __m128i k__cospi_m28_p04 = pair_set_epi16(-cospi_28_64, cospi_4_64); |
1529 const __m128i k__cospi_m12_p20 = pair_set_epi16(-cospi_12_64, cospi_20_64); | 1529 const __m128i k__cospi_m12_p20 = pair_set_epi16(-cospi_12_64, cospi_20_64); |
1530 const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); | 1530 const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); |
1531 const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64); | 1531 const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64); |
1532 const __m128i k__cospi_m24_p08 = pair_set_epi16(-cospi_24_64, cospi_8_64); | 1532 const __m128i k__cospi_m24_p08 = pair_set_epi16(-cospi_24_64, cospi_8_64); |
1533 const __m128i k__cospi_m16_m16 = _mm_set1_epi16(-cospi_16_64); | 1533 const __m128i k__cospi_m16_m16 = _mm_set1_epi16((int16_t)-cospi_16_64); |
1534 const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); | 1534 const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64); |
1535 const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); | 1535 const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); |
1536 const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64); | 1536 const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64); |
1537 const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); | 1537 const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); |
1538 const __m128i kZero = _mm_set1_epi16(0); | 1538 const __m128i kZero = _mm_set1_epi16(0); |
1539 | 1539 |
1540 u[0] = _mm_unpacklo_epi16(in[15], in[0]); | 1540 u[0] = _mm_unpacklo_epi16(in[15], in[0]); |
1541 u[1] = _mm_unpackhi_epi16(in[15], in[0]); | 1541 u[1] = _mm_unpackhi_epi16(in[15], in[0]); |
1542 u[2] = _mm_unpacklo_epi16(in[13], in[2]); | 1542 u[2] = _mm_unpacklo_epi16(in[13], in[2]); |
1543 u[3] = _mm_unpackhi_epi16(in[13], in[2]); | 1543 u[3] = _mm_unpackhi_epi16(in[13], in[2]); |
1544 u[4] = _mm_unpacklo_epi16(in[11], in[4]); | 1544 u[4] = _mm_unpacklo_epi16(in[11], in[4]); |
(...skipping 433 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1978 const __m128i k__cospi_p14_m18 = pair_set_epi16(cospi_14_64, -cospi_18_64); | 1978 const __m128i k__cospi_p14_m18 = pair_set_epi16(cospi_14_64, -cospi_18_64); |
1979 const __m128i k__cospi_p18_p14 = pair_set_epi16(cospi_18_64, cospi_14_64); | 1979 const __m128i k__cospi_p18_p14 = pair_set_epi16(cospi_18_64, cospi_14_64); |
1980 const __m128i k__cospi_p22_m10 = pair_set_epi16(cospi_22_64, -cospi_10_64); | 1980 const __m128i k__cospi_p22_m10 = pair_set_epi16(cospi_22_64, -cospi_10_64); |
1981 const __m128i k__cospi_p10_p22 = pair_set_epi16(cospi_10_64, cospi_22_64); | 1981 const __m128i k__cospi_p10_p22 = pair_set_epi16(cospi_10_64, cospi_22_64); |
1982 const __m128i k__cospi_p06_m26 = pair_set_epi16(cospi_6_64, -cospi_26_64); | 1982 const __m128i k__cospi_p06_m26 = pair_set_epi16(cospi_6_64, -cospi_26_64); |
1983 const __m128i k__cospi_p26_p06 = pair_set_epi16(cospi_26_64, cospi_6_64); | 1983 const __m128i k__cospi_p26_p06 = pair_set_epi16(cospi_26_64, cospi_6_64); |
1984 const __m128i k__cospi_p28_m04 = pair_set_epi16(cospi_28_64, -cospi_4_64); | 1984 const __m128i k__cospi_p28_m04 = pair_set_epi16(cospi_28_64, -cospi_4_64); |
1985 const __m128i k__cospi_p04_p28 = pair_set_epi16(cospi_4_64, cospi_28_64); | 1985 const __m128i k__cospi_p04_p28 = pair_set_epi16(cospi_4_64, cospi_28_64); |
1986 const __m128i k__cospi_p12_m20 = pair_set_epi16(cospi_12_64, -cospi_20_64); | 1986 const __m128i k__cospi_p12_m20 = pair_set_epi16(cospi_12_64, -cospi_20_64); |
1987 const __m128i k__cospi_p20_p12 = pair_set_epi16(cospi_20_64, cospi_12_64); | 1987 const __m128i k__cospi_p20_p12 = pair_set_epi16(cospi_20_64, cospi_12_64); |
1988 const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); | 1988 const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64); |
1989 const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); | 1989 const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); |
1990 const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64); | 1990 const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64); |
1991 const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); | 1991 const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); |
1992 const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64); | 1992 const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64); |
1993 const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64); | 1993 const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64); |
1994 const __m128i k__cospi_m24_m08 = pair_set_epi16(-cospi_24_64, -cospi_8_64); | 1994 const __m128i k__cospi_m24_m08 = pair_set_epi16(-cospi_24_64, -cospi_8_64); |
1995 const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64); | 1995 const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64); |
1996 const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); | 1996 const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); |
1997 __m128i v[16], u[16], s[16], t[16]; | 1997 __m128i v[16], u[16], s[16], t[16]; |
1998 | 1998 |
(...skipping 1979 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3978 RECON_AND_STORE(dest, dc_value); | 3978 RECON_AND_STORE(dest, dc_value); |
3979 RECON_AND_STORE(dest, dc_value); | 3979 RECON_AND_STORE(dest, dc_value); |
3980 RECON_AND_STORE(dest, dc_value); | 3980 RECON_AND_STORE(dest, dc_value); |
3981 RECON_AND_STORE(dest, dc_value); | 3981 RECON_AND_STORE(dest, dc_value); |
3982 RECON_AND_STORE(dest, dc_value); | 3982 RECON_AND_STORE(dest, dc_value); |
3983 RECON_AND_STORE(dest, dc_value); | 3983 RECON_AND_STORE(dest, dc_value); |
3984 RECON_AND_STORE(dest, dc_value); | 3984 RECON_AND_STORE(dest, dc_value); |
3985 dest += 8 - (stride * 32); | 3985 dest += 8 - (stride * 32); |
3986 } | 3986 } |
3987 } | 3987 } |
OLD | NEW |