| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 198 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 209 in[0] = _mm_add_epi16(u[0], u[1]); | 209 in[0] = _mm_add_epi16(u[0], u[1]); |
| 210 in[1] = _mm_sub_epi16(u[0], u[1]); | 210 in[1] = _mm_sub_epi16(u[0], u[1]); |
| 211 in[1] = _mm_shuffle_epi32(in[1], 0x4E); | 211 in[1] = _mm_shuffle_epi32(in[1], 0x4E); |
| 212 } | 212 } |
| 213 | 213 |
| 214 static void iadst4_sse2(__m128i *in) { | 214 static void iadst4_sse2(__m128i *in) { |
| 215 const __m128i k__sinpi_p01_p04 = pair_set_epi16(sinpi_1_9, sinpi_4_9); | 215 const __m128i k__sinpi_p01_p04 = pair_set_epi16(sinpi_1_9, sinpi_4_9); |
| 216 const __m128i k__sinpi_p03_p02 = pair_set_epi16(sinpi_3_9, sinpi_2_9); | 216 const __m128i k__sinpi_p03_p02 = pair_set_epi16(sinpi_3_9, sinpi_2_9); |
| 217 const __m128i k__sinpi_p02_m01 = pair_set_epi16(sinpi_2_9, -sinpi_1_9); | 217 const __m128i k__sinpi_p02_m01 = pair_set_epi16(sinpi_2_9, -sinpi_1_9); |
| 218 const __m128i k__sinpi_p03_m04 = pair_set_epi16(sinpi_3_9, -sinpi_4_9); | 218 const __m128i k__sinpi_p03_m04 = pair_set_epi16(sinpi_3_9, -sinpi_4_9); |
| 219 const __m128i k__sinpi_p03_p03 = _mm_set1_epi16(sinpi_3_9); | 219 const __m128i k__sinpi_p03_p03 = _mm_set1_epi16((int16_t)sinpi_3_9); |
| 220 const __m128i kZero = _mm_set1_epi16(0); | 220 const __m128i kZero = _mm_set1_epi16(0); |
| 221 const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); | 221 const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); |
| 222 __m128i u[8], v[8], in7; | 222 __m128i u[8], v[8], in7; |
| 223 | 223 |
| 224 transpose_4x4(in); | 224 transpose_4x4(in); |
| 225 in7 = _mm_srli_si128(in[1], 8); | 225 in7 = _mm_srli_si128(in[1], 8); |
| 226 in7 = _mm_add_epi16(in7, in[0]); | 226 in7 = _mm_add_epi16(in7, in[0]); |
| 227 in7 = _mm_sub_epi16(in7, in[1]); | 227 in7 = _mm_sub_epi16(in7, in[1]); |
| 228 | 228 |
| 229 u[0] = _mm_unpacklo_epi16(in[0], in[1]); | 229 u[0] = _mm_unpacklo_epi16(in[0], in[1]); |
| (...skipping 404 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 634 const __m128i k__cospi_p10_p22 = pair_set_epi16(cospi_10_64, cospi_22_64); | 634 const __m128i k__cospi_p10_p22 = pair_set_epi16(cospi_10_64, cospi_22_64); |
| 635 const __m128i k__cospi_p22_m10 = pair_set_epi16(cospi_22_64, -cospi_10_64); | 635 const __m128i k__cospi_p22_m10 = pair_set_epi16(cospi_22_64, -cospi_10_64); |
| 636 const __m128i k__cospi_p18_p14 = pair_set_epi16(cospi_18_64, cospi_14_64); | 636 const __m128i k__cospi_p18_p14 = pair_set_epi16(cospi_18_64, cospi_14_64); |
| 637 const __m128i k__cospi_p14_m18 = pair_set_epi16(cospi_14_64, -cospi_18_64); | 637 const __m128i k__cospi_p14_m18 = pair_set_epi16(cospi_14_64, -cospi_18_64); |
| 638 const __m128i k__cospi_p26_p06 = pair_set_epi16(cospi_26_64, cospi_6_64); | 638 const __m128i k__cospi_p26_p06 = pair_set_epi16(cospi_26_64, cospi_6_64); |
| 639 const __m128i k__cospi_p06_m26 = pair_set_epi16(cospi_6_64, -cospi_26_64); | 639 const __m128i k__cospi_p06_m26 = pair_set_epi16(cospi_6_64, -cospi_26_64); |
| 640 const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); | 640 const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); |
| 641 const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64); | 641 const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64); |
| 642 const __m128i k__cospi_m24_p08 = pair_set_epi16(-cospi_24_64, cospi_8_64); | 642 const __m128i k__cospi_m24_p08 = pair_set_epi16(-cospi_24_64, cospi_8_64); |
| 643 const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); | 643 const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); |
| 644 const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); | 644 const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64); |
| 645 const __m128i k__const_0 = _mm_set1_epi16(0); | 645 const __m128i k__const_0 = _mm_set1_epi16(0); |
| 646 const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); | 646 const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); |
| 647 | 647 |
| 648 __m128i u0, u1, u2, u3, u4, u5, u6, u7, u8, u9, u10, u11, u12, u13, u14, u15; | 648 __m128i u0, u1, u2, u3, u4, u5, u6, u7, u8, u9, u10, u11, u12, u13, u14, u15; |
| 649 __m128i v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15; | 649 __m128i v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15; |
| 650 __m128i w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15; | 650 __m128i w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15; |
| 651 __m128i s0, s1, s2, s3, s4, s5, s6, s7; | 651 __m128i s0, s1, s2, s3, s4, s5, s6, s7; |
| 652 __m128i in0, in1, in2, in3, in4, in5, in6, in7; | 652 __m128i in0, in1, in2, in3, in4, in5, in6, in7; |
| 653 | 653 |
| 654 // transpose | 654 // transpose |
| (...skipping 868 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1523 const __m128i k__cospi_p03_m29 = pair_set_epi16(cospi_3_64, -cospi_29_64); | 1523 const __m128i k__cospi_p03_m29 = pair_set_epi16(cospi_3_64, -cospi_29_64); |
| 1524 const __m128i k__cospi_p04_p28 = pair_set_epi16(cospi_4_64, cospi_28_64); | 1524 const __m128i k__cospi_p04_p28 = pair_set_epi16(cospi_4_64, cospi_28_64); |
| 1525 const __m128i k__cospi_p28_m04 = pair_set_epi16(cospi_28_64, -cospi_4_64); | 1525 const __m128i k__cospi_p28_m04 = pair_set_epi16(cospi_28_64, -cospi_4_64); |
| 1526 const __m128i k__cospi_p20_p12 = pair_set_epi16(cospi_20_64, cospi_12_64); | 1526 const __m128i k__cospi_p20_p12 = pair_set_epi16(cospi_20_64, cospi_12_64); |
| 1527 const __m128i k__cospi_p12_m20 = pair_set_epi16(cospi_12_64, -cospi_20_64); | 1527 const __m128i k__cospi_p12_m20 = pair_set_epi16(cospi_12_64, -cospi_20_64); |
| 1528 const __m128i k__cospi_m28_p04 = pair_set_epi16(-cospi_28_64, cospi_4_64); | 1528 const __m128i k__cospi_m28_p04 = pair_set_epi16(-cospi_28_64, cospi_4_64); |
| 1529 const __m128i k__cospi_m12_p20 = pair_set_epi16(-cospi_12_64, cospi_20_64); | 1529 const __m128i k__cospi_m12_p20 = pair_set_epi16(-cospi_12_64, cospi_20_64); |
| 1530 const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); | 1530 const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); |
| 1531 const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64); | 1531 const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64); |
| 1532 const __m128i k__cospi_m24_p08 = pair_set_epi16(-cospi_24_64, cospi_8_64); | 1532 const __m128i k__cospi_m24_p08 = pair_set_epi16(-cospi_24_64, cospi_8_64); |
| 1533 const __m128i k__cospi_m16_m16 = _mm_set1_epi16(-cospi_16_64); | 1533 const __m128i k__cospi_m16_m16 = _mm_set1_epi16((int16_t)-cospi_16_64); |
| 1534 const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); | 1534 const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64); |
| 1535 const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); | 1535 const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); |
| 1536 const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64); | 1536 const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64); |
| 1537 const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); | 1537 const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); |
| 1538 const __m128i kZero = _mm_set1_epi16(0); | 1538 const __m128i kZero = _mm_set1_epi16(0); |
| 1539 | 1539 |
| 1540 u[0] = _mm_unpacklo_epi16(in[15], in[0]); | 1540 u[0] = _mm_unpacklo_epi16(in[15], in[0]); |
| 1541 u[1] = _mm_unpackhi_epi16(in[15], in[0]); | 1541 u[1] = _mm_unpackhi_epi16(in[15], in[0]); |
| 1542 u[2] = _mm_unpacklo_epi16(in[13], in[2]); | 1542 u[2] = _mm_unpacklo_epi16(in[13], in[2]); |
| 1543 u[3] = _mm_unpackhi_epi16(in[13], in[2]); | 1543 u[3] = _mm_unpackhi_epi16(in[13], in[2]); |
| 1544 u[4] = _mm_unpacklo_epi16(in[11], in[4]); | 1544 u[4] = _mm_unpacklo_epi16(in[11], in[4]); |
| (...skipping 433 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1978 const __m128i k__cospi_p14_m18 = pair_set_epi16(cospi_14_64, -cospi_18_64); | 1978 const __m128i k__cospi_p14_m18 = pair_set_epi16(cospi_14_64, -cospi_18_64); |
| 1979 const __m128i k__cospi_p18_p14 = pair_set_epi16(cospi_18_64, cospi_14_64); | 1979 const __m128i k__cospi_p18_p14 = pair_set_epi16(cospi_18_64, cospi_14_64); |
| 1980 const __m128i k__cospi_p22_m10 = pair_set_epi16(cospi_22_64, -cospi_10_64); | 1980 const __m128i k__cospi_p22_m10 = pair_set_epi16(cospi_22_64, -cospi_10_64); |
| 1981 const __m128i k__cospi_p10_p22 = pair_set_epi16(cospi_10_64, cospi_22_64); | 1981 const __m128i k__cospi_p10_p22 = pair_set_epi16(cospi_10_64, cospi_22_64); |
| 1982 const __m128i k__cospi_p06_m26 = pair_set_epi16(cospi_6_64, -cospi_26_64); | 1982 const __m128i k__cospi_p06_m26 = pair_set_epi16(cospi_6_64, -cospi_26_64); |
| 1983 const __m128i k__cospi_p26_p06 = pair_set_epi16(cospi_26_64, cospi_6_64); | 1983 const __m128i k__cospi_p26_p06 = pair_set_epi16(cospi_26_64, cospi_6_64); |
| 1984 const __m128i k__cospi_p28_m04 = pair_set_epi16(cospi_28_64, -cospi_4_64); | 1984 const __m128i k__cospi_p28_m04 = pair_set_epi16(cospi_28_64, -cospi_4_64); |
| 1985 const __m128i k__cospi_p04_p28 = pair_set_epi16(cospi_4_64, cospi_28_64); | 1985 const __m128i k__cospi_p04_p28 = pair_set_epi16(cospi_4_64, cospi_28_64); |
| 1986 const __m128i k__cospi_p12_m20 = pair_set_epi16(cospi_12_64, -cospi_20_64); | 1986 const __m128i k__cospi_p12_m20 = pair_set_epi16(cospi_12_64, -cospi_20_64); |
| 1987 const __m128i k__cospi_p20_p12 = pair_set_epi16(cospi_20_64, cospi_12_64); | 1987 const __m128i k__cospi_p20_p12 = pair_set_epi16(cospi_20_64, cospi_12_64); |
| 1988 const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); | 1988 const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64); |
| 1989 const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); | 1989 const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); |
| 1990 const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64); | 1990 const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64); |
| 1991 const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); | 1991 const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); |
| 1992 const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64); | 1992 const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64); |
| 1993 const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64); | 1993 const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64); |
| 1994 const __m128i k__cospi_m24_m08 = pair_set_epi16(-cospi_24_64, -cospi_8_64); | 1994 const __m128i k__cospi_m24_m08 = pair_set_epi16(-cospi_24_64, -cospi_8_64); |
| 1995 const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64); | 1995 const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64); |
| 1996 const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); | 1996 const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); |
| 1997 __m128i v[16], u[16], s[16], t[16]; | 1997 __m128i v[16], u[16], s[16], t[16]; |
| 1998 | 1998 |
| (...skipping 1979 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3978 RECON_AND_STORE(dest, dc_value); | 3978 RECON_AND_STORE(dest, dc_value); |
| 3979 RECON_AND_STORE(dest, dc_value); | 3979 RECON_AND_STORE(dest, dc_value); |
| 3980 RECON_AND_STORE(dest, dc_value); | 3980 RECON_AND_STORE(dest, dc_value); |
| 3981 RECON_AND_STORE(dest, dc_value); | 3981 RECON_AND_STORE(dest, dc_value); |
| 3982 RECON_AND_STORE(dest, dc_value); | 3982 RECON_AND_STORE(dest, dc_value); |
| 3983 RECON_AND_STORE(dest, dc_value); | 3983 RECON_AND_STORE(dest, dc_value); |
| 3984 RECON_AND_STORE(dest, dc_value); | 3984 RECON_AND_STORE(dest, dc_value); |
| 3985 dest += 8 - (stride * 32); | 3985 dest += 8 - (stride * 32); |
| 3986 } | 3986 } |
| 3987 } | 3987 } |
| OLD | NEW |