OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 17 matching lines...) Expand all Loading... |
28 if (cmp) \ | 28 if (cmp) \ |
29 break; \ | 29 break; \ |
30 qcoeff_ptr[z] = y[z]; \ | 30 qcoeff_ptr[z] = y[z]; \ |
31 eob = i; \ | 31 eob = i; \ |
32 zbin_boost_ptr = b->zrun_zbin_boost; \ | 32 zbin_boost_ptr = b->zrun_zbin_boost; \ |
33 } while (0) | 33 } while (0) |
34 | 34 |
35 void vp8_regular_quantize_b_sse2(BLOCK *b, BLOCKD *d) | 35 void vp8_regular_quantize_b_sse2(BLOCK *b, BLOCKD *d) |
36 { | 36 { |
37 char eob = 0; | 37 char eob = 0; |
38 short *zbin_boost_ptr = b->zrun_zbin_boost; | 38 short *zbin_boost_ptr; |
39 short *qcoeff_ptr = d->qcoeff; | 39 short *qcoeff_ptr = d->qcoeff; |
40 DECLARE_ALIGNED_ARRAY(16, short, x, 16); | 40 DECLARE_ALIGNED(16, short, x[16]); |
41 DECLARE_ALIGNED_ARRAY(16, short, y, 16); | 41 DECLARE_ALIGNED(16, short, y[16]); |
42 | 42 |
43 __m128i sz0, x0, sz1, x1, y0, y1, x_minus_zbin0, x_minus_zbin1; | 43 __m128i sz0, x0, sz1, x1, y0, y1, x_minus_zbin0, x_minus_zbin1; |
44 __m128i quant_shift0 = _mm_load_si128((__m128i *)(b->quant_shift)); | 44 __m128i quant_shift0 = _mm_load_si128((__m128i *)(b->quant_shift)); |
45 __m128i quant_shift1 = _mm_load_si128((__m128i *)(b->quant_shift + 8)); | 45 __m128i quant_shift1 = _mm_load_si128((__m128i *)(b->quant_shift + 8)); |
46 __m128i z0 = _mm_load_si128((__m128i *)(b->coeff)); | 46 __m128i z0 = _mm_load_si128((__m128i *)(b->coeff)); |
47 __m128i z1 = _mm_load_si128((__m128i *)(b->coeff+8)); | 47 __m128i z1 = _mm_load_si128((__m128i *)(b->coeff+8)); |
48 __m128i zbin_extra = _mm_cvtsi32_si128(b->zbin_extra); | 48 __m128i zbin_extra = _mm_cvtsi32_si128(b->zbin_extra); |
49 __m128i zbin0 = _mm_load_si128((__m128i *)(b->zbin)); | 49 __m128i zbin0 = _mm_load_si128((__m128i *)(b->zbin)); |
50 __m128i zbin1 = _mm_load_si128((__m128i *)(b->zbin + 8)); | 50 __m128i zbin1 = _mm_load_si128((__m128i *)(b->zbin + 8)); |
51 __m128i round0 = _mm_load_si128((__m128i *)(b->round)); | 51 __m128i round0 = _mm_load_si128((__m128i *)(b->round)); |
52 __m128i round1 = _mm_load_si128((__m128i *)(b->round + 8)); | 52 __m128i round1 = _mm_load_si128((__m128i *)(b->round + 8)); |
53 __m128i quant0 = _mm_load_si128((__m128i *)(b->quant)); | 53 __m128i quant0 = _mm_load_si128((__m128i *)(b->quant)); |
54 __m128i quant1 = _mm_load_si128((__m128i *)(b->quant + 8)); | 54 __m128i quant1 = _mm_load_si128((__m128i *)(b->quant + 8)); |
55 __m128i dequant0 = _mm_load_si128((__m128i *)(d->dequant)); | 55 __m128i dequant0 = _mm_load_si128((__m128i *)(d->dequant)); |
56 __m128i dequant1 = _mm_load_si128((__m128i *)(d->dequant + 8)); | 56 __m128i dequant1 = _mm_load_si128((__m128i *)(d->dequant + 8)); |
57 | 57 |
58 vpx_memset(qcoeff_ptr, 0, 32); | 58 memset(qcoeff_ptr, 0, 32); |
59 | 59 |
60 /* Duplicate to all lanes. */ | 60 /* Duplicate to all lanes. */ |
61 zbin_extra = _mm_shufflelo_epi16(zbin_extra, 0); | 61 zbin_extra = _mm_shufflelo_epi16(zbin_extra, 0); |
62 zbin_extra = _mm_unpacklo_epi16(zbin_extra, zbin_extra); | 62 zbin_extra = _mm_unpacklo_epi16(zbin_extra, zbin_extra); |
63 | 63 |
64 /* Sign of z: z >> 15 */ | 64 /* Sign of z: z >> 15 */ |
65 sz0 = _mm_srai_epi16(z0, 15); | 65 sz0 = _mm_srai_epi16(z0, 15); |
66 sz1 = _mm_srai_epi16(z1, 15); | 66 sz1 = _mm_srai_epi16(z1, 15); |
67 | 67 |
68 /* x = abs(z): (z ^ sz) - sz */ | 68 /* x = abs(z): (z ^ sz) - sz */ |
(...skipping 150 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
219 | 219 |
220 x0 = _mm_max_epi16(x0, x1); | 220 x0 = _mm_max_epi16(x0, x1); |
221 | 221 |
222 /* okay, just 2! */ | 222 /* okay, just 2! */ |
223 x1 = _mm_shufflelo_epi16(x0, 0x1); // 0b00000001 | 223 x1 = _mm_shufflelo_epi16(x0, 0x1); // 0b00000001 |
224 | 224 |
225 x0 = _mm_max_epi16(x0, x1); | 225 x0 = _mm_max_epi16(x0, x1); |
226 | 226 |
227 *d->eob = 0xFF & _mm_cvtsi128_si32(x0); | 227 *d->eob = 0xFF & _mm_cvtsi128_si32(x0); |
228 } | 228 } |
OLD | NEW |