| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 17 matching lines...) |
| 28 if (cmp) \ | 28 if (cmp) \ |
| 29 break; \ | 29 break; \ |
| 30 qcoeff_ptr[z] = y[z]; \ | 30 qcoeff_ptr[z] = y[z]; \ |
| 31 eob = i; \ | 31 eob = i; \ |
| 32 zbin_boost_ptr = b->zrun_zbin_boost; \ | 32 zbin_boost_ptr = b->zrun_zbin_boost; \ |
| 33 } while (0) | 33 } while (0) |
| 34 | 34 |
| 35 void vp8_regular_quantize_b_sse2(BLOCK *b, BLOCKD *d) | 35 void vp8_regular_quantize_b_sse2(BLOCK *b, BLOCKD *d) |
| 36 { | 36 { |
| 37 char eob = 0; | 37 char eob = 0; |
| 38 short *zbin_boost_ptr = b->zrun_zbin_boost; | 38 short *zbin_boost_ptr; |
| 39 short *qcoeff_ptr = d->qcoeff; | 39 short *qcoeff_ptr = d->qcoeff; |
| 40 DECLARE_ALIGNED_ARRAY(16, short, x, 16); | 40 DECLARE_ALIGNED(16, short, x[16]); |
| 41 DECLARE_ALIGNED_ARRAY(16, short, y, 16); | 41 DECLARE_ALIGNED(16, short, y[16]); |
| 42 | 42 |
| 43 __m128i sz0, x0, sz1, x1, y0, y1, x_minus_zbin0, x_minus_zbin1; | 43 __m128i sz0, x0, sz1, x1, y0, y1, x_minus_zbin0, x_minus_zbin1; |
| 44 __m128i quant_shift0 = _mm_load_si128((__m128i *)(b->quant_shift)); | 44 __m128i quant_shift0 = _mm_load_si128((__m128i *)(b->quant_shift)); |
| 45 __m128i quant_shift1 = _mm_load_si128((__m128i *)(b->quant_shift + 8)); | 45 __m128i quant_shift1 = _mm_load_si128((__m128i *)(b->quant_shift + 8)); |
| 46 __m128i z0 = _mm_load_si128((__m128i *)(b->coeff)); | 46 __m128i z0 = _mm_load_si128((__m128i *)(b->coeff)); |
| 47 __m128i z1 = _mm_load_si128((__m128i *)(b->coeff+8)); | 47 __m128i z1 = _mm_load_si128((__m128i *)(b->coeff+8)); |
| 48 __m128i zbin_extra = _mm_cvtsi32_si128(b->zbin_extra); | 48 __m128i zbin_extra = _mm_cvtsi32_si128(b->zbin_extra); |
| 49 __m128i zbin0 = _mm_load_si128((__m128i *)(b->zbin)); | 49 __m128i zbin0 = _mm_load_si128((__m128i *)(b->zbin)); |
| 50 __m128i zbin1 = _mm_load_si128((__m128i *)(b->zbin + 8)); | 50 __m128i zbin1 = _mm_load_si128((__m128i *)(b->zbin + 8)); |
| 51 __m128i round0 = _mm_load_si128((__m128i *)(b->round)); | 51 __m128i round0 = _mm_load_si128((__m128i *)(b->round)); |
| 52 __m128i round1 = _mm_load_si128((__m128i *)(b->round + 8)); | 52 __m128i round1 = _mm_load_si128((__m128i *)(b->round + 8)); |
| 53 __m128i quant0 = _mm_load_si128((__m128i *)(b->quant)); | 53 __m128i quant0 = _mm_load_si128((__m128i *)(b->quant)); |
| 54 __m128i quant1 = _mm_load_si128((__m128i *)(b->quant + 8)); | 54 __m128i quant1 = _mm_load_si128((__m128i *)(b->quant + 8)); |
| 55 __m128i dequant0 = _mm_load_si128((__m128i *)(d->dequant)); | 55 __m128i dequant0 = _mm_load_si128((__m128i *)(d->dequant)); |
| 56 __m128i dequant1 = _mm_load_si128((__m128i *)(d->dequant + 8)); | 56 __m128i dequant1 = _mm_load_si128((__m128i *)(d->dequant + 8)); |
| 57 | 57 |
| 58 vpx_memset(qcoeff_ptr, 0, 32); | 58 memset(qcoeff_ptr, 0, 32); |
| 59 | 59 |
| 60 /* Duplicate to all lanes. */ | 60 /* Duplicate to all lanes. */ |
| 61 zbin_extra = _mm_shufflelo_epi16(zbin_extra, 0); | 61 zbin_extra = _mm_shufflelo_epi16(zbin_extra, 0); |
| 62 zbin_extra = _mm_unpacklo_epi16(zbin_extra, zbin_extra); | 62 zbin_extra = _mm_unpacklo_epi16(zbin_extra, zbin_extra); |
| 63 | 63 |
| 64 /* Sign of z: z >> 15 */ | 64 /* Sign of z: z >> 15 */ |
| 65 sz0 = _mm_srai_epi16(z0, 15); | 65 sz0 = _mm_srai_epi16(z0, 15); |
| 66 sz1 = _mm_srai_epi16(z1, 15); | 66 sz1 = _mm_srai_epi16(z1, 15); |
| 67 | 67 |
| 68 /* x = abs(z): (z ^ sz) - sz */ | 68 /* x = abs(z): (z ^ sz) - sz */ |
| (...skipping 150 matching lines...) |
| 219 | 219 |
| 220 x0 = _mm_max_epi16(x0, x1); | 220 x0 = _mm_max_epi16(x0, x1); |
| 221 | 221 |
| 222 /* okay, just 2! */ | 222 /* okay, just 2! */ |
| 223 x1 = _mm_shufflelo_epi16(x0, 0x1); // 0b00000001 | 223 x1 = _mm_shufflelo_epi16(x0, 0x1); // 0b00000001 |
| 224 | 224 |
| 225 x0 = _mm_max_epi16(x0, x1); | 225 x0 = _mm_max_epi16(x0, x1); |
| 226 | 226 |
| 227 *d->eob = 0xFF & _mm_cvtsi128_si32(x0); | 227 *d->eob = 0xFF & _mm_cvtsi128_si32(x0); |
| 228 } | 228 } |
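A note on the "Duplicate to all lanes" step: with plain SSE2, a 16-bit scalar that _mm_cvtsi32_si128 leaves in lane 0 can be splatted by one shufflelo plus one unpacklo, exactly as the function does for zbin_extra. A minimal standalone sketch (the helper name is ours, not libvpx's):

#include <emmintrin.h>

/* Broadcast a 16-bit value to all eight lanes, SSE2 only. */
static __m128i splat_epi16(int value)
{
    __m128i v = _mm_cvtsi32_si128(value); /* value lands in lane 0 */
    v = _mm_shufflelo_epi16(v, 0);        /* low four lanes = lane 0 */
    return _mm_unpacklo_epi16(v, v);      /* interleave with itself: all eight lanes */
}

_mm_set1_epi16() would be the equivalent one-liner; spelling the sequence out keeps the dependency chain visible in the source.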
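The "Sign of z" / "x = abs(z): (z ^ sz) - sz" comments describe the classic branchless absolute value: an arithmetic shift by 15 turns the sign bit into a 0 or all-ones mask, and xor-then-subtract conditionally negates. A scalar model of what the per-lane _mm_srai_epi16 plus the elided xor/subtract steps compute (our sketch, not project code):

#include <stdint.h>

static int16_t abs16(int16_t z)
{
    /* 0 if z >= 0, -1 (all ones) if z < 0; arithmetic shift assumed,
     * which _mm_srai_epi16 guarantees in the vector code. */
    int16_t sz = z >> 15;
    /* identity when sz == 0; ~z + 1 == -z when sz == -1 */
    return (int16_t)((z ^ sz) - sz);
}

As with the SIMD form, -32768 has no positive 16-bit counterpart and wraps back to itself.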
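The final lines reduce eight per-lane eob candidates to a single value: each max against a lane-shifted copy halves the number of live candidates ("okay, just 2!"), and the low byte of lane 0 is stored through d->eob. The two visible steps are the tail of that reduction; one plausible full SSE2 reduction in the same style (our reconstruction of the elided steps, not necessarily the exact code):

#include <emmintrin.h>

/* Horizontal max of eight signed 16-bit lanes. */
static int hmax_epi16(__m128i x)
{
    x = _mm_max_epi16(x, _mm_srli_si128(x, 8));        /* 8 candidates -> 4 */
    x = _mm_max_epi16(x, _mm_srli_si128(x, 4));        /* 4 -> 2 */
    x = _mm_max_epi16(x, _mm_shufflelo_epi16(x, 0x1)); /* 2 -> 1, as above */
    return 0xFF & _mm_cvtsi128_si32(x);                /* low byte of lane 0 */
}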