| Index: source/libvpx/vp8/decoder/arm/neon/idct_blk_neon.c
|
| ===================================================================
|
| --- source/libvpx/vp8/decoder/arm/neon/idct_blk_neon.c (revision 60257)
|
| +++ source/libvpx/vp8/decoder/arm/neon/idct_blk_neon.c (working copy)
|
| @@ -1,5 +1,5 @@
|
| /*
|
| - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
| + * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
| *
|
| * Use of this source code is governed by a BSD-style license
|
| * that can be found in the LICENSE file in the root of the source
|
| @@ -12,6 +12,21 @@
|
| #include "idct.h"
|
| #include "dequantize.h"
|
|
|
| +/* Declare these helper prototypes here, local to this file, because we
|
| + * do not want to maintain them anywhere outside of this file.
|
| + */
|
| +void idct_dequant_dc_full_2x_neon
|
| + (short *input, short *dq, unsigned char *pre, unsigned char *dst,
|
| + int stride, short *dc);
|
| +void idct_dequant_dc_0_2x_neon
|
| + (short *dc, unsigned char *pre, unsigned char *dst, int stride);
|
| +void idct_dequant_full_2x_neon
|
| + (short *q, short *dq, unsigned char *pre, unsigned char *dst,
|
| + int pitch, int stride);
|
| +void idct_dequant_0_2x_neon
|
| + (short *q, short dq, unsigned char *pre, int pitch,
|
| + unsigned char *dst, int stride);
|
| +
|
| void vp8_dequant_dc_idct_add_y_block_neon
|
| (short *q, short *dq, unsigned char *pre,
|
| unsigned char *dst, int stride, char *eobs, short *dc)
|
| @@ -20,26 +35,16 @@
|
|
|
| for (i = 0; i < 4; i++)
|
| {
|
| - if (eobs[0] > 1)
|
| - vp8_dequant_dc_idct_add_neon (q, dq, pre, dst, 16, stride, dc[0]);
|
| + if (((short *)eobs)[0] & 0xfefe)
|
| + idct_dequant_dc_full_2x_neon (q, dq, pre, dst, stride, dc);
|
| else
|
| - vp8_dc_only_idct_add_neon (dc[0], pre, dst, 16, stride);
|
| + idct_dequant_dc_0_2x_neon(dc, pre, dst, stride);
|
|
|
| - if (eobs[1] > 1)
|
| - vp8_dequant_dc_idct_add_neon (q+16, dq, pre+4, dst+4, 16, stride, dc[1]);
|
| + if (((short *)eobs)[1] & 0xfefe)
|
| + idct_dequant_dc_full_2x_neon (q+32, dq, pre+8, dst+8, stride, dc+2);
|
| else
|
| - vp8_dc_only_idct_add_neon (dc[1], pre+4, dst+4, 16, stride);
|
| + idct_dequant_dc_0_2x_neon(dc+2, pre+8, dst+8, stride);
|
|
|
| - if (eobs[2] > 1)
|
| - vp8_dequant_dc_idct_add_neon (q+32, dq, pre+8, dst+8, 16, stride, dc[2]);
|
| - else
|
| - vp8_dc_only_idct_add_neon (dc[2], pre+8, dst+8, 16, stride);
|
| -
|
| - if (eobs[3] > 1)
|
| - vp8_dequant_dc_idct_add_neon (q+48, dq, pre+12, dst+12, 16, stride, dc[3]);
|
| - else
|
| - vp8_dc_only_idct_add_neon (dc[3], pre+12, dst+12, 16, stride);
|
| -
|
| q += 64;
|
| dc += 4;
|
| pre += 64;
|
| @@ -56,38 +61,16 @@
|
|
|
| for (i = 0; i < 4; i++)
|
| {
|
| - if (eobs[0] > 1)
|
| - vp8_dequant_idct_add_neon (q, dq, pre, dst, 16, stride);
|
| + if (((short *)eobs)[0] & 0xfefe)
|
| + idct_dequant_full_2x_neon (q, dq, pre, dst, 16, stride);
|
| else
|
| - {
|
| - vp8_dc_only_idct_add_neon (q[0]*dq[0], pre, dst, 16, stride);
|
| - ((int *)q)[0] = 0;
|
| - }
|
| + idct_dequant_0_2x_neon (q, dq[0], pre, 16, dst, stride);
|
|
|
| - if (eobs[1] > 1)
|
| - vp8_dequant_idct_add_neon (q+16, dq, pre+4, dst+4, 16, stride);
|
| + if (((short *)eobs)[1] & 0xfefe)
|
| + idct_dequant_full_2x_neon (q+32, dq, pre+8, dst+8, 16, stride);
|
| else
|
| - {
|
| - vp8_dc_only_idct_add_neon (q[16]*dq[0], pre+4, dst+4, 16, stride);
|
| - ((int *)(q+16))[0] = 0;
|
| - }
|
| + idct_dequant_0_2x_neon (q+32, dq[0], pre+8, 16, dst+8, stride);
|
|
|
| - if (eobs[2] > 1)
|
| - vp8_dequant_idct_add_neon (q+32, dq, pre+8, dst+8, 16, stride);
|
| - else
|
| - {
|
| - vp8_dc_only_idct_add_neon (q[32]*dq[0], pre+8, dst+8, 16, stride);
|
| - ((int *)(q+32))[0] = 0;
|
| - }
|
| -
|
| - if (eobs[3] > 1)
|
| - vp8_dequant_idct_add_neon (q+48, dq, pre+12, dst+12, 16, stride);
|
| - else
|
| - {
|
| - vp8_dc_only_idct_add_neon (q[48]*dq[0], pre+12, dst+12, 16, stride);
|
| - ((int *)(q+48))[0] = 0;
|
| - }
|
| -
|
| q += 64;
|
| pre += 64;
|
| dst += 4*stride;
|
| @@ -99,53 +82,34 @@
|
| (short *q, short *dq, unsigned char *pre,
|
| unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
|
| {
|
| - int i;
|
| + if (((short *)eobs)[0] & 0xfefe)
|
| + idct_dequant_full_2x_neon (q, dq, pre, dstu, 8, stride);
|
| + else
|
| + idct_dequant_0_2x_neon (q, dq[0], pre, 8, dstu, stride);
|
|
|
| - for (i = 0; i < 2; i++)
|
| - {
|
| - if (eobs[0] > 1)
|
| - vp8_dequant_idct_add_neon (q, dq, pre, dstu, 8, stride);
|
| - else
|
| - {
|
| - vp8_dc_only_idct_add_neon (q[0]*dq[0], pre, dstu, 8, stride);
|
| - ((int *)q)[0] = 0;
|
| - }
|
| + q += 32;
|
| + pre += 32;
|
| + dstu += 4*stride;
|
|
|
| - if (eobs[1] > 1)
|
| - vp8_dequant_idct_add_neon (q+16, dq, pre+4, dstu+4, 8, stride);
|
| - else
|
| - {
|
| - vp8_dc_only_idct_add_neon (q[16]*dq[0], pre+4, dstu+4, 8, stride);
|
| - ((int *)(q+16))[0] = 0;
|
| - }
|
| + if (((short *)eobs)[1] & 0xfefe)
|
| + idct_dequant_full_2x_neon (q, dq, pre, dstu, 8, stride);
|
| + else
|
| + idct_dequant_0_2x_neon (q, dq[0], pre, 8, dstu, stride);
|
|
|
| - q += 32;
|
| - pre += 32;
|
| - dstu += 4*stride;
|
| - eobs += 2;
|
| - }
|
| + q += 32;
|
| + pre += 32;
|
|
|
| - for (i = 0; i < 2; i++)
|
| - {
|
| - if (eobs[0] > 1)
|
| - vp8_dequant_idct_add_neon (q, dq, pre, dstv, 8, stride);
|
| - else
|
| - {
|
| - vp8_dc_only_idct_add_neon (q[0]*dq[0], pre, dstv, 8, stride);
|
| - ((int *)q)[0] = 0;
|
| - }
|
| + if (((short *)eobs)[2] & 0xfefe)
|
| + idct_dequant_full_2x_neon (q, dq, pre, dstv, 8, stride);
|
| + else
|
| + idct_dequant_0_2x_neon (q, dq[0], pre, 8, dstv, stride);
|
|
|
| - if (eobs[1] > 1)
|
| - vp8_dequant_idct_add_neon (q+16, dq, pre+4, dstv+4, 8, stride);
|
| - else
|
| - {
|
| - vp8_dc_only_idct_add_neon (q[16]*dq[0], pre+4, dstv+4, 8, stride);
|
| - ((int *)(q+16))[0] = 0;
|
| - }
|
| + q += 32;
|
| + pre += 32;
|
| + dstv += 4*stride;
|
|
|
| - q += 32;
|
| - pre += 32;
|
| - dstv += 4*stride;
|
| - eobs += 2;
|
| - }
|
| + if (((short *)eobs)[3] & 0xfefe)
|
| + idct_dequant_full_2x_neon (q, dq, pre, dstv, 8, stride);
|
| + else
|
| + idct_dequant_0_2x_neon (q, dq[0], pre, 8, dstv, stride);
|
| }
|
|
|