Index: source/libvpx/vp8/decoder/arm/neon/idct_blk_neon.c |
=================================================================== |
--- source/libvpx/vp8/decoder/arm/neon/idct_blk_neon.c (revision 60257) |
+++ source/libvpx/vp8/decoder/arm/neon/idct_blk_neon.c (working copy) |
@@ -1,5 +1,5 @@ |
/* |
- * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. |
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
* |
* Use of this source code is governed by a BSD-style license |
* that can be found in the LICENSE file in the root of the source |
@@ -12,6 +12,21 @@ |
#include "idct.h" |
#include "dequantize.h" |
+/* The helpers below are declared in this file, rather than in a header, |
+ * because we do not want to maintain their prototypes outside of this scope. |
+ */ |
+void idct_dequant_dc_full_2x_neon |
+ (short *input, short *dq, unsigned char *pre, unsigned char *dst, |
+ int stride, short *dc); |
+void idct_dequant_dc_0_2x_neon |
+ (short *dc, unsigned char *pre, unsigned char *dst, int stride); |
+void idct_dequant_full_2x_neon |
+ (short *q, short *dq, unsigned char *pre, unsigned char *dst, |
+ int pitch, int stride); |
+void idct_dequant_0_2x_neon |
+ (short *q, short dq, unsigned char *pre, int pitch, |
+ unsigned char *dst, int stride); |
+ |
void vp8_dequant_dc_idct_add_y_block_neon |
(short *q, short *dq, unsigned char *pre, |
unsigned char *dst, int stride, char *eobs, short *dc) |
@@ -20,26 +35,16 @@ |
for (i = 0; i < 4; i++) |
{ |
- if (eobs[0] > 1) |
- vp8_dequant_dc_idct_add_neon (q, dq, pre, dst, 16, stride, dc[0]); |
+ if (((short *)eobs)[0] & 0xfefe) |
+ idct_dequant_dc_full_2x_neon (q, dq, pre, dst, stride, dc); |
else |
- vp8_dc_only_idct_add_neon (dc[0], pre, dst, 16, stride); |
+ idct_dequant_dc_0_2x_neon(dc, pre, dst, stride); |
- if (eobs[1] > 1) |
- vp8_dequant_dc_idct_add_neon (q+16, dq, pre+4, dst+4, 16, stride, dc[1]); |
+ if (((short *)eobs)[1] & 0xfefe) |
+ idct_dequant_dc_full_2x_neon (q+32, dq, pre+8, dst+8, stride, dc+2); |
else |
- vp8_dc_only_idct_add_neon (dc[1], pre+4, dst+4, 16, stride); |
+ idct_dequant_dc_0_2x_neon(dc+2, pre+8, dst+8, stride); |
- if (eobs[2] > 1) |
- vp8_dequant_dc_idct_add_neon (q+32, dq, pre+8, dst+8, 16, stride, dc[2]); |
- else |
- vp8_dc_only_idct_add_neon (dc[2], pre+8, dst+8, 16, stride); |
- |
- if (eobs[3] > 1) |
- vp8_dequant_dc_idct_add_neon (q+48, dq, pre+12, dst+12, 16, stride, dc[3]); |
- else |
- vp8_dc_only_idct_add_neon (dc[3], pre+12, dst+12, 16, stride); |
- |
q += 64; |
dc += 4; |
pre += 64; |
@@ -56,38 +61,16 @@ |
for (i = 0; i < 4; i++) |
{ |
- if (eobs[0] > 1) |
- vp8_dequant_idct_add_neon (q, dq, pre, dst, 16, stride); |
+ if (((short *)eobs)[0] & 0xfefe) |
+ idct_dequant_full_2x_neon (q, dq, pre, dst, 16, stride); |
else |
- { |
- vp8_dc_only_idct_add_neon (q[0]*dq[0], pre, dst, 16, stride); |
- ((int *)q)[0] = 0; |
- } |
+ idct_dequant_0_2x_neon (q, dq[0], pre, 16, dst, stride); |
- if (eobs[1] > 1) |
- vp8_dequant_idct_add_neon (q+16, dq, pre+4, dst+4, 16, stride); |
+ if (((short *)eobs)[1] & 0xfefe) |
+ idct_dequant_full_2x_neon (q+32, dq, pre+8, dst+8, 16, stride); |
else |
- { |
- vp8_dc_only_idct_add_neon (q[16]*dq[0], pre+4, dst+4, 16, stride); |
- ((int *)(q+16))[0] = 0; |
- } |
+ idct_dequant_0_2x_neon (q+32, dq[0], pre+8, 16, dst+8, stride); |
- if (eobs[2] > 1) |
- vp8_dequant_idct_add_neon (q+32, dq, pre+8, dst+8, 16, stride); |
- else |
- { |
- vp8_dc_only_idct_add_neon (q[32]*dq[0], pre+8, dst+8, 16, stride); |
- ((int *)(q+32))[0] = 0; |
- } |
- |
- if (eobs[3] > 1) |
- vp8_dequant_idct_add_neon (q+48, dq, pre+12, dst+12, 16, stride); |
- else |
- { |
- vp8_dc_only_idct_add_neon (q[48]*dq[0], pre+12, dst+12, 16, stride); |
- ((int *)(q+48))[0] = 0; |
- } |
- |
q += 64; |
pre += 64; |
dst += 4*stride; |
@@ -99,53 +82,34 @@ |
(short *q, short *dq, unsigned char *pre, |
unsigned char *dstu, unsigned char *dstv, int stride, char *eobs) |
{ |
- int i; |
+ if (((short *)eobs)[0] & 0xfefe) |
+ idct_dequant_full_2x_neon (q, dq, pre, dstu, 8, stride); |
+ else |
+ idct_dequant_0_2x_neon (q, dq[0], pre, 8, dstu, stride); |
- for (i = 0; i < 2; i++) |
- { |
- if (eobs[0] > 1) |
- vp8_dequant_idct_add_neon (q, dq, pre, dstu, 8, stride); |
- else |
- { |
- vp8_dc_only_idct_add_neon (q[0]*dq[0], pre, dstu, 8, stride); |
- ((int *)q)[0] = 0; |
- } |
+ q += 32; |
+ pre += 32; |
+ dstu += 4*stride; |
- if (eobs[1] > 1) |
- vp8_dequant_idct_add_neon (q+16, dq, pre+4, dstu+4, 8, stride); |
- else |
- { |
- vp8_dc_only_idct_add_neon (q[16]*dq[0], pre+4, dstu+4, 8, stride); |
- ((int *)(q+16))[0] = 0; |
- } |
+ if (((short *)eobs)[1] & 0xfefe) |
+ idct_dequant_full_2x_neon (q, dq, pre, dstu, 8, stride); |
+ else |
+ idct_dequant_0_2x_neon (q, dq[0], pre, 8, dstu, stride); |
- q += 32; |
- pre += 32; |
- dstu += 4*stride; |
- eobs += 2; |
- } |
+ q += 32; |
+ pre += 32; |
- for (i = 0; i < 2; i++) |
- { |
- if (eobs[0] > 1) |
- vp8_dequant_idct_add_neon (q, dq, pre, dstv, 8, stride); |
- else |
- { |
- vp8_dc_only_idct_add_neon (q[0]*dq[0], pre, dstv, 8, stride); |
- ((int *)q)[0] = 0; |
- } |
+ if (((short *)eobs)[2] & 0xfefe) |
+ idct_dequant_full_2x_neon (q, dq, pre, dstv, 8, stride); |
+ else |
+ idct_dequant_0_2x_neon (q, dq[0], pre, 8, dstv, stride); |
- if (eobs[1] > 1) |
- vp8_dequant_idct_add_neon (q+16, dq, pre+4, dstv+4, 8, stride); |
- else |
- { |
- vp8_dc_only_idct_add_neon (q[16]*dq[0], pre+4, dstv+4, 8, stride); |
- ((int *)(q+16))[0] = 0; |
- } |
+ q += 32; |
+ pre += 32; |
+ dstv += 4*stride; |
- q += 32; |
- pre += 32; |
- dstv += 4*stride; |
- eobs += 2; |
- } |
+ if (((short *)eobs)[3] & 0xfefe) |
+ idct_dequant_full_2x_neon (q, dq, pre, dstv, 8, stride); |
+ else |
+ idct_dequant_0_2x_neon (q, dq[0], pre, 8, dstv, stride); |
} |