OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. | 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include "vpx_ports/config.h" | 11 #include "vpx_ports/config.h" |
12 #include "idct.h" | 12 #include "idct.h" |
13 #include "dequantize.h" | 13 #include "dequantize.h" |
14 | 14 |
| 15 /* place these declarations here because we don't want to maintain them |
| 16 * outside of this scope |
| 17 */ |
| 18 void idct_dequant_dc_full_2x_neon |
| 19 (short *input, short *dq, unsigned char *pre, unsigned char *dst, |
| 20 int stride, short *dc); |
| 21 void idct_dequant_dc_0_2x_neon |
| 22 (short *dc, unsigned char *pre, unsigned char *dst, int stride); |
| 23 void idct_dequant_full_2x_neon |
| 24 (short *q, short *dq, unsigned char *pre, unsigned char *dst, |
| 25 int pitch, int stride); |
| 26 void idct_dequant_0_2x_neon |
| 27 (short *q, short dq, unsigned char *pre, int pitch, |
| 28 unsigned char *dst, int stride); |
| 29 |
15 void vp8_dequant_dc_idct_add_y_block_neon | 30 void vp8_dequant_dc_idct_add_y_block_neon |
16 (short *q, short *dq, unsigned char *pre, | 31 (short *q, short *dq, unsigned char *pre, |
17 unsigned char *dst, int stride, char *eobs, short *dc) | 32 unsigned char *dst, int stride, char *eobs, short *dc) |
18 { | 33 { |
19 int i; | 34 int i; |
20 | 35 |
21 for (i = 0; i < 4; i++) | 36 for (i = 0; i < 4; i++) |
22 { | 37 { |
23 if (eobs[0] > 1) | 38 if (((short *)eobs)[0] & 0xfefe) |
24 vp8_dequant_dc_idct_add_neon (q, dq, pre, dst, 16, stride, dc[0]); | 39 idct_dequant_dc_full_2x_neon (q, dq, pre, dst, stride, dc); |
25 else | 40 else |
26 vp8_dc_only_idct_add_neon (dc[0], pre, dst, 16, stride); | 41 idct_dequant_dc_0_2x_neon(dc, pre, dst, stride); |
27 | 42 |
28 if (eobs[1] > 1) | 43 if (((short *)eobs)[1] & 0xfefe) |
29 vp8_dequant_dc_idct_add_neon (q+16, dq, pre+4, dst+4, 16, stride, dc[1]); | 44 idct_dequant_dc_full_2x_neon (q+32, dq, pre+8, dst+8, stride, dc+2); |
30 else | 45 else |
31 vp8_dc_only_idct_add_neon (dc[1], pre+4, dst+4, 16, stride); | 46 idct_dequant_dc_0_2x_neon(dc+2, pre+8, dst+8, stride); |
32 | |
33 if (eobs[2] > 1) | |
34 vp8_dequant_dc_idct_add_neon (q+32, dq, pre+8, dst+8, 16, stride, dc[2]); | |
35 else | |
36 vp8_dc_only_idct_add_neon (dc[2], pre+8, dst+8, 16, stride); | |
37 | |
38 if (eobs[3] > 1) | |
39 vp8_dequant_dc_idct_add_neon (q+48, dq, pre+12, dst+12, 16, stride, dc[3]); | |
40 else | |
41 vp8_dc_only_idct_add_neon (dc[3], pre+12, dst+12, 16, stride); | |
42 | 47 |
43 q += 64; | 48 q += 64; |
44 dc += 4; | 49 dc += 4; |
45 pre += 64; | 50 pre += 64; |
46 dst += 4*stride; | 51 dst += 4*stride; |
47 eobs += 4; | 52 eobs += 4; |
48 } | 53 } |
49 } | 54 } |
50 | 55 |
51 void vp8_dequant_idct_add_y_block_neon | 56 void vp8_dequant_idct_add_y_block_neon |
52 (short *q, short *dq, unsigned char *pre, | 57 (short *q, short *dq, unsigned char *pre, |
53 unsigned char *dst, int stride, char *eobs) | 58 unsigned char *dst, int stride, char *eobs) |
54 { | 59 { |
55 int i; | 60 int i; |
56 | 61 |
57 for (i = 0; i < 4; i++) | 62 for (i = 0; i < 4; i++) |
58 { | 63 { |
59 if (eobs[0] > 1) | 64 if (((short *)eobs)[0] & 0xfefe) |
60 vp8_dequant_idct_add_neon (q, dq, pre, dst, 16, stride); | 65 idct_dequant_full_2x_neon (q, dq, pre, dst, 16, stride); |
61 else | 66 else |
62 { | 67 idct_dequant_0_2x_neon (q, dq[0], pre, 16, dst, stride); |
63 vp8_dc_only_idct_add_neon (q[0]*dq[0], pre, dst, 16, stride); | |
64 ((int *)q)[0] = 0; | |
65 } | |
66 | 68 |
67 if (eobs[1] > 1) | 69 if (((short *)eobs)[1] & 0xfefe) |
68 vp8_dequant_idct_add_neon (q+16, dq, pre+4, dst+4, 16, stride); | 70 idct_dequant_full_2x_neon (q+32, dq, pre+8, dst+8, 16, stride); |
69 else | 71 else |
70 { | 72 idct_dequant_0_2x_neon (q+32, dq[0], pre+8, 16, dst+8, stride); |
71 vp8_dc_only_idct_add_neon (q[16]*dq[0], pre+4, dst+4, 16, stride); | |
72 ((int *)(q+16))[0] = 0; | |
73 } | |
74 | |
75 if (eobs[2] > 1) | |
76 vp8_dequant_idct_add_neon (q+32, dq, pre+8, dst+8, 16, stride); | |
77 else | |
78 { | |
79 vp8_dc_only_idct_add_neon (q[32]*dq[0], pre+8, dst+8, 16, stride); | |
80 ((int *)(q+32))[0] = 0; | |
81 } | |
82 | |
83 if (eobs[3] > 1) | |
84 vp8_dequant_idct_add_neon (q+48, dq, pre+12, dst+12, 16, stride); | |
85 else | |
86 { | |
87 vp8_dc_only_idct_add_neon (q[48]*dq[0], pre+12, dst+12, 16, stride); | |
88 ((int *)(q+48))[0] = 0; | |
89 } | |
90 | 73 |
91 q += 64; | 74 q += 64; |
92 pre += 64; | 75 pre += 64; |
93 dst += 4*stride; | 76 dst += 4*stride; |
94 eobs += 4; | 77 eobs += 4; |
95 } | 78 } |
96 } | 79 } |
97 | 80 |
98 void vp8_dequant_idct_add_uv_block_neon | 81 void vp8_dequant_idct_add_uv_block_neon |
99 (short *q, short *dq, unsigned char *pre, | 82 (short *q, short *dq, unsigned char *pre, |
100 unsigned char *dstu, unsigned char *dstv, int stride, char *eobs) | 83 unsigned char *dstu, unsigned char *dstv, int stride, char *eobs) |
101 { | 84 { |
102 int i; | 85 if (((short *)eobs)[0] & 0xfefe) |
| 86 idct_dequant_full_2x_neon (q, dq, pre, dstu, 8, stride); |
| 87 else |
| 88 idct_dequant_0_2x_neon (q, dq[0], pre, 8, dstu, stride); |
103 | 89 |
104 for (i = 0; i < 2; i++) | 90 q += 32; |
105 { | 91 pre += 32; |
106 if (eobs[0] > 1) | 92 dstu += 4*stride; |
107 vp8_dequant_idct_add_neon (q, dq, pre, dstu, 8, stride); | |
108 else | |
109 { | |
110 vp8_dc_only_idct_add_neon (q[0]*dq[0], pre, dstu, 8, stride); | |
111 ((int *)q)[0] = 0; | |
112 } | |
113 | 93 |
114 if (eobs[1] > 1) | 94 if (((short *)eobs)[1] & 0xfefe) |
115 vp8_dequant_idct_add_neon (q+16, dq, pre+4, dstu+4, 8, stride); | 95 idct_dequant_full_2x_neon (q, dq, pre, dstu, 8, stride); |
116 else | 96 else |
117 { | 97 idct_dequant_0_2x_neon (q, dq[0], pre, 8, dstu, stride); |
118 vp8_dc_only_idct_add_neon (q[16]*dq[0], pre+4, dstu+4, 8, stride); | |
119 ((int *)(q+16))[0] = 0; | |
120 } | |
121 | 98 |
122 q += 32; | 99 q += 32; |
123 pre += 32; | 100 pre += 32; |
124 dstu += 4*stride; | |
125 eobs += 2; | |
126 } | |
127 | 101 |
128 for (i = 0; i < 2; i++) | 102 if (((short *)eobs)[2] & 0xfefe) |
129 { | 103 idct_dequant_full_2x_neon (q, dq, pre, dstv, 8, stride); |
130 if (eobs[0] > 1) | 104 else |
131 vp8_dequant_idct_add_neon (q, dq, pre, dstv, 8, stride); | 105 idct_dequant_0_2x_neon (q, dq[0], pre, 8, dstv, stride); |
132 else | |
133 { | |
134 vp8_dc_only_idct_add_neon (q[0]*dq[0], pre, dstv, 8, stride); | |
135 ((int *)q)[0] = 0; | |
136 } | |
137 | 106 |
138 if (eobs[1] > 1) | 107 q += 32; |
139 vp8_dequant_idct_add_neon (q+16, dq, pre+4, dstv+4, 8, stride); | 108 pre += 32; |
140 else | 109 dstv += 4*stride; |
141 { | |
142 vp8_dc_only_idct_add_neon (q[16]*dq[0], pre+4, dstv+4, 8, stride); | |
143 ((int *)(q+16))[0] = 0; | |
144 } | |
145 | 110 |
146 q += 32; | 111 if (((short *)eobs)[3] & 0xfefe) |
147 pre += 32; | 112 idct_dequant_full_2x_neon (q, dq, pre, dstv, 8, stride); |
148 dstv += 4*stride; | 113 else |
149 eobs += 2; | 114 idct_dequant_0_2x_neon (q, dq[0], pre, 8, dstv, stride); |
150 } | |
151 } | 115 } |
OLD | NEW |