Index: source/libvpx/vp9/encoder/vp9_dct.c |
=================================================================== |
--- source/libvpx/vp9/encoder/vp9_dct.c (revision 207064) |
+++ source/libvpx/vp9/encoder/vp9_dct.c (working copy) |
@@ -991,8 +991,18 @@ |
} |
} |
+// Rounds `input` via ROUND_POWER_OF_TWO(input, DCT_CONST_BITS) and returns |
+// the rounded value. The assert checks, when assertions are enabled, that |
+// the result lies in [-131072, 131071], i.e. the signed 18-bit range |
+// [-2^17, 2^17 - 1]. ROUND_POWER_OF_TWO and DCT_CONST_BITS are defined |
+// outside this hunk. |
+static INLINE int dct_32_round(int input) { |
+ int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS); |
+ assert(-131072 <= rv && rv <= 131071); |
+ return rv; |
+} |
-static void dct32_1d(int *input, int *output) { |
+// Returns (input + 1 + (input < 0)) >> 2: a right shift by 2 bits applied |
+// after adding a bias of 1 for non-negative inputs or 2 for negative ones. |
+// NOTE(review): despite "half" in the name, the shift is by 2 bits (a |
+// divide by 4), not by 1 - confirm this is intended. |
+// NOTE(review): right-shifting a negative int is implementation-defined in |
+// C; this presumably relies on an arithmetic shift. |
+static INLINE int half_round_shift(int input) { |
+ int rv = (input + 1 + (input < 0)) >> 2; |
+ return rv; |
+} |
+ |
+static void dct32_1d(int *input, int *output, int round) { |
int step[32]; |
// Stage 1 |
step[0] = input[0] + input[(32 - 1)]; |
@@ -1101,6 +1111,44 @@ |
step[30] = output[30] + output[25]; |
step[31] = output[31] + output[24]; |
+ // Damp the magnitude (half_round_shift applies a rounding right shift by 2), |
+ // so that the intermediate values are within the range of 16 bits. |
+ if (round) { |
+ step[0] = half_round_shift(step[0]); |
+ step[1] = half_round_shift(step[1]); |
+ step[2] = half_round_shift(step[2]); |
+ step[3] = half_round_shift(step[3]); |
+ step[4] = half_round_shift(step[4]); |
+ step[5] = half_round_shift(step[5]); |
+ step[6] = half_round_shift(step[6]); |
+ step[7] = half_round_shift(step[7]); |
+ step[8] = half_round_shift(step[8]); |
+ step[9] = half_round_shift(step[9]); |
+ step[10] = half_round_shift(step[10]); |
+ step[11] = half_round_shift(step[11]); |
+ step[12] = half_round_shift(step[12]); |
+ step[13] = half_round_shift(step[13]); |
+ step[14] = half_round_shift(step[14]); |
+ step[15] = half_round_shift(step[15]); |
+ |
+ step[16] = half_round_shift(step[16]); |
+ step[17] = half_round_shift(step[17]); |
+ step[18] = half_round_shift(step[18]); |
+ step[19] = half_round_shift(step[19]); |
+ step[20] = half_round_shift(step[20]); |
+ step[21] = half_round_shift(step[21]); |
+ step[22] = half_round_shift(step[22]); |
+ step[23] = half_round_shift(step[23]); |
+ step[24] = half_round_shift(step[24]); |
+ step[25] = half_round_shift(step[25]); |
+ step[26] = half_round_shift(step[26]); |
+ step[27] = half_round_shift(step[27]); |
+ step[28] = half_round_shift(step[28]); |
+ step[29] = half_round_shift(step[29]); |
+ step[30] = half_round_shift(step[30]); |
+ step[31] = half_round_shift(step[31]); |
+ } |
+ |
// Stage 4 |
output[0] = step[0] + step[3]; |
output[1] = step[1] + step[2]; |
@@ -1283,12 +1331,12 @@ |
int output[32 * 32]; |
// Columns |
- for (i = 0; i < 32; i++) { |
+ for (i = 0; i < 32; ++i) { |
int temp_in[32], temp_out[32]; |
- for (j = 0; j < 32; j++) |
+ for (j = 0; j < 32; ++j) |
temp_in[j] = input[j * shortpitch + i] << 2; |
- dct32_1d(temp_in, temp_out); |
- for (j = 0; j < 32; j++) |
+ dct32_1d(temp_in, temp_out, 0); |
+ for (j = 0; j < 32; ++j) |
output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2; |
} |
@@ -1297,8 +1345,37 @@ |
int temp_in[32], temp_out[32]; |
for (j = 0; j < 32; ++j) |
temp_in[j] = output[j + i * 32]; |
- dct32_1d(temp_in, temp_out); |
+ dct32_1d(temp_in, temp_out, 0); |
for (j = 0; j < 32; ++j) |
out[j + i * 32] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2; |
} |
} |
+ |
+// 2-D 32x32 forward DCT variant for the rate-distortion optimization loop. |
+// Note that although dct_32_round is used in the dct32_1d computation flow, |
+// this 2-D fdct32x32 is meant to operate within 16 bits of precision. |
+// |
+// input: source samples, read as input[j * (pitch >> 1) + i] for i, j in |
+//        [0, 32). |
+// out:   receives the 32x32 result, written as out[j + i * 32]. |
+// pitch: pitch of `input`; it is halved before being used as the row stride |
+//        (presumably it is given in bytes - TODO confirm against callers). |
+// |
+// NOTE(review): `<< 2` on a negative int16_t sample is undefined behavior in |
+// C; multiplying by 4 would avoid it. |
+void vp9_short_fdct32x32_rd_c(int16_t *input, int16_t *out, int pitch) { |
+ int shortpitch = pitch >> 1; |
+ int i, j; |
+ int output[32 * 32]; |
+ |
+ // Columns: pre-scale each sample with << 2, run dct32_1d with round = 0 |
+ // (no intermediate half_round_shift), then shift the result back down by |
+ // 2 bits with a rounding bias. |
+ // NOTE(review): the bias here uses (x > 0) whereas half_round_shift uses |
+ // (x < 0) - confirm the asymmetry is intended. |
+ for (i = 0; i < 32; ++i) { |
+ int temp_in[32], temp_out[32]; |
+ for (j = 0; j < 32; ++j) |
+ temp_in[j] = input[j * shortpitch + i] << 2; |
+ dct32_1d(temp_in, temp_out, 0); |
+ for (j = 0; j < 32; ++j) |
+ output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2; |
+ } |
+ |
+ // Rows: run dct32_1d with round = 1, which makes it apply |
+ // half_round_shift to its intermediate step values; the row result is |
+ // stored into `out` without a further shift (int narrowed to int16_t). |
+ for (i = 0; i < 32; ++i) { |
+ int temp_in[32], temp_out[32]; |
+ for (j = 0; j < 32; ++j) |
+ temp_in[j] = output[j + i * 32]; |
+ dct32_1d(temp_in, temp_out, 1); |
+ for (j = 0; j < 32; ++j) |
+ out[j + i * 32] = temp_out[j]; |
+ } |
+} |