OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 973 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
984 // Rows | 984 // Rows |
985 for (i = 0; i < 16; ++i) { | 985 for (i = 0; i < 16; ++i) { |
986 for (j = 0; j < 16; ++j) | 986 for (j = 0; j < 16; ++j) |
987 temp_in[j] = out[j + i * 16]; | 987 temp_in[j] = out[j + i * 16]; |
988 ht.rows(temp_in, temp_out); | 988 ht.rows(temp_in, temp_out); |
989 for (j = 0; j < 16; ++j) | 989 for (j = 0; j < 16; ++j) |
990 output[j + i * 16] = temp_out[j]; | 990 output[j + i * 16] = temp_out[j]; |
991 } | 991 } |
992 } | 992 } |
993 | 993 |
| 994 static INLINE int dct_32_round(int input) { |
| 995 int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS); |
| 996 assert(-131072 <= rv && rv <= 131071); |
| 997 return rv; |
| 998 } |
994 | 999 |
995 static void dct32_1d(int *input, int *output) { | 1000 static INLINE int half_round_shift(int input) { |
| 1001 int rv = (input + 1 + (input < 0)) >> 2; |
| 1002 return rv; |
| 1003 } |
| 1004 |
| 1005 static void dct32_1d(int *input, int *output, int round) { |
996 int step[32]; | 1006 int step[32]; |
997 // Stage 1 | 1007 // Stage 1 |
998 step[0] = input[0] + input[(32 - 1)]; | 1008 step[0] = input[0] + input[(32 - 1)]; |
999 step[1] = input[1] + input[(32 - 2)]; | 1009 step[1] = input[1] + input[(32 - 2)]; |
1000 step[2] = input[2] + input[(32 - 3)]; | 1010 step[2] = input[2] + input[(32 - 3)]; |
1001 step[3] = input[3] + input[(32 - 4)]; | 1011 step[3] = input[3] + input[(32 - 4)]; |
1002 step[4] = input[4] + input[(32 - 5)]; | 1012 step[4] = input[4] + input[(32 - 5)]; |
1003 step[5] = input[5] + input[(32 - 6)]; | 1013 step[5] = input[5] + input[(32 - 6)]; |
1004 step[6] = input[6] + input[(32 - 7)]; | 1014 step[6] = input[6] + input[(32 - 7)]; |
1005 step[7] = input[7] + input[(32 - 8)]; | 1015 step[7] = input[7] + input[(32 - 8)]; |
(...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1094 step[23] = -output[23] + output[16]; | 1104 step[23] = -output[23] + output[16]; |
1095 step[24] = -output[24] + output[31]; | 1105 step[24] = -output[24] + output[31]; |
1096 step[25] = -output[25] + output[30]; | 1106 step[25] = -output[25] + output[30]; |
1097 step[26] = -output[26] + output[29]; | 1107 step[26] = -output[26] + output[29]; |
1098 step[27] = -output[27] + output[28]; | 1108 step[27] = -output[27] + output[28]; |
1099 step[28] = output[28] + output[27]; | 1109 step[28] = output[28] + output[27]; |
1100 step[29] = output[29] + output[26]; | 1110 step[29] = output[29] + output[26]; |
1101 step[30] = output[30] + output[25]; | 1111 step[30] = output[30] + output[25]; |
1102 step[31] = output[31] + output[24]; | 1112 step[31] = output[31] + output[24]; |
1103 | 1113 |
 | 1114 // Damp the magnitude by 4 (rounded right shift by 2), hence the |
 | 1115 // intermediate values are within the range of 16 bits. |
| 1116 if (round) { |
| 1117 step[0] = half_round_shift(step[0]); |
| 1118 step[1] = half_round_shift(step[1]); |
| 1119 step[2] = half_round_shift(step[2]); |
| 1120 step[3] = half_round_shift(step[3]); |
| 1121 step[4] = half_round_shift(step[4]); |
| 1122 step[5] = half_round_shift(step[5]); |
| 1123 step[6] = half_round_shift(step[6]); |
| 1124 step[7] = half_round_shift(step[7]); |
| 1125 step[8] = half_round_shift(step[8]); |
| 1126 step[9] = half_round_shift(step[9]); |
| 1127 step[10] = half_round_shift(step[10]); |
| 1128 step[11] = half_round_shift(step[11]); |
| 1129 step[12] = half_round_shift(step[12]); |
| 1130 step[13] = half_round_shift(step[13]); |
| 1131 step[14] = half_round_shift(step[14]); |
| 1132 step[15] = half_round_shift(step[15]); |
| 1133 |
| 1134 step[16] = half_round_shift(step[16]); |
| 1135 step[17] = half_round_shift(step[17]); |
| 1136 step[18] = half_round_shift(step[18]); |
| 1137 step[19] = half_round_shift(step[19]); |
| 1138 step[20] = half_round_shift(step[20]); |
| 1139 step[21] = half_round_shift(step[21]); |
| 1140 step[22] = half_round_shift(step[22]); |
| 1141 step[23] = half_round_shift(step[23]); |
| 1142 step[24] = half_round_shift(step[24]); |
| 1143 step[25] = half_round_shift(step[25]); |
| 1144 step[26] = half_round_shift(step[26]); |
| 1145 step[27] = half_round_shift(step[27]); |
| 1146 step[28] = half_round_shift(step[28]); |
| 1147 step[29] = half_round_shift(step[29]); |
| 1148 step[30] = half_round_shift(step[30]); |
| 1149 step[31] = half_round_shift(step[31]); |
| 1150 } |
| 1151 |
1104 // Stage 4 | 1152 // Stage 4 |
1105 output[0] = step[0] + step[3]; | 1153 output[0] = step[0] + step[3]; |
1106 output[1] = step[1] + step[2]; | 1154 output[1] = step[1] + step[2]; |
1107 output[2] = -step[2] + step[1]; | 1155 output[2] = -step[2] + step[1]; |
1108 output[3] = -step[3] + step[0]; | 1156 output[3] = -step[3] + step[0]; |
1109 output[4] = step[4]; | 1157 output[4] = step[4]; |
1110 output[5] = dct_32_round((-step[5] + step[6]) * cospi_16_64); | 1158 output[5] = dct_32_round((-step[5] + step[6]) * cospi_16_64); |
1111 output[6] = dct_32_round((step[6] + step[5]) * cospi_16_64); | 1159 output[6] = dct_32_round((step[6] + step[5]) * cospi_16_64); |
1112 output[7] = step[7]; | 1160 output[7] = step[7]; |
1113 output[8] = step[8] + step[11]; | 1161 output[8] = step[8] + step[11]; |
(...skipping 162 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1276 output[15] = dct_32_round(step[30] * cospi_15_64 + step[17] * -cospi_17_64); | 1324 output[15] = dct_32_round(step[30] * cospi_15_64 + step[17] * -cospi_17_64); |
1277 output[31] = dct_32_round(step[31] * cospi_31_64 + step[16] * -cospi_1_64); | 1325 output[31] = dct_32_round(step[31] * cospi_31_64 + step[16] * -cospi_1_64); |
1278 } | 1326 } |
1279 | 1327 |
1280 void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch) { | 1328 void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch) { |
1281 int shortpitch = pitch >> 1; | 1329 int shortpitch = pitch >> 1; |
1282 int i, j; | 1330 int i, j; |
1283 int output[32 * 32]; | 1331 int output[32 * 32]; |
1284 | 1332 |
1285 // Columns | 1333 // Columns |
1286 for (i = 0; i < 32; i++) { | 1334 for (i = 0; i < 32; ++i) { |
1287 int temp_in[32], temp_out[32]; | 1335 int temp_in[32], temp_out[32]; |
1288 for (j = 0; j < 32; j++) | 1336 for (j = 0; j < 32; ++j) |
1289 temp_in[j] = input[j * shortpitch + i] << 2; | 1337 temp_in[j] = input[j * shortpitch + i] << 2; |
1290 dct32_1d(temp_in, temp_out); | 1338 dct32_1d(temp_in, temp_out, 0); |
1291 for (j = 0; j < 32; j++) | 1339 for (j = 0; j < 32; ++j) |
1292 output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2; | 1340 output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2; |
1293 } | 1341 } |
1294 | 1342 |
1295 // Rows | 1343 // Rows |
1296 for (i = 0; i < 32; ++i) { | 1344 for (i = 0; i < 32; ++i) { |
1297 int temp_in[32], temp_out[32]; | 1345 int temp_in[32], temp_out[32]; |
1298 for (j = 0; j < 32; ++j) | 1346 for (j = 0; j < 32; ++j) |
1299 temp_in[j] = output[j + i * 32]; | 1347 temp_in[j] = output[j + i * 32]; |
1300 dct32_1d(temp_in, temp_out); | 1348 dct32_1d(temp_in, temp_out, 0); |
1301 for (j = 0; j < 32; ++j) | 1349 for (j = 0; j < 32; ++j) |
1302 out[j + i * 32] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2; | 1350 out[j + i * 32] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2; |
1303 } | 1351 } |
1304 } | 1352 } |
| 1353 |
 | 1354 // Note that although dct32_1d uses dct_32_round in its computation flow, |
 | 1355 // this 2-D fdct32x32 for the rate-distortion optimization loop operates |
 | 1356 // within 16-bit precision. |
| 1357 void vp9_short_fdct32x32_rd_c(int16_t *input, int16_t *out, int pitch) { |
| 1358 int shortpitch = pitch >> 1; |
| 1359 int i, j; |
| 1360 int output[32 * 32]; |
| 1361 |
| 1362 // Columns |
| 1363 for (i = 0; i < 32; ++i) { |
| 1364 int temp_in[32], temp_out[32]; |
| 1365 for (j = 0; j < 32; ++j) |
| 1366 temp_in[j] = input[j * shortpitch + i] << 2; |
| 1367 dct32_1d(temp_in, temp_out, 0); |
| 1368 for (j = 0; j < 32; ++j) |
| 1369 output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2; |
| 1370 } |
| 1371 |
| 1372 // Rows |
| 1373 for (i = 0; i < 32; ++i) { |
| 1374 int temp_in[32], temp_out[32]; |
| 1375 for (j = 0; j < 32; ++j) |
| 1376 temp_in[j] = output[j + i * 32]; |
| 1377 dct32_1d(temp_in, temp_out, 1); |
| 1378 for (j = 0; j < 32; ++j) |
| 1379 out[j + i * 32] = temp_out[j]; |
| 1380 } |
| 1381 } |
OLD | NEW |