source/libvpx/vp9/encoder/vp9_dct.c - Issue 17009012: libvpx: Pull from upstream

Side by Side Diff: source/libvpx/vp9/encoder/vp9_dct.c

Issue 17009012: libvpx: Pull from upstream (Closed) Base URL: http://src.chromium.org/chrome/trunk/deps/third_party/libvpx/

Patch Set: Created 7 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.	2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 973 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
984 // Rows	984 // Rows

985 for (i = 0; i < 16; ++i) {	985 for (i = 0; i < 16; ++i) {

986 for (j = 0; j < 16; ++j)	986 for (j = 0; j < 16; ++j)

987 temp_in[j] = out[j + i * 16];	987 temp_in[j] = out[j + i * 16];

988 ht.rows(temp_in, temp_out);	988 ht.rows(temp_in, temp_out);

989 for (j = 0; j < 16; ++j)	989 for (j = 0; j < 16; ++j)

990 output[j + i * 16] = temp_out[j];	990 output[j + i * 16] = temp_out[j];

991 }	991 }

992 }	992 }

993	993

	994 static INLINE int dct_32_round(int input) {

	995 int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);

	996 assert(-131072 <= rv && rv <= 131071);

	997 return rv;

	998 }

994	999

995 static void dct32_1d(int *input, int *output) {	1000 static INLINE int half_round_shift(int input) {

	1001 int rv = (input + 1 + (input < 0)) >> 2;

	1002 return rv;

	1003 }

	1004

	1005 static void dct32_1d(int *input, int *output, int round) {

996 int step[32];	1006 int step[32];

997 // Stage 1	1007 // Stage 1

998 step[0] = input[0] + input[(32 - 1)];	1008 step[0] = input[0] + input[(32 - 1)];

999 step[1] = input[1] + input[(32 - 2)];	1009 step[1] = input[1] + input[(32 - 2)];

1000 step[2] = input[2] + input[(32 - 3)];	1010 step[2] = input[2] + input[(32 - 3)];

1001 step[3] = input[3] + input[(32 - 4)];	1011 step[3] = input[3] + input[(32 - 4)];

1002 step[4] = input[4] + input[(32 - 5)];	1012 step[4] = input[4] + input[(32 - 5)];

1003 step[5] = input[5] + input[(32 - 6)];	1013 step[5] = input[5] + input[(32 - 6)];

1004 step[6] = input[6] + input[(32 - 7)];	1014 step[6] = input[6] + input[(32 - 7)];

1005 step[7] = input[7] + input[(32 - 8)];	1015 step[7] = input[7] + input[(32 - 8)];

(...skipping 88 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1094 step[23] = -output[23] + output[16];	1104 step[23] = -output[23] + output[16];

1095 step[24] = -output[24] + output[31];	1105 step[24] = -output[24] + output[31];

1096 step[25] = -output[25] + output[30];	1106 step[25] = -output[25] + output[30];

1097 step[26] = -output[26] + output[29];	1107 step[26] = -output[26] + output[29];

1098 step[27] = -output[27] + output[28];	1108 step[27] = -output[27] + output[28];

1099 step[28] = output[28] + output[27];	1109 step[28] = output[28] + output[27];

1100 step[29] = output[29] + output[26];	1110 step[29] = output[29] + output[26];

1101 step[30] = output[30] + output[25];	1111 step[30] = output[30] + output[25];

1102 step[31] = output[31] + output[24];	1112 step[31] = output[31] + output[24];

1103	1113

	1114 // dampen the magnitude by half, hence the intermediate values are within

	1115 // the range of 16 bits.

	1116 if (round) {

	1117 step[0] = half_round_shift(step[0]);

	1118 step[1] = half_round_shift(step[1]);

	1119 step[2] = half_round_shift(step[2]);

	1120 step[3] = half_round_shift(step[3]);

	1121 step[4] = half_round_shift(step[4]);

	1122 step[5] = half_round_shift(step[5]);

	1123 step[6] = half_round_shift(step[6]);

	1124 step[7] = half_round_shift(step[7]);

	1125 step[8] = half_round_shift(step[8]);

	1126 step[9] = half_round_shift(step[9]);

	1127 step[10] = half_round_shift(step[10]);

	1128 step[11] = half_round_shift(step[11]);

	1129 step[12] = half_round_shift(step[12]);

	1130 step[13] = half_round_shift(step[13]);

	1131 step[14] = half_round_shift(step[14]);

	1132 step[15] = half_round_shift(step[15]);

	1133

	1134 step[16] = half_round_shift(step[16]);

	1135 step[17] = half_round_shift(step[17]);

	1136 step[18] = half_round_shift(step[18]);

	1137 step[19] = half_round_shift(step[19]);

	1138 step[20] = half_round_shift(step[20]);

	1139 step[21] = half_round_shift(step[21]);

	1140 step[22] = half_round_shift(step[22]);

	1141 step[23] = half_round_shift(step[23]);

	1142 step[24] = half_round_shift(step[24]);

	1143 step[25] = half_round_shift(step[25]);

	1144 step[26] = half_round_shift(step[26]);

	1145 step[27] = half_round_shift(step[27]);

	1146 step[28] = half_round_shift(step[28]);

	1147 step[29] = half_round_shift(step[29]);

	1148 step[30] = half_round_shift(step[30]);

	1149 step[31] = half_round_shift(step[31]);

	1150 }

	1151

1104 // Stage 4	1152 // Stage 4

1105 output[0] = step[0] + step[3];	1153 output[0] = step[0] + step[3];

1106 output[1] = step[1] + step[2];	1154 output[1] = step[1] + step[2];

1107 output[2] = -step[2] + step[1];	1155 output[2] = -step[2] + step[1];

1108 output[3] = -step[3] + step[0];	1156 output[3] = -step[3] + step[0];

1109 output[4] = step[4];	1157 output[4] = step[4];

1110 output[5] = dct_32_round((-step[5] + step[6]) * cospi_16_64);	1158 output[5] = dct_32_round((-step[5] + step[6]) * cospi_16_64);

1111 output[6] = dct_32_round((step[6] + step[5]) * cospi_16_64);	1159 output[6] = dct_32_round((step[6] + step[5]) * cospi_16_64);

1112 output[7] = step[7];	1160 output[7] = step[7];

1113 output[8] = step[8] + step[11];	1161 output[8] = step[8] + step[11];

(...skipping 162 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1276 output[15] = dct_32_round(step[30] * cospi_15_64 + step[17] * -cospi_17_64);	1324 output[15] = dct_32_round(step[30] * cospi_15_64 + step[17] * -cospi_17_64);

1277 output[31] = dct_32_round(step[31] * cospi_31_64 + step[16] * -cospi_1_64);	1325 output[31] = dct_32_round(step[31] * cospi_31_64 + step[16] * -cospi_1_64);

1278 }	1326 }

1279	1327

1280 void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch) {	1328 void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch) {

1281 int shortpitch = pitch >> 1;	1329 int shortpitch = pitch >> 1;

1282 int i, j;	1330 int i, j;

1283 int output[32 * 32];	1331 int output[32 * 32];

1284	1332

1285 // Columns	1333 // Columns

1286 for (i = 0; i < 32; i++) {	1334 for (i = 0; i < 32; ++i) {

1287 int temp_in[32], temp_out[32];	1335 int temp_in[32], temp_out[32];

1288 for (j = 0; j < 32; j++)	1336 for (j = 0; j < 32; ++j)

1289 temp_in[j] = input[j * shortpitch + i] << 2;	1337 temp_in[j] = input[j * shortpitch + i] << 2;

1290 dct32_1d(temp_in, temp_out);	1338 dct32_1d(temp_in, temp_out, 0);

1291 for (j = 0; j < 32; j++)	1339 for (j = 0; j < 32; ++j)

1292 output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;	1340 output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;

1293 }	1341 }

1294	1342

1295 // Rows	1343 // Rows

1296 for (i = 0; i < 32; ++i) {	1344 for (i = 0; i < 32; ++i) {

1297 int temp_in[32], temp_out[32];	1345 int temp_in[32], temp_out[32];

1298 for (j = 0; j < 32; ++j)	1346 for (j = 0; j < 32; ++j)

1299 temp_in[j] = output[j + i * 32];	1347 temp_in[j] = output[j + i * 32];

1300 dct32_1d(temp_in, temp_out);	1348 dct32_1d(temp_in, temp_out, 0);

1301 for (j = 0; j < 32; ++j)	1349 for (j = 0; j < 32; ++j)

1302 out[j + i * 32] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;	1350 out[j + i * 32] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;

1303 }	1351 }

1304 }	1352 }

	1353

	1354 // Note that although we use dct_32_round in dct32_1d computation flow,

	1355 // this 2d fdct32x32 for rate-distortion optimization loop is operating

	1356 // within 16 bits precision.

	1357 void vp9_short_fdct32x32_rd_c(int16_t *input, int16_t *out, int pitch) {

	1358 int shortpitch = pitch >> 1;

	1359 int i, j;

	1360 int output[32 * 32];

	1361

	1362 // Columns

	1363 for (i = 0; i < 32; ++i) {

	1364 int temp_in[32], temp_out[32];

	1365 for (j = 0; j < 32; ++j)

	1366 temp_in[j] = input[j * shortpitch + i] << 2;

	1367 dct32_1d(temp_in, temp_out, 0);

	1368 for (j = 0; j < 32; ++j)

	1369 output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;

	1370 }

	1371

	1372 // Rows

	1373 for (i = 0; i < 32; ++i) {

	1374 int temp_in[32], temp_out[32];

	1375 for (j = 0; j < 32; ++j)

	1376 temp_in[j] = output[j + i * 32];

	1377 dct32_1d(temp_in, temp_out, 1);

	1378 for (j = 0; j < 32; ++j)

	1379 out[j + i * 32] = temp_out[j];

	1380 }

	1381 }

OLD	NEW

« no previous file with comments | « source/libvpx/vp9/encoder/vp9_block.h ('k') | source/libvpx/vp9/encoder/vp9_encodeframe.c » ('j') | no next file with comments »