source/libvpx/vp9/common/vp9_idctllm.c - Issue 11974002: libvpx: Pull from upstream

Side by Side Diff: source/libvpx/vp9/common/vp9_idctllm.c

Issue 11974002: libvpx: Pull from upstream (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/libvpx/

Patch Set: Created 7 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.	2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11	11

12 /****************************************************************************	12 /****************************************************************************

13 * Notes:	13 * Notes:

14 *	14 *

15 * This implementation makes use of 16 bit fixed point version of two multiply	15 * This implementation makes use of 16 bit fixed point version of two multiply

16 * constants:	16 * constants:

17 * 1. sqrt(2) * cos (pi/8)	17 * 1. sqrt(2) * cos (pi/8)

18 * 2. sqrt(2) * sin (pi/8)	18 * 2. sqrt(2) * sin (pi/8)

19 * Because the first constant is bigger than 1, to maintain the same 16 bit	19 * Because the first constant is bigger than 1, to maintain the same 16 bit

20 * fixed point precision as the second one, we use a trick of	20 * fixed point precision as the second one, we use a trick of

21 * x * a = x + x*(a-1)	21 * x * a = x + x*(a-1)

22 * so	22 * so

23 * x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1).	23 * x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1).

24 **************************************************************************/	24 **************************************************************************/

25 #include <assert.h>	25 #include <assert.h>

26 #include <math.h>	26 #include <math.h>

27 #include "vpx_ports/config.h"	27 #include "./vpx_config.h"

28 #include "vp9/common/vp9_systemdependent.h"	28 #include "vp9/common/vp9_systemdependent.h"

29

30 #include "vp9/common/vp9_blockd.h"	29 #include "vp9/common/vp9_blockd.h"

	30 #include "vp9/common/vp9_common.h"

31	31

32 static const int cospi8sqrt2minus1 = 20091;	32 static const int cospi8sqrt2minus1 = 20091;

33 static const int sinpi8sqrt2 = 35468;	33 static const int sinpi8sqrt2 = 35468;

34 static const int rounding = 0;	34 static const int rounding = 0;

35	35

36 static const int16_t idct_i4[16] = {	36 static const int16_t idct_i4[16] = {

37 8192, 10703, 8192, 4433,	37 8192, 10703, 8192, 4433,

38 8192, 4433, -8192, -10703,	38 8192, 4433, -8192, -10703,

39 8192, -4433, -8192, 10703,	39 8192, -4433, -8192, 10703,

40 8192, -10703, 8192, -4433	40 8192, -10703, 8192, -4433

(...skipping 111 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
152 5543, -4311, 2120, 542, -3084, 4940, -5698, 5189,	152 5543, -4311, 2120, 542, -3084, 4940, -5698, 5189,

153 -3526, 1080, 1607, -3936, 5390, -5646, 4646, -2614,	153 -3526, 1080, 1607, -3936, 5390, -5646, 4646, -2614,

154 5646, -5189, 4311, -3084, 1607, 0, -1607, 3084,	154 5646, -5189, 4311, -3084, 1607, 0, -1607, 3084,

155 -4311, 5189, -5646, 5646, -5189, 4311, -3084, 1607,	155 -4311, 5189, -5646, 5646, -5189, 4311, -3084, 1607,

156 5698, -5646, 5543, -5390, 5189, -4940, 4646, -4311,	156 5698, -5646, 5543, -5390, 5189, -4940, 4646, -4311,

157 3936, -3526, 3084, -2614, 2120, -1607, 1080, -542	157 3936, -3526, 3084, -2614, 2120, -1607, 1080, -542

158 };	158 };

159	159

160	160

161 /* Converted the transforms to integer form. */	161 /* Converted the transforms to integer form. */

162 #define VERTICAL_SHIFT 14 // 16	162 #define HORIZONTAL_SHIFT 14 // 16

	163 #define HORIZONTAL_ROUNDING ((1 << (HORIZONTAL_SHIFT - 1)) - 1)

	164 #define VERTICAL_SHIFT 17 // 15

163 #define VERTICAL_ROUNDING ((1 << (VERTICAL_SHIFT - 1)) - 1)	165 #define VERTICAL_ROUNDING ((1 << (VERTICAL_SHIFT - 1)) - 1)

164 #define HORIZONTAL_SHIFT 17 // 15

165 #define HORIZONTAL_ROUNDING ((1 << (HORIZONTAL_SHIFT - 1)) - 1)

166 void vp9_ihtllm_c(const int16_t *input, int16_t *output, int pitch,	166 void vp9_ihtllm_c(const int16_t *input, int16_t *output, int pitch,

167 TX_TYPE tx_type, int tx_dim, uint16_t eobs) {	167 TX_TYPE tx_type, int tx_dim, uint16_t eobs) {

168 int i, j, k;	168 int i, j, k;

169 int nz_dim;	169 int nz_dim;

170 int16_t imbuf[256];	170 int16_t imbuf[256];

171	171

172 const int16_t *ip = input;	172 const int16_t *ip = input;

173 int16_t *op = output;	173 int16_t *op = output;

174 int16_t *im = &imbuf[0];	174 int16_t *im = &imbuf[0];

175	175

(...skipping 35 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
211 vpx_memset(im, 0, 512);	211 vpx_memset(im, 0, 512);

212 nz_dim = 8;	212 nz_dim = 8;

213 if(eobs < 3) {	213 if(eobs < 3) {

214 nz_dim = 2;	214 nz_dim = 2;

215 } else if(eobs < 10) {	215 } else if(eobs < 10) {

216 nz_dim = 4;	216 nz_dim = 4;

217 }	217 }

218 }	218 }

219 }	219 }

220	220

221 /* vertical transformation */	221 /* 2-D inverse transform X = M1*Z*Transposed_M2 is calculated in 2 steps

	222 * from right to left:

	223 * 1. horizontal transform: Y= Z*Transposed_M2

	224 * 2. vertical transform: X = M1*Y

	225 * In SIMD, doing this way could eliminate the transpose needed if it is

	226 * calculated from left to right.

	227 */

	228 /* Horizontal transformation */

222 for (j = 0; j < tx_dim; j++) {	229 for (j = 0; j < tx_dim; j++) {

223 for (i = 0; i < nz_dim; i++) {	230 for (i = 0; i < nz_dim; i++) {

224 int temp = 0;	231 int temp = 0;

225	232

226 for (k = 0; k < nz_dim; k++) {	233 for (k = 0; k < nz_dim; k++) {

227 temp += ptv[k] * ip[(k * tx_dim)];	234 temp += ip[k] * pth[k];

228 }	235 }

229	236

230 im[i] = (int16_t)((temp + VERTICAL_ROUNDING) >> VERTICAL_SHIFT);	237 /* Calculate im and store it in its transposed position. */

231 ip++;	238 im[i] = (int16_t)((temp + HORIZONTAL_ROUNDING) >> HORIZONTAL_SHIFT);

	239 ip += tx_dim;

232 }	240 }

233 im += tx_dim; // 16	241 im += tx_dim;

234 ptv += tx_dim;	242 pth += tx_dim;

235 ip = input;	243 ip = input;

236 }	244 }

237	245

238 /* horizontal transformation */	246 /* Vertical transformation */

239 im = &imbuf[0];	247 im = &imbuf[0];

240	248

241 for (j = 0; j < tx_dim; j++) {	249 for (i = 0; i < tx_dim; i++) {

242 const int16_t *pthc = pth;	250 for (j = 0; j < tx_dim; j++) {

243

244 for (i = 0; i < tx_dim; i++) {

245 int temp = 0;	251 int temp = 0;

246	252

247 for (k = 0; k < nz_dim; k++) {	253 for (k = 0; k < nz_dim; k++) {

248 temp += im[k] * pthc[k];	254 temp += ptv[k] * im[k];

249 }	255 }

250	256

251 op[i] = (int16_t)((temp + HORIZONTAL_ROUNDING) >> HORIZONTAL_SHIFT);	257 op[j] = (int16_t)((temp + VERTICAL_ROUNDING) >> VERTICAL_SHIFT);

252 pthc += tx_dim;	258 im += tx_dim;

253 }	259 }

254	260 im = &imbuf[0];

255 im += tx_dim; // 16	261 ptv += tx_dim;

256 op += shortpitch;	262 op += shortpitch;

257 }	263 }

258 }	264 }

259	265

260 void vp9_short_idct4x4llm_c(short *input, short *output, int pitch) {	266 void vp9_short_idct4x4llm_c(int16_t *input, int16_t *output, int pitch) {

261 int i;	267 int i;

262 int a1, b1, c1, d1;	268 int a1, b1, c1, d1;

263	269

264 short *ip = input;	270 int16_t *ip = input;

265 short *op = output;	271 int16_t *op = output;

266 int temp1, temp2;	272 int temp1, temp2;

267 int shortpitch = pitch >> 1;	273 int shortpitch = pitch >> 1;

268	274

269 for (i = 0; i < 4; i++) {	275 for (i = 0; i < 4; i++) {

270 a1 = ip[0] + ip[8];	276 a1 = ip[0] + ip[8];

271 b1 = ip[0] - ip[8];	277 b1 = ip[0] - ip[8];

272	278

273 temp1 = (ip[4] * sinpi8sqrt2 + rounding) >> 16;	279 temp1 = (ip[4] * sinpi8sqrt2 + rounding) >> 16;

274 temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1 + rounding) >> 16);	280 temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1 + rounding) >> 16);

275 c1 = temp1 - temp2;	281 c1 = temp1 - temp2;

(...skipping 31 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
307 op[3] = (a1 - d1 + 16) >> 5;	313 op[3] = (a1 - d1 + 16) >> 5;

308	314

309 op[1] = (b1 + c1 + 16) >> 5;	315 op[1] = (b1 + c1 + 16) >> 5;

310 op[2] = (b1 - c1 + 16) >> 5;	316 op[2] = (b1 - c1 + 16) >> 5;

311	317

312 ip += shortpitch;	318 ip += shortpitch;

313 op += shortpitch;	319 op += shortpitch;

314 }	320 }

315 }	321 }

316	322

317 void vp9_short_idct4x4llm_1_c(short *input, short *output, int pitch) {	323 void vp9_short_idct4x4llm_1_c(int16_t *input, int16_t *output, int pitch) {

318 int i;	324 int i;

319 int a1;	325 int a1;

320 short *op = output;	326 int16_t *op = output;

321 int shortpitch = pitch >> 1;	327 int shortpitch = pitch >> 1;

322 a1 = ((input[0] + 16) >> 5);	328 a1 = ((input[0] + 16) >> 5);

323 for (i = 0; i < 4; i++) {	329 for (i = 0; i < 4; i++) {

324 op[0] = a1;	330 op[0] = a1;

325 op[1] = a1;	331 op[1] = a1;

326 op[2] = a1;	332 op[2] = a1;

327 op[3] = a1;	333 op[3] = a1;

328 op += shortpitch;	334 op += shortpitch;

329 }	335 }

330 }	336 }

331	337

332 void vp9_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr,	338 void vp9_dc_only_idct_add_c(int input_dc, uint8_t *pred_ptr,

333 unsigned char *dst_ptr, int pitch, int stride) {	339 uint8_t *dst_ptr, int pitch, int stride) {

334 int a1 = ((input_dc + 16) >> 5);	340 int a1 = ((input_dc + 16) >> 5);

335 int r, c;	341 int r, c;

336	342

337 for (r = 0; r < 4; r++) {	343 for (r = 0; r < 4; r++) {

338 for (c = 0; c < 4; c++) {	344 for (c = 0; c < 4; c++) {

339 int a = a1 + pred_ptr[c];	345 dst_ptr[c] = clip_pixel(a1 + pred_ptr[c]);

340

341 if (a < 0)

342 a = 0;

343

344 if (a > 255)

345 a = 255;

346

347 dst_ptr[c] = (unsigned char) a;

348 }	346 }

349	347

350 dst_ptr += stride;	348 dst_ptr += stride;

351 pred_ptr += pitch;	349 pred_ptr += pitch;

352 }	350 }

353 }	351 }

354	352

355 void vp9_short_inv_walsh4x4_c(short *input, short *output) {	353 void vp9_short_inv_walsh4x4_c(int16_t *input, int16_t *output) {

356 int i;	354 int i;

357 int a1, b1, c1, d1;	355 int a1, b1, c1, d1;

358 short *ip = input;	356 int16_t *ip = input;

359 short *op = output;	357 int16_t *op = output;

360	358

361 for (i = 0; i < 4; i++) {	359 for (i = 0; i < 4; i++) {

362 a1 = ((ip[0] + ip[3]));	360 a1 = ((ip[0] + ip[3]));

363 b1 = ((ip[1] + ip[2]));	361 b1 = ((ip[1] + ip[2]));

364 c1 = ((ip[1] - ip[2]));	362 c1 = ((ip[1] - ip[2]));

365 d1 = ((ip[0] - ip[3]));	363 d1 = ((ip[0] - ip[3]));

366	364

367 op[0] = (a1 + b1 + 1) >> 1;	365 op[0] = (a1 + b1 + 1) >> 1;

368 op[1] = (c1 + d1) >> 1;	366 op[1] = (c1 + d1) >> 1;

369 op[2] = (a1 - b1) >> 1;	367 op[2] = (a1 - b1) >> 1;

(...skipping 12 matching lines...) Expand all Loading...
382 d1 = ip[0] - ip[12];	380 d1 = ip[0] - ip[12];

383 op[0] = (a1 + b1 + 1) >> 1;	381 op[0] = (a1 + b1 + 1) >> 1;

384 op[4] = (c1 + d1) >> 1;	382 op[4] = (c1 + d1) >> 1;

385 op[8] = (a1 - b1) >> 1;	383 op[8] = (a1 - b1) >> 1;

386 op[12] = (d1 - c1) >> 1;	384 op[12] = (d1 - c1) >> 1;

387 ip++;	385 ip++;

388 op++;	386 op++;

389 }	387 }

390 }	388 }

391	389

392 void vp9_short_inv_walsh4x4_1_c(short *in, short *out) {	390 void vp9_short_inv_walsh4x4_1_c(int16_t *in, int16_t *out) {

393 int i;	391 int i;

394 short tmp[4];	392 int16_t tmp[4];

395 short *ip = in;	393 int16_t *ip = in;

396 short *op = tmp;	394 int16_t *op = tmp;

397	395

398 op[0] = (ip[0] + 1) >> 1;	396 op[0] = (ip[0] + 1) >> 1;

399 op[1] = op[2] = op[3] = (ip[0] >> 1);	397 op[1] = op[2] = op[3] = (ip[0] >> 1);

400	398

401 ip = tmp;	399 ip = tmp;

402 op = out;	400 op = out;

403 for (i = 0; i < 4; i++) {	401 for (i = 0; i < 4; i++) {

404 op[0] = (ip[0] + 1) >> 1;	402 op[0] = (ip[0] + 1) >> 1;

405 op[4] = op[8] = op[12] = (ip[0] >> 1);	403 op[4] = op[8] = op[12] = (ip[0] >> 1);

406 ip++;	404 ip++;

407 op++;	405 op++;

408 }	406 }

409 }	407 }

410	408

411 #if CONFIG_LOSSLESS	409 #if CONFIG_LOSSLESS

412 void vp9_short_inv_walsh4x4_lossless_c(short *input, short *output) {	410 void vp9_short_inv_walsh4x4_lossless_c(int16_t *input, int16_t *output) {

413 int i;	411 int i;

414 int a1, b1, c1, d1;	412 int a1, b1, c1, d1;

415 short *ip = input;	413 int16_t *ip = input;

416 short *op = output;	414 int16_t *op = output;

417	415

418 for (i = 0; i < 4; i++) {	416 for (i = 0; i < 4; i++) {

419 a1 = ((ip[0] + ip[3])) >> Y2_WHT_UPSCALE_FACTOR;	417 a1 = ((ip[0] + ip[3])) >> Y2_WHT_UPSCALE_FACTOR;

420 b1 = ((ip[1] + ip[2])) >> Y2_WHT_UPSCALE_FACTOR;	418 b1 = ((ip[1] + ip[2])) >> Y2_WHT_UPSCALE_FACTOR;

421 c1 = ((ip[1] - ip[2])) >> Y2_WHT_UPSCALE_FACTOR;	419 c1 = ((ip[1] - ip[2])) >> Y2_WHT_UPSCALE_FACTOR;

422 d1 = ((ip[0] - ip[3])) >> Y2_WHT_UPSCALE_FACTOR;	420 d1 = ((ip[0] - ip[3])) >> Y2_WHT_UPSCALE_FACTOR;

423	421

424 op[0] = (a1 + b1 + 1) >> 1;	422 op[0] = (a1 + b1 + 1) >> 1;

425 op[1] = (c1 + d1) >> 1;	423 op[1] = (c1 + d1) >> 1;

426 op[2] = (a1 - b1) >> 1;	424 op[2] = (a1 - b1) >> 1;

(...skipping 15 matching lines...) Expand all Loading...
442 op[0] = ((a1 + b1 + 1) >> 1) << Y2_WHT_UPSCALE_FACTOR;	440 op[0] = ((a1 + b1 + 1) >> 1) << Y2_WHT_UPSCALE_FACTOR;

443 op[4] = ((c1 + d1) >> 1) << Y2_WHT_UPSCALE_FACTOR;	441 op[4] = ((c1 + d1) >> 1) << Y2_WHT_UPSCALE_FACTOR;

444 op[8] = ((a1 - b1) >> 1) << Y2_WHT_UPSCALE_FACTOR;	442 op[8] = ((a1 - b1) >> 1) << Y2_WHT_UPSCALE_FACTOR;

445 op[12] = ((d1 - c1) >> 1) << Y2_WHT_UPSCALE_FACTOR;	443 op[12] = ((d1 - c1) >> 1) << Y2_WHT_UPSCALE_FACTOR;

446	444

447 ip++;	445 ip++;

448 op++;	446 op++;

449 }	447 }

450 }	448 }

451	449

452 void vp9_short_inv_walsh4x4_1_lossless_c(short *in, short *out) {	450 void vp9_short_inv_walsh4x4_1_lossless_c(int16_t *in, int16_t *out) {

453 int i;	451 int i;

454 short tmp[4];	452 int16_t tmp[4];

455 short *ip = in;	453 int16_t *ip = in;

456 short *op = tmp;	454 int16_t *op = tmp;

457	455

458 op[0] = ((ip[0] >> Y2_WHT_UPSCALE_FACTOR) + 1) >> 1;	456 op[0] = ((ip[0] >> Y2_WHT_UPSCALE_FACTOR) + 1) >> 1;

459 op[1] = op[2] = op[3] = ((ip[0] >> Y2_WHT_UPSCALE_FACTOR) >> 1);	457 op[1] = op[2] = op[3] = ((ip[0] >> Y2_WHT_UPSCALE_FACTOR) >> 1);

460	458

461 ip = tmp;	459 ip = tmp;

462 op = out;	460 op = out;

463 for (i = 0; i < 4; i++) {	461 for (i = 0; i < 4; i++) {

464 op[0] = ((ip[0] + 1) >> 1) << Y2_WHT_UPSCALE_FACTOR;	462 op[0] = ((ip[0] + 1) >> 1) << Y2_WHT_UPSCALE_FACTOR;

465 op[4] = op[8] = op[12] = ((ip[0] >> 1)) << Y2_WHT_UPSCALE_FACTOR;	463 op[4] = op[8] = op[12] = ((ip[0] >> 1)) << Y2_WHT_UPSCALE_FACTOR;

466 ip++;	464 ip++;

467 op++;	465 op++;

468 }	466 }

469 }	467 }

470	468

471 void vp9_short_inv_walsh4x4_x8_c(short *input, short *output, int pitch) {	469 void vp9_short_inv_walsh4x4_x8_c(int16_t *input, int16_t *output, int pitch) {

472 int i;	470 int i;

473 int a1, b1, c1, d1;	471 int a1, b1, c1, d1;

474 short *ip = input;	472 int16_t *ip = input;

475 short *op = output;	473 int16_t *op = output;

476 int shortpitch = pitch >> 1;	474 int shortpitch = pitch >> 1;

477	475

478 for (i = 0; i < 4; i++) {	476 for (i = 0; i < 4; i++) {

479 a1 = ((ip[0] + ip[3])) >> WHT_UPSCALE_FACTOR;	477 a1 = ((ip[0] + ip[3])) >> WHT_UPSCALE_FACTOR;

480 b1 = ((ip[1] + ip[2])) >> WHT_UPSCALE_FACTOR;	478 b1 = ((ip[1] + ip[2])) >> WHT_UPSCALE_FACTOR;

481 c1 = ((ip[1] - ip[2])) >> WHT_UPSCALE_FACTOR;	479 c1 = ((ip[1] - ip[2])) >> WHT_UPSCALE_FACTOR;

482 d1 = ((ip[0] - ip[3])) >> WHT_UPSCALE_FACTOR;	480 d1 = ((ip[0] - ip[3])) >> WHT_UPSCALE_FACTOR;

483	481

484 op[0] = (a1 + b1 + 1) >> 1;	482 op[0] = (a1 + b1 + 1) >> 1;

485 op[1] = (c1 + d1) >> 1;	483 op[1] = (c1 + d1) >> 1;

(...skipping 16 matching lines...) Expand all Loading...
502 op[shortpitch * 0] = (a1 + b1 + 1) >> 1;	500 op[shortpitch * 0] = (a1 + b1 + 1) >> 1;

503 op[shortpitch * 1] = (c1 + d1) >> 1;	501 op[shortpitch * 1] = (c1 + d1) >> 1;

504 op[shortpitch * 2] = (a1 - b1) >> 1;	502 op[shortpitch * 2] = (a1 - b1) >> 1;

505 op[shortpitch * 3] = (d1 - c1) >> 1;	503 op[shortpitch * 3] = (d1 - c1) >> 1;

506	504

507 ip++;	505 ip++;

508 op++;	506 op++;

509 }	507 }

510 }	508 }

511	509

512 void vp9_short_inv_walsh4x4_1_x8_c(short *in, short *out, int pitch) {	510 void vp9_short_inv_walsh4x4_1_x8_c(int16_t *in, int16_t *out, int pitch) {

513 int i;	511 int i;

514 short tmp[4];	512 int16_t tmp[4];

515 short *ip = in;	513 int16_t *ip = in;

516 short *op = tmp;	514 int16_t *op = tmp;

517 int shortpitch = pitch >> 1;	515 int shortpitch = pitch >> 1;

518	516

519 op[0] = ((ip[0] >> WHT_UPSCALE_FACTOR) + 1) >> 1;	517 op[0] = ((ip[0] >> WHT_UPSCALE_FACTOR) + 1) >> 1;

520 op[1] = op[2] = op[3] = ((ip[0] >> WHT_UPSCALE_FACTOR) >> 1);	518 op[1] = op[2] = op[3] = ((ip[0] >> WHT_UPSCALE_FACTOR) >> 1);

521	519

522	520

523 ip = tmp;	521 ip = tmp;

524 op = out;	522 op = out;

525 for (i = 0; i < 4; i++) {	523 for (i = 0; i < 4; i++) {

526 op[shortpitch * 0] = (ip[0] + 1) >> 1;	524 op[shortpitch * 0] = (ip[0] + 1) >> 1;

527 op[shortpitch * 1] = op[shortpitch * 2] = op[shortpitch * 3] = ip[0] >> 1;	525 op[shortpitch * 1] = op[shortpitch * 2] = op[shortpitch * 3] = ip[0] >> 1;

528 ip++;	526 ip++;

529 op++;	527 op++;

530 }	528 }

531 }	529 }

532	530

533 void vp9_dc_only_inv_walsh_add_c(short input_dc, unsigned char *pred_ptr,	531 void vp9_dc_only_inv_walsh_add_c(short input_dc, uint8_t *pred_ptr,

534 unsigned char *dst_ptr,	532 uint8_t *dst_ptr,

535 int pitch, int stride) {	533 int pitch, int stride) {

536 int r, c;	534 int r, c;

537 short tmp[16];	535 short tmp[16];

538 vp9_short_inv_walsh4x4_1_x8_c(&input_dc, tmp, 4 << 1);	536 vp9_short_inv_walsh4x4_1_x8_c(&input_dc, tmp, 4 << 1);

539	537

540 for (r = 0; r < 4; r++) {	538 for (r = 0; r < 4; r++) {

541 for (c = 0; c < 4; c++) {	539 for (c = 0; c < 4; c++) {

542 int a = tmp[r * 4 + c] + pred_ptr[c];	540 dst_ptr[c] = clip_pixel(tmp[r * 4 + c] + pred_ptr[c]);

543 if (a < 0)

544 a = 0;

545

546 if (a > 255)

547 a = 255;

548

549 dst_ptr[c] = (unsigned char) a;

550 }	541 }

551	542

552 dst_ptr += stride;	543 dst_ptr += stride;

553 pred_ptr += pitch;	544 pred_ptr += pitch;

554 }	545 }

555 }	546 }

556 #endif	547 #endif

557	548

558 void vp9_dc_only_idct_add_8x8_c(short input_dc,	549 void vp9_dc_only_idct_add_8x8_c(short input_dc,

559 unsigned char *pred_ptr,	550 uint8_t *pred_ptr,

560 unsigned char *dst_ptr,	551 uint8_t *dst_ptr,

561 int pitch, int stride) {	552 int pitch, int stride) {

562 int a1 = ((input_dc + 16) >> 5);	553 int a1 = ((input_dc + 16) >> 5);

563 int r, c, b;	554 int r, c, b;

564 unsigned char *orig_pred = pred_ptr;	555 uint8_t *orig_pred = pred_ptr;

565 unsigned char *orig_dst = dst_ptr;	556 uint8_t *orig_dst = dst_ptr;

566 for (b = 0; b < 4; b++) {	557 for (b = 0; b < 4; b++) {

567 for (r = 0; r < 4; r++) {	558 for (r = 0; r < 4; r++) {

568 for (c = 0; c < 4; c++) {	559 for (c = 0; c < 4; c++) {

569 int a = a1 + pred_ptr[c];	560 dst_ptr[c] = clip_pixel(a1 + pred_ptr[c]);

570

571 if (a < 0)

572 a = 0;

573

574 if (a > 255)

575 a = 255;

576

577 dst_ptr[c] = (unsigned char) a;

578 }	561 }

579	562

580 dst_ptr += stride;	563 dst_ptr += stride;

581 pred_ptr += pitch;	564 pred_ptr += pitch;

582 }	565 }

583 dst_ptr = orig_dst + (b + 1) % 2 * 4 + (b + 1) / 2 * 4 * stride;	566 dst_ptr = orig_dst + (b + 1) % 2 * 4 + (b + 1) / 2 * 4 * stride;

584 pred_ptr = orig_pred + (b + 1) % 2 * 4 + (b + 1) / 2 * 4 * pitch;	567 pred_ptr = orig_pred + (b + 1) % 2 * 4 + (b + 1) / 2 * 4 * pitch;

585 }	568 }

586 }	569 }

587	570

(...skipping 67 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
655 *	638 *

656 * where: c[0] = 1/1024 c[1..7] = (1/1024)*sqrt(2) */	639 * where: c[0] = 1/1024 c[1..7] = (1/1024)*sqrt(2) */

657 static void idctcol(int *blk) {	640 static void idctcol(int *blk) {

658 int x0, x1, x2, x3, x4, x5, x6, x7, x8;	641 int x0, x1, x2, x3, x4, x5, x6, x7, x8;

659	642

660 /* shortcut */	643 /* shortcut */

661 if (!((x1 = (blk[8 * 4] << 8)) | (x2 = blk[8 * 6]) | (x3 = blk[8 * 2]) |	644 if (!((x1 = (blk[8 * 4] << 8)) | (x2 = blk[8 * 6]) | (x3 = blk[8 * 2]) |

662 (x4 = blk[8 * 1]) | (x5 = blk[8 * 7]) | (x6 = blk[8 * 5]) |	645 (x4 = blk[8 * 1]) | (x5 = blk[8 * 7]) | (x6 = blk[8 * 5]) |

663 (x7 = blk[8 * 3]))) {	646 (x7 = blk[8 * 3]))) {

664 blk[8 * 0] = blk[8 * 1] = blk[8 * 2] = blk[8 * 3]	647 blk[8 * 0] = blk[8 * 1] = blk[8 * 2] = blk[8 * 3]

665 = blk[8 * 4] = blk[8 * 5] = blk[8 * 6 ]	648 = blk[8 * 4] = blk[8 * 5] = blk[8 * 6]

666 = blk[8 * 7] = ((blk[8 * 0] + 32) >> 6);	649 = blk[8 * 7] = ((blk[8 * 0] + 32) >> 6);

667 return;	650 return;

668 }	651 }

669	652

670 x0 = (blk[8 * 0] << 8) + 16384;	653 x0 = (blk[8 * 0] << 8) + 16384;

671	654

672 /* first stage */	655 /* first stage */

673 x8 = W7 * (x4 + x5) + 4;	656 x8 = W7 * (x4 + x5) + 4;

674 x4 = (x8 + (W1 - W7) * x4) >> 3;	657 x4 = (x8 + (W1 - W7) * x4) >> 3;

675 x5 = (x8 - (W1 + W7) * x5) >> 3;	658 x5 = (x8 - (W1 + W7) * x5) >> 3;

676 x8 = W3 * (x6 + x7) + 4;	659 x8 = W3 * (x6 + x7) + 4;

(...skipping 24 matching lines...) Expand all Loading...
701 blk[8 * 1] = (x3 + x2) >> 14;	684 blk[8 * 1] = (x3 + x2) >> 14;

702 blk[8 * 2] = (x0 + x4) >> 14;	685 blk[8 * 2] = (x0 + x4) >> 14;

703 blk[8 * 3] = (x8 + x6) >> 14;	686 blk[8 * 3] = (x8 + x6) >> 14;

704 blk[8 * 4] = (x8 - x6) >> 14;	687 blk[8 * 4] = (x8 - x6) >> 14;

705 blk[8 * 5] = (x0 - x4) >> 14;	688 blk[8 * 5] = (x0 - x4) >> 14;

706 blk[8 * 6] = (x3 - x2) >> 14;	689 blk[8 * 6] = (x3 - x2) >> 14;

707 blk[8 * 7] = (x7 - x1) >> 14;	690 blk[8 * 7] = (x7 - x1) >> 14;

708 }	691 }

709	692

710 #define TX_DIM 8	693 #define TX_DIM 8

711 void vp9_short_idct8x8_c(short *coefs, short *block, int pitch) {	694 void vp9_short_idct8x8_c(int16_t *coefs, int16_t *block, int pitch) {

712 int X[TX_DIM * TX_DIM];	695 int X[TX_DIM * TX_DIM];

713 int i, j;	696 int i, j;

714 int shortpitch = pitch >> 1;	697 int shortpitch = pitch >> 1;

715	698

716 for (i = 0; i < TX_DIM; i++) {	699 for (i = 0; i < TX_DIM; i++) {

717 for (j = 0; j < TX_DIM; j++) {	700 for (j = 0; j < TX_DIM; j++) {

718 X[i * TX_DIM + j] = (int)(coefs[i * TX_DIM + j] + 1	701 X[i * TX_DIM + j] = (int)(coefs[i * TX_DIM + j] + 1

719 + (coefs[i * TX_DIM + j] < 0)) >> 2;	702 + (coefs[i * TX_DIM + j] < 0)) >> 2;

720 }	703 }

721 }	704 }

(...skipping 98 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
820 blk[8 * 0] = (x7 + x1) >> 14;	803 blk[8 * 0] = (x7 + x1) >> 14;

821 blk[8 * 1] = (x3 + x2) >> 14;	804 blk[8 * 1] = (x3 + x2) >> 14;

822 blk[8 * 2] = (x0 + x4) >> 14;	805 blk[8 * 2] = (x0 + x4) >> 14;

823 blk[8 * 3] = (x8 + x6) >> 14;	806 blk[8 * 3] = (x8 + x6) >> 14;

824 blk[8 * 4] = (x8 - x6) >> 14;	807 blk[8 * 4] = (x8 - x6) >> 14;

825 blk[8 * 5] = (x0 - x4) >> 14;	808 blk[8 * 5] = (x0 - x4) >> 14;

826 blk[8 * 6] = (x3 - x2) >> 14;	809 blk[8 * 6] = (x3 - x2) >> 14;

827 blk[8 * 7] = (x7 - x1) >> 14;	810 blk[8 * 7] = (x7 - x1) >> 14;

828 }	811 }

829	812

830 void vp9_short_idct10_8x8_c(short *coefs, short *block, int pitch) {	813 void vp9_short_idct10_8x8_c(int16_t *coefs, int16_t *block, int pitch) {

831 int X[TX_DIM * TX_DIM];	814 int X[TX_DIM * TX_DIM];

832 int i, j;	815 int i, j;

833 int shortpitch = pitch >> 1;	816 int shortpitch = pitch >> 1;

834	817

835 for (i = 0; i < TX_DIM; i++) {	818 for (i = 0; i < TX_DIM; i++) {

836 for (j = 0; j < TX_DIM; j++) {	819 for (j = 0; j < TX_DIM; j++) {

837 X[i * TX_DIM + j] = (int)(coefs[i * TX_DIM + j] + 1	820 X[i * TX_DIM + j] = (int)(coefs[i * TX_DIM + j] + 1

838 + (coefs[i * TX_DIM + j] < 0)) >> 2;	821 + (coefs[i * TX_DIM + j] < 0)) >> 2;

839 }	822 }

840 }	823 }

841	824

842 /* Do first 4 row idct only since non-zero dct coefficients are all in	825 /* Do first 4 row idct only since non-zero dct coefficients are all in

843 * upper-left 4x4 area. */	826 * upper-left 4x4 area. */

844 for (i = 0; i < 4; i++)	827 for (i = 0; i < 4; i++)

845 idctrow10(X + 8 * i);	828 idctrow10(X + 8 * i);

846	829

847 for (i = 0; i < 8; i++)	830 for (i = 0; i < 8; i++)

848 idctcol10(X + i);	831 idctcol10(X + i);

849	832

850 for (i = 0; i < TX_DIM; i++) {	833 for (i = 0; i < TX_DIM; i++) {

851 for (j = 0; j < TX_DIM; j++) {	834 for (j = 0; j < TX_DIM; j++) {

852 block[i * shortpitch + j] = X[i * TX_DIM + j] >> 1;	835 block[i * shortpitch + j] = X[i * TX_DIM + j] >> 1;

853 }	836 }

854 }	837 }

855 }	838 }

856	839

857 void vp9_short_ihaar2x2_c(short *input, short *output, int pitch) {	840 void vp9_short_ihaar2x2_c(int16_t *input, int16_t *output, int pitch) {

858 int i;	841 int i;

859 short *ip = input; // 0,1, 4, 8	842 int16_t *ip = input; // 0, 1, 4, 8

860 short *op = output;	843 int16_t *op = output;

861 for (i = 0; i < 16; i++) {	844 for (i = 0; i < 16; i++) {

862 op[i] = 0;	845 op[i] = 0;

863 }	846 }

864	847

865 op[0] = (ip[0] + ip[1] + ip[4] + ip[8] + 1) >> 1;	848 op[0] = (ip[0] + ip[1] + ip[4] + ip[8] + 1) >> 1;

866 op[1] = (ip[0] - ip[1] + ip[4] - ip[8]) >> 1;	849 op[1] = (ip[0] - ip[1] + ip[4] - ip[8]) >> 1;

867 op[4] = (ip[0] + ip[1] - ip[4] - ip[8]) >> 1;	850 op[4] = (ip[0] + ip[1] - ip[4] - ip[8]) >> 1;

868 op[8] = (ip[0] - ip[1] - ip[4] + ip[8]) >> 1;	851 op[8] = (ip[0] - ip[1] - ip[4] + ip[8]) >> 1;

869 }	852 }

870	853

871	854

872 #if 0	855 #if 0

873 // Keep a really bad float version as reference for now.	856 // Keep a really bad float version as reference for now.

874 void vp9_short_idct16x16_c(short *input, short *output, int pitch) {	857 void vp9_short_idct16x16_c(int16_t *input, int16_t *output, int pitch) {

875	858

876 vp9_clear_system_state(); // Make it simd safe : __asm emms;	859 vp9_clear_system_state(); // Make it simd safe : __asm emms;

877 {	860 {

878 double x;	861 double x;

879 const int short_pitch = pitch >> 1;	862 const int short_pitch = pitch >> 1;

880 int i, j, k, l;	863 int i, j, k, l;

881 for (l = 0; l < 16; ++l) {	864 for (l = 0; l < 16; ++l) {

882 for (k = 0; k < 16; ++k) {	865 for (k = 0; k < 16; ++k) {

883 double s = 0;	866 double s = 0;

884 for (i = 0; i < 16; ++i) {	867 for (i = 0; i < 16; ++i) {

885 for (j = 0; j < 16; ++j) {	868 for (j = 0; j < 16; ++j) {

886 x=cos(PI*j*(l+0.5)/16.0)*cos(PI*i*(k+0.5)/16.0)*input[i*16+j]/32;	869 x=cos(PI*j*(l+0.5)/16.0)*cos(PI*i*(k+0.5)/16.0)*input[i*16+j]/32;

887 if (i != 0)	870 if (i != 0)

888 x *= sqrt(2.0);	871 x *= sqrt(2.0);

889 if (j != 0)	872 if (j != 0)

890 x *= sqrt(2.0);	873 x *= sqrt(2.0);

891 s += x;	874 s += x;

892 }	875 }

893 }	876 }

894 output[k*short_pitch+l] = (short)round(s);	877 output[k*short_pitch+l] = (short)round(s);

895 }	878 }

896 }	879 }

897 }	880 }

898 vp9_clear_system_state(); // Make it simd safe : __asm emms;	881 vp9_clear_system_state(); // Make it simd safe : __asm emms;

899 }	882 }

900 #endif	883 #endif

901	884

902 #define TEST_INT_16x16_IDCT 1	885 #define TEST_INT_16x16_IDCT 1

903 #if !TEST_INT_16x16_IDCT	886 #if !TEST_INT_16x16_IDCT

904 static const double C1 = 0.995184726672197;

905 static const double C2 = 0.98078528040323;

906 static const double C3 = 0.956940335732209;

907 static const double C4 = 0.923879532511287;

908 static const double C5 = 0.881921264348355;

909 static const double C6 = 0.831469612302545;

910 static const double C7 = 0.773010453362737;

911 static const double C8 = 0.707106781186548;

912 static const double C9 = 0.634393284163646;

913 static const double C10 = 0.555570233019602;

914 static const double C11 = 0.471396736825998;

915 static const double C12 = 0.38268343236509;

916 static const double C13 = 0.290284677254462;

917 static const double C14 = 0.195090322016128;

918 static const double C15 = 0.098017140329561;

919

920	887

921 static void butterfly_16x16_idct_1d(double input[16], double output[16]) {	888 static void butterfly_16x16_idct_1d(double input[16], double output[16]) {

922	889

	890 static const double C1 = 0.995184726672197;

	891 static const double C2 = 0.98078528040323;

	892 static const double C3 = 0.956940335732209;

	893 static const double C4 = 0.923879532511287;

	894 static const double C5 = 0.881921264348355;

	895 static const double C6 = 0.831469612302545;

	896 static const double C7 = 0.773010453362737;

	897 static const double C8 = 0.707106781186548;

	898 static const double C9 = 0.634393284163646;

	899 static const double C10 = 0.555570233019602;

	900 static const double C11 = 0.471396736825998;

	901 static const double C12 = 0.38268343236509;

	902 static const double C13 = 0.290284677254462;

	903 static const double C14 = 0.195090322016128;

	904 static const double C15 = 0.098017140329561;

	905

923 vp9_clear_system_state(); // Make it simd safe : __asm emms;	906 vp9_clear_system_state(); // Make it simd safe : __asm emms;

924 {	907 {

925 double step[16];	908 double step[16];

926 double intermediate[16];	909 double intermediate[16];

927 double temp1, temp2;	910 double temp1, temp2;

928	911

929	912

930 // step 1 and 2	913 // step 1 and 2

931 step[ 0] = input[0] + input[8];	914 step[ 0] = input[0] + input[8];

932 step[ 1] = input[0] - input[8];	915 step[ 1] = input[0] - input[8];

(...skipping 191 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1124 output[k] += input[n]*cos(kPi*(2*k+1)*n/32.0);	1107 output[k] += input[n]*cos(kPi*(2*k+1)*n/32.0);

1125 if (n == 0)	1108 if (n == 0)

1126 output[k] = output[k]/kSqrt2;	1109 output[k] = output[k]/kSqrt2;

1127 }	1110 }

1128 }	1111 }

1129 }	1112 }

1130 vp9_clear_system_state(); // Make it simd safe : __asm emms;	1113 vp9_clear_system_state(); // Make it simd safe : __asm emms;

1131 }	1114 }

1132 #endif	1115 #endif

1133	1116

1134 void vp9_short_idct16x16_c(short *input, short *output, int pitch) {	1117 void vp9_short_idct16x16_c(int16_t *input, int16_t *output, int pitch) {

1135	1118

1136 vp9_clear_system_state(); // Make it simd safe : __asm emms;	1119 vp9_clear_system_state(); // Make it simd safe : __asm emms;

1137 {	1120 {

1138 double out[16*16], out2[16*16];	1121 double out[16*16], out2[16*16];

1139 const int short_pitch = pitch >> 1;	1122 const int short_pitch = pitch >> 1;

1140 int i, j;	1123 int i, j;

1141 // First transform rows	1124 // First transform rows

1142 for (i = 0; i < 16; ++i) {	1125 for (i = 0; i < 16; ++i) {

1143 double temp_in[16], temp_out[16];	1126 double temp_in[16], temp_out[16];

1144 for (j = 0; j < 16; ++j)	1127 for (j = 0; j < 16; ++j)

(...skipping 11 matching lines...) Expand all Loading...
1156 for (j = 0; j < 16; ++j)	1139 for (j = 0; j < 16; ++j)

1157 out2[j*16 + i] = temp_out[j];	1140 out2[j*16 + i] = temp_out[j];

1158 }	1141 }

1159 for (i = 0; i < 16*16; ++i)	1142 for (i = 0; i < 16*16; ++i)

1160 output[i] = round(out2[i]/128);	1143 output[i] = round(out2[i]/128);

1161 }	1144 }

1162 vp9_clear_system_state(); // Make it simd safe : __asm emms;	1145 vp9_clear_system_state(); // Make it simd safe : __asm emms;

1163 }	1146 }

1164	1147

1165 #else	1148 #else

	1149

	1150 #define INITIAL_SHIFT 2

	1151 #define INITIAL_ROUNDING (1 << (INITIAL_SHIFT - 1))

	1152 #define RIGHT_SHIFT 14

	1153 #define RIGHT_ROUNDING (1 << (RIGHT_SHIFT - 1))

	1154

1166 static const int16_t C1 = 16305;	1155 static const int16_t C1 = 16305;

1167 static const int16_t C2 = 16069;	1156 static const int16_t C2 = 16069;

1168 static const int16_t C3 = 15679;	1157 static const int16_t C3 = 15679;

1169 static const int16_t C4 = 15137;	1158 static const int16_t C4 = 15137;

1170 static const int16_t C5 = 14449;	1159 static const int16_t C5 = 14449;

1171 static const int16_t C6 = 13623;	1160 static const int16_t C6 = 13623;

1172 static const int16_t C7 = 12665;	1161 static const int16_t C7 = 12665;

1173 static const int16_t C8 = 11585;	1162 static const int16_t C8 = 11585;

1174 static const int16_t C9 = 10394;	1163 static const int16_t C9 = 10394;

1175 static const int16_t C10 = 9102;	1164 static const int16_t C10 = 9102;

1176 static const int16_t C11 = 7723;	1165 static const int16_t C11 = 7723;

1177 static const int16_t C12 = 6270;	1166 static const int16_t C12 = 6270;

1178 static const int16_t C13 = 4756;	1167 static const int16_t C13 = 4756;

1179 static const int16_t C14 = 3196;	1168 static const int16_t C14 = 3196;

1180 static const int16_t C15 = 1606;	1169 static const int16_t C15 = 1606;

1181	1170

1182 #define INITIAL_SHIFT 2

1183 #define INITIAL_ROUNDING (1 << (INITIAL_SHIFT - 1))

1184 #define RIGHT_SHIFT 14

1185 #define RIGHT_ROUNDING (1 << (RIGHT_SHIFT - 1))

1186

1187 static void butterfly_16x16_idct_1d(int16_t input[16], int16_t output[16],	1171 static void butterfly_16x16_idct_1d(int16_t input[16], int16_t output[16],

1188 int last_shift_bits) {	1172 int last_shift_bits) {

1189 int16_t step[16];	1173 int16_t step[16];

1190 int intermediate[16];	1174 int intermediate[16];

1191 int temp1, temp2;	1175 int temp1, temp2;

1192	1176

1193 int step1_shift = RIGHT_SHIFT + INITIAL_SHIFT;	1177 int step1_shift = RIGHT_SHIFT + INITIAL_SHIFT;

1194 int step1_rounding = 1 << (step1_shift - 1);	1178 int step1_rounding = 1 << (step1_shift - 1);

1195 int last_rounding = 0;	1179 int last_rounding = 0;

1196	1180

1197 if (last_shift_bits > 0)	1181 if (last_shift_bits > 0)

1198 last_rounding = 1 << (last_shift_bits - 1);	1182 last_rounding = 1 << (last_shift_bits - 1);

1199	1183

1200 // step 1 and 2	1184 // step 1 and 2

1201 step[ 0] = (input[0] + input[8] + INITIAL_ROUNDING) >> INITIAL_SHIFT;	1185 step[ 0] = (input[0] + input[8] + INITIAL_ROUNDING) >> INITIAL_SHIFT;

1202 step[ 1] = (input[0] - input[8] + INITIAL_ROUNDING) >> INITIAL_SHIFT;	1186 step[ 1] = (input[0] - input[8] + INITIAL_ROUNDING) >> INITIAL_SHIFT;

1203	1187

1204 temp1 = input[4] * C12;	1188 temp1 = input[4] * C12;

1205 temp2 = input[12] * C4;	1189 temp2 = input[12] * C4;

1206 temp1 = (temp1 - temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;	1190 temp1 = (temp1 - temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;

1207 temp1 *= C8;	1191 temp1 *= C8;

1208 step[ 2] = (2 * (temp1) + step1_rounding) >> step1_shift;	1192 step[ 2] = (2 * (temp1) + step1_rounding) >> step1_shift;

1209	1193

1210 temp1 = input[4] * C4;	1194 temp1 = input[4] * C4;

1211 temp2 = input[12] * C12;	1195 temp2 = input[12] * C12;

1212 temp1 = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;	1196 temp1 = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;

1213 temp1 *= C8;	1197 temp1 *= C8;

1214 step[ 3] = (2 * (temp1) + step1_rounding) >> step1_shift;	1198 step[ 3] = (2 * (temp1) + step1_rounding) >> step1_shift;

1215	1199

1216 temp1 = input[2] * C8;	1200 temp1 = input[2] * C8;

1217 temp1 = (2 * (temp1) + RIGHT_ROUNDING) >> RIGHT_SHIFT;	1201 temp1 = (2 * (temp1) + RIGHT_ROUNDING) >> RIGHT_SHIFT;

1218 temp2 = input[6] + input[10];	1202 temp2 = input[6] + input[10];

1219 step[ 4] = (temp1 + temp2 + INITIAL_ROUNDING) >> INITIAL_SHIFT;	1203 step[ 4] = (temp1 + temp2 + INITIAL_ROUNDING) >> INITIAL_SHIFT;

1220 step[ 5] = (temp1 - temp2 + INITIAL_ROUNDING) >> INITIAL_SHIFT;	1204 step[ 5] = (temp1 - temp2 + INITIAL_ROUNDING) >> INITIAL_SHIFT;

1221	1205

1222 temp1 = input[14] * C8;	1206 temp1 = input[14] * C8;

1223 temp1 = (2 * (temp1) + RIGHT_ROUNDING) >> RIGHT_SHIFT;	1207 temp1 = (2 * (temp1) + RIGHT_ROUNDING) >> RIGHT_SHIFT;

1224 temp2 = input[6] - input[10];	1208 temp2 = input[6] - input[10];

1225 step[ 6] = (temp2 - temp1 + INITIAL_ROUNDING) >> INITIAL_SHIFT;	1209 step[ 6] = (temp2 - temp1 + INITIAL_ROUNDING) >> INITIAL_SHIFT;

1226 step[ 7] = (temp2 + temp1 + INITIAL_ROUNDING) >> INITIAL_SHIFT;	1210 step[ 7] = (temp2 + temp1 + INITIAL_ROUNDING) >> INITIAL_SHIFT;

1227	1211

1228 // for odd input	1212 // for odd input

1229 temp1 = input[3] * C12;	1213 temp1 = input[3] * C12;

1230 temp2 = input[13] * C4;	1214 temp2 = input[13] * C4;

1231 temp1 = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;	1215 temp1 = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;

1232 temp1 *= C8;	1216 temp1 *= C8;

1233 intermediate[ 8] = (2 * (temp1) + RIGHT_ROUNDING) >> RIGHT_SHIFT;	1217 intermediate[ 8] = (2 * (temp1) + RIGHT_ROUNDING) >> RIGHT_SHIFT;

1234	1218

1235 temp1 = input[3] * C4;	1219 temp1 = input[3] * C4;

1236 temp2 = input[13] * C12;	1220 temp2 = input[13] * C12;

1237 temp2 = (temp2 - temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT;	1221 temp2 = (temp2 - temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT;

1238 temp2 *= C8;	1222 temp2 *= C8;

1239 intermediate[ 9] = (2 * (temp2) + RIGHT_ROUNDING) >> RIGHT_SHIFT;	1223 intermediate[ 9] = (2 * (temp2) + RIGHT_ROUNDING) >> RIGHT_SHIFT;

1240	1224

1241 intermediate[10] = (2 * (input[9] * C8) + RIGHT_ROUNDING) >> RIGHT_SHIFT;	1225 intermediate[10] = (2 * (input[9] * C8) + RIGHT_ROUNDING) >> RIGHT_SHIFT;

1242 intermediate[11] = input[15] - input[1];	1226 intermediate[11] = input[15] - input[1];

1243 intermediate[12] = input[15] + input[1];	1227 intermediate[12] = input[15] + input[1];

1244 intermediate[13] = (2 * (input[7] * C8) + RIGHT_ROUNDING) >> RIGHT_SHIFT;	1228 intermediate[13] = (2 * (input[7] * C8) + RIGHT_ROUNDING) >> RIGHT_SHIFT;

1245	1229

1246 temp1 = input[11] * C12;	1230 temp1 = input[11] * C12;

1247 temp2 = input[5] * C4;	1231 temp2 = input[5] * C4;

1248 temp2 = (temp2 - temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT;	1232 temp2 = (temp2 - temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT;

1249 temp2 *= C8;	1233 temp2 *= C8;

1250 intermediate[14] = (2 * (temp2) + RIGHT_ROUNDING) >> RIGHT_SHIFT;	1234 intermediate[14] = (2 * (temp2) + RIGHT_ROUNDING) >> RIGHT_SHIFT;

1251	1235

1252 temp1 = input[11] * C4;	1236 temp1 = input[11] * C4;

1253 temp2 = input[5] * C12;	1237 temp2 = input[5] * C12;

1254 temp1 = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;	1238 temp1 = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;

1255 temp1 *= C8;	1239 temp1 *= C8;

1256 intermediate[15] = (2 * (temp1) + RIGHT_ROUNDING) >> RIGHT_SHIFT;	1240 intermediate[15] = (2 * (temp1) + RIGHT_ROUNDING) >> RIGHT_SHIFT;

1257	1241

1258 step[ 8] = (intermediate[ 8] + intermediate[14] + INITIAL_ROUNDING)	1242 step[ 8] = (intermediate[ 8] + intermediate[14] + INITIAL_ROUNDING)

1259 >> INITIAL_SHIFT;	1243 >> INITIAL_SHIFT;

1260 step[ 9] = (intermediate[ 9] + intermediate[15] + INITIAL_ROUNDING)	1244 step[ 9] = (intermediate[ 9] + intermediate[15] + INITIAL_ROUNDING)

1261 >> INITIAL_SHIFT;	1245 >> INITIAL_SHIFT;

1262 step[10] = (intermediate[10] + intermediate[11] + INITIAL_ROUNDING)	1246 step[10] = (intermediate[10] + intermediate[11] + INITIAL_ROUNDING)

1263 >> INITIAL_SHIFT;	1247 >> INITIAL_SHIFT;

1264 step[11] = (intermediate[10] - intermediate[11] + INITIAL_ROUNDING)	1248 step[11] = (intermediate[10] - intermediate[11] + INITIAL_ROUNDING)

1265 >> INITIAL_SHIFT;	1249 >> INITIAL_SHIFT;

1266 step[12] = (intermediate[12] + intermediate[13] + INITIAL_ROUNDING)	1250 step[12] = (intermediate[12] + intermediate[13] + INITIAL_ROUNDING)

1267 >> INITIAL_SHIFT;	1251 >> INITIAL_SHIFT;

1268 step[13] = (intermediate[12] - intermediate[13] + INITIAL_ROUNDING)	1252 step[13] = (intermediate[12] - intermediate[13] + INITIAL_ROUNDING)

1269 >> INITIAL_SHIFT;	1253 >> INITIAL_SHIFT;

1270 step[14] = (intermediate[ 8] - intermediate[14] + INITIAL_ROUNDING)	1254 step[14] = (intermediate[ 8] - intermediate[14] + INITIAL_ROUNDING)

1271 >> INITIAL_SHIFT;	1255 >> INITIAL_SHIFT;

1272 step[15] = (intermediate[ 9] - intermediate[15] + INITIAL_ROUNDING)	1256 step[15] = (intermediate[ 9] - intermediate[15] + INITIAL_ROUNDING)

1273 >> INITIAL_SHIFT;	1257 >> INITIAL_SHIFT;

1274	1258

1275 // step 3	1259 // step 3

1276 output[0] = step[ 0] + step[ 3];	1260 output[0] = step[ 0] + step[ 3];

1277 output[1] = step[ 1] + step[ 2];	1261 output[1] = step[ 1] + step[ 2];

1278 output[2] = step[ 1] - step[ 2];	1262 output[2] = step[ 1] - step[ 2];

1279 output[3] = step[ 0] - step[ 3];	1263 output[3] = step[ 0] - step[ 3];

1280	1264

1281 temp1 = step[ 4] * C14;	1265 temp1 = step[ 4] * C14;

1282 temp2 = step[ 7] * C2;	1266 temp2 = step[ 7] * C2;

1283 output[4] = (temp1 - temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;	1267 output[4] = (temp1 - temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;

1284	1268

1285 temp1 = step[ 4] * C2;	1269 temp1 = step[ 4] * C2;

1286 temp2 = step[ 7] * C14;	1270 temp2 = step[ 7] * C14;

1287 output[7] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;	1271 output[7] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;

1288	1272

1289 temp1 = step[ 5] * C10;	1273 temp1 = step[ 5] * C10;

1290 temp2 = step[ 6] * C6;	1274 temp2 = step[ 6] * C6;

1291 output[5] = (temp1 - temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;	1275 output[5] = (temp1 - temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;

1292	1276

1293 temp1 = step[ 5] * C6;	1277 temp1 = step[ 5] * C6;

1294 temp2 = step[ 6] * C10;	1278 temp2 = step[ 6] * C10;

1295 output[6] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;	1279 output[6] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;

1296	1280

1297 output[8] = step[ 8] + step[11];	1281 output[8] = step[ 8] + step[11];

1298 output[9] = step[ 9] + step[10];	1282 output[9] = step[ 9] + step[10];

1299 output[10] = step[ 9] - step[10];	1283 output[10] = step[ 9] - step[10];

1300 output[11] = step[ 8] - step[11];	1284 output[11] = step[ 8] - step[11];

1301 output[12] = step[12] + step[15];	1285 output[12] = step[12] + step[15];

1302 output[13] = step[13] + step[14];	1286 output[13] = step[13] + step[14];

1303 output[14] = step[13] - step[14];	1287 output[14] = step[13] - step[14];

1304 output[15] = step[12] - step[15];	1288 output[15] = step[12] - step[15];

1305	1289

1306 // output 4	1290 // output 4

1307 step[ 0] = output[0] + output[7];	1291 step[ 0] = output[0] + output[7];

1308 step[ 1] = output[1] + output[6];	1292 step[ 1] = output[1] + output[6];

1309 step[ 2] = output[2] + output[5];	1293 step[ 2] = output[2] + output[5];

1310 step[ 3] = output[3] + output[4];	1294 step[ 3] = output[3] + output[4];

1311 step[ 4] = output[3] - output[4];	1295 step[ 4] = output[3] - output[4];

1312 step[ 5] = output[2] - output[5];	1296 step[ 5] = output[2] - output[5];

1313 step[ 6] = output[1] - output[6];	1297 step[ 6] = output[1] - output[6];

1314 step[ 7] = output[0] - output[7];	1298 step[ 7] = output[0] - output[7];

1315	1299

1316 temp1 = output[8] * C7;	1300 temp1 = output[8] * C7;

1317 temp2 = output[15] * C9;	1301 temp2 = output[15] * C9;

1318 step[ 8] = (temp1 - temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;	1302 step[ 8] = (temp1 - temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;

1319	1303

1320 temp1 = output[9] * C11;	1304 temp1 = output[9] * C11;

1321 temp2 = output[14] * C5;	1305 temp2 = output[14] * C5;

1322 step[ 9] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;	1306 step[ 9] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;

1323	1307

1324 temp1 = output[10] * C3;	1308 temp1 = output[10] * C3;

1325 temp2 = output[13] * C13;	1309 temp2 = output[13] * C13;

1326 step[10] = (temp1 - temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;	1310 step[10] = (temp1 - temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;

1327	1311

1328 temp1 = output[11] * C15;	1312 temp1 = output[11] * C15;

1329 temp2 = output[12] * C1;	1313 temp2 = output[12] * C1;

1330 step[11] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;	1314 step[11] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;

1331	1315

1332 temp1 = output[11] * C1;	1316 temp1 = output[11] * C1;

1333 temp2 = output[12] * C15;	1317 temp2 = output[12] * C15;

1334 step[12] = (temp2 - temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT;	1318 step[12] = (temp2 - temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT;

1335	1319

1336 temp1 = output[10] * C13;	1320 temp1 = output[10] * C13;

1337 temp2 = output[13] * C3;	1321 temp2 = output[13] * C3;

1338 step[13] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;	1322 step[13] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;

1339	1323

1340 temp1 = output[9] * C5;	1324 temp1 = output[9] * C5;

1341 temp2 = output[14] * C11;	1325 temp2 = output[14] * C11;

1342 step[14] = (temp2 - temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT;	1326 step[14] = (temp2 - temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT;

1343	1327

1344 temp1 = output[8] * C9;	1328 temp1 = output[8] * C9;

1345 temp2 = output[15] * C7;	1329 temp2 = output[15] * C7;

1346 step[15] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;	1330 step[15] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;

1347	1331

1348 // step 5	1332 // step 5

1349 output[0] = (step[0] + step[15] + last_rounding) >> last_shift_bits;	1333 output[0] = (step[0] + step[15] + last_rounding) >> last_shift_bits;

1350 output[1] = (step[1] + step[14] + last_rounding) >> last_shift_bits;	1334 output[1] = (step[1] + step[14] + last_rounding) >> last_shift_bits;

1351 output[2] = (step[2] + step[13] + last_rounding) >> last_shift_bits;	1335 output[2] = (step[2] + step[13] + last_rounding) >> last_shift_bits;

1352 output[3] = (step[3] + step[12] + last_rounding) >> last_shift_bits;	1336 output[3] = (step[3] + step[12] + last_rounding) >> last_shift_bits;

1353 output[4] = (step[4] + step[11] + last_rounding) >> last_shift_bits;	1337 output[4] = (step[4] + step[11] + last_rounding) >> last_shift_bits;

1354 output[5] = (step[5] + step[10] + last_rounding) >> last_shift_bits;	1338 output[5] = (step[5] + step[10] + last_rounding) >> last_shift_bits;

1355 output[6] = (step[6] + step[ 9] + last_rounding) >> last_shift_bits;	1339 output[6] = (step[6] + step[ 9] + last_rounding) >> last_shift_bits;

1356 output[7] = (step[7] + step[ 8] + last_rounding) >> last_shift_bits;	1340 output[7] = (step[7] + step[ 8] + last_rounding) >> last_shift_bits;

1357	1341

1358 output[15] = (step[0] - step[15] + last_rounding) >> last_shift_bits;	1342 output[15] = (step[0] - step[15] + last_rounding) >> last_shift_bits;

1359 output[14] = (step[1] - step[14] + last_rounding) >> last_shift_bits;	1343 output[14] = (step[1] - step[14] + last_rounding) >> last_shift_bits;

1360 output[13] = (step[2] - step[13] + last_rounding) >> last_shift_bits;	1344 output[13] = (step[2] - step[13] + last_rounding) >> last_shift_bits;

1361 output[12] = (step[3] - step[12] + last_rounding) >> last_shift_bits;	1345 output[12] = (step[3] - step[12] + last_rounding) >> last_shift_bits;

1362 output[11] = (step[4] - step[11] + last_rounding) >> last_shift_bits;	1346 output[11] = (step[4] - step[11] + last_rounding) >> last_shift_bits;

1363 output[10] = (step[5] - step[10] + last_rounding) >> last_shift_bits;	1347 output[10] = (step[5] - step[10] + last_rounding) >> last_shift_bits;

1364 output[9] = (step[6] - step[ 9] + last_rounding) >> last_shift_bits;	1348 output[9] = (step[6] - step[ 9] + last_rounding) >> last_shift_bits;

1365 output[8] = (step[7] - step[ 8] + last_rounding) >> last_shift_bits;	1349 output[8] = (step[7] - step[ 8] + last_rounding) >> last_shift_bits;

1366 }	1350 }

1367	1351

1368 void vp9_short_idct16x16_c(int16_t *input, int16_t *output, int pitch) {	1352 void vp9_short_idct16x16_c(int16_t *input, int16_t *output, int pitch) {

1369 int16_t out[16 * 16];	1353 int16_t out[16 * 16];

1370 int16_t *outptr = &out[0];	1354 int16_t *outptr = &out[0];

1371 const int short_pitch = pitch >> 1;	1355 const int short_pitch = pitch >> 1;

1372 int i, j;	1356 int i, j;

1373 int16_t temp_in[16], temp_out[16];	1357 int16_t temp_in[16], temp_out[16];

1374	1358

1375 // First transform rows	1359 // First transform rows

1376 for (i = 0; i < 16; ++i) {	1360 for (i = 0; i < 16; ++i) {

1377 butterfly_16x16_idct_1d(input, outptr, 0);	1361 butterfly_16x16_idct_1d(input, outptr, 0);

1378 input += short_pitch;	1362 input += short_pitch;

1379 outptr += 16;	1363 outptr += 16;

1380 }	1364 }

1381	1365

1382 // Then transform columns	1366 // Then transform columns

1383 for (i = 0; i < 16; ++i) {	1367 for (i = 0; i < 16; ++i) {

1384 for (j = 0; j < 16; ++j)	1368 for (j = 0; j < 16; ++j)

1385 temp_in[j] = out[j * 16 + i];	1369 temp_in[j] = out[j * 16 + i];

1386 butterfly_16x16_idct_1d(temp_in, temp_out, 3);	1370 butterfly_16x16_idct_1d(temp_in, temp_out, 3);

1387 for (j = 0; j < 16; ++j)	1371 for (j = 0; j < 16; ++j)

1388 output[j * 16 + i] = temp_out[j];	1372 output[j * 16 + i] = temp_out[j];

1389 }	1373 }

1390 }	1374 }

1391	1375

1392 /* The following function is called when we know the maximum number of non-zero	1376 /* The following function is called when we know the maximum number of non-zero

1393 * dct coefficients is less or equal 10.	1377 * dct coefficients is less or equal 10.

1394 */	1378 */

1395 static void butterfly_16x16_idct10_1d(int16_t input[16], int16_t output[16],	1379 static void butterfly_16x16_idct10_1d(int16_t input[16], int16_t output[16],

1396 int last_shift_bits) {	1380 int last_shift_bits) {

1397 int16_t step[16] = {0};	1381 int16_t step[16] = {0};

(...skipping 143 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1541 butterfly_16x16_idct10_1d(temp_in, temp_out, 3);	1525 butterfly_16x16_idct10_1d(temp_in, temp_out, 3);

1542 for (j = 0; j < 16; ++j)	1526 for (j = 0; j < 16; ++j)

1543 output[j*16 + i] = temp_out[j];	1527 output[j*16 + i] = temp_out[j];

1544 }	1528 }

1545 }	1529 }

1546 #undef INITIAL_SHIFT	1530 #undef INITIAL_SHIFT

1547 #undef INITIAL_ROUNDING	1531 #undef INITIAL_ROUNDING

1548 #undef RIGHT_SHIFT	1532 #undef RIGHT_SHIFT

1549 #undef RIGHT_ROUNDING	1533 #undef RIGHT_ROUNDING

1550 #endif	1534 #endif

	1535

	1536 #if !CONFIG_DWTDCTHYBRID

	1537 #define DownshiftMultiplyBy2(x) x * 2

	1538 #define DownshiftMultiply(x) x

	1539

	1540 static void idct16(double *input, double *output, int stride) {

	1541 static const double C1 = 0.995184726672197;

	1542 static const double C2 = 0.98078528040323;

	1543 static const double C3 = 0.956940335732209;

	1544 static const double C4 = 0.923879532511287;

	1545 static const double C5 = 0.881921264348355;

	1546 static const double C6 = 0.831469612302545;

	1547 static const double C7 = 0.773010453362737;

	1548 static const double C8 = 0.707106781186548;

	1549 static const double C9 = 0.634393284163646;

	1550 static const double C10 = 0.555570233019602;

	1551 static const double C11 = 0.471396736825998;

	1552 static const double C12 = 0.38268343236509;

	1553 static const double C13 = 0.290284677254462;

	1554 static const double C14 = 0.195090322016128;

	1555 static const double C15 = 0.098017140329561;

	1556

	1557 double step[16];

	1558 double intermediate[16];

	1559 double temp1, temp2;

	1560

	1561 // step 1 and 2

	1562 step[ 0] = input[stride*0] + input[stride*8];

	1563 step[ 1] = input[stride*0] - input[stride*8];

	1564

	1565 temp1 = input[stride*4]*C12;

	1566 temp2 = input[stride*12]*C4;

	1567

	1568 temp1 -= temp2;

	1569 temp1 = DownshiftMultiply(temp1);

	1570 temp1 *= C8;

	1571

	1572 step[ 2] = DownshiftMultiplyBy2(temp1);

	1573

	1574 temp1 = input[stride*4]*C4;

	1575 temp2 = input[stride*12]*C12;

	1576 temp1 += temp2;

	1577 temp1 = DownshiftMultiply(temp1);

	1578 temp1 *= C8;

	1579 step[ 3] = DownshiftMultiplyBy2(temp1);

	1580

	1581 temp1 = input[stride*2]*C8;

	1582 temp1 = DownshiftMultiplyBy2(temp1);

	1583 temp2 = input[stride*6] + input[stride*10];

	1584

	1585 step[ 4] = temp1 + temp2;

	1586 step[ 5] = temp1 - temp2;

	1587

	1588 temp1 = input[stride*14]*C8;

	1589 temp1 = DownshiftMultiplyBy2(temp1);

	1590 temp2 = input[stride*6] - input[stride*10];

	1591

	1592 step[ 6] = temp2 - temp1;

	1593 step[ 7] = temp2 + temp1;

	1594

	1595 // for odd input

	1596 temp1 = input[stride*3]*C12;

	1597 temp2 = input[stride*13]*C4;

	1598 temp1 += temp2;

	1599 temp1 = DownshiftMultiply(temp1);

	1600 temp1 *= C8;

	1601 intermediate[ 8] = DownshiftMultiplyBy2(temp1);

	1602

	1603 temp1 = input[stride*3]*C4;

	1604 temp2 = input[stride*13]*C12;

	1605 temp2 -= temp1;

	1606 temp2 = DownshiftMultiply(temp2);

	1607 temp2 *= C8;

	1608 intermediate[ 9] = DownshiftMultiplyBy2(temp2);

	1609

	1610 intermediate[10] = DownshiftMultiplyBy2(input[stride*9]*C8);

	1611 intermediate[11] = input[stride*15] - input[stride*1];

	1612 intermediate[12] = input[stride*15] + input[stride*1];

	1613 intermediate[13] = DownshiftMultiplyBy2((input[stride*7]*C8));

	1614

	1615 temp1 = input[stride*11]*C12;

	1616 temp2 = input[stride*5]*C4;

	1617 temp2 -= temp1;

	1618 temp2 = DownshiftMultiply(temp2);

	1619 temp2 *= C8;

	1620 intermediate[14] = DownshiftMultiplyBy2(temp2);

	1621

	1622 temp1 = input[stride*11]*C4;

	1623 temp2 = input[stride*5]*C12;

	1624 temp1 += temp2;

	1625 temp1 = DownshiftMultiply(temp1);

	1626 temp1 *= C8;

	1627 intermediate[15] = DownshiftMultiplyBy2(temp1);

	1628

	1629 step[ 8] = intermediate[ 8] + intermediate[14];

	1630 step[ 9] = intermediate[ 9] + intermediate[15];

	1631 step[10] = intermediate[10] + intermediate[11];

	1632 step[11] = intermediate[10] - intermediate[11];

	1633 step[12] = intermediate[12] + intermediate[13];

	1634 step[13] = intermediate[12] - intermediate[13];

	1635 step[14] = intermediate[ 8] - intermediate[14];

	1636 step[15] = intermediate[ 9] - intermediate[15];

	1637

	1638 // step 3

	1639 output[stride*0] = step[ 0] + step[ 3];

	1640 output[stride*1] = step[ 1] + step[ 2];

	1641 output[stride*2] = step[ 1] - step[ 2];

	1642 output[stride*3] = step[ 0] - step[ 3];

	1643

	1644 temp1 = step[ 4]*C14;

	1645 temp2 = step[ 7]*C2;

	1646 temp1 -= temp2;

	1647 output[stride*4] = DownshiftMultiply(temp1);

	1648

	1649 temp1 = step[ 4]*C2;

	1650 temp2 = step[ 7]*C14;

	1651 temp1 += temp2;

	1652 output[stride*7] = DownshiftMultiply(temp1);

	1653

	1654 temp1 = step[ 5]*C10;

	1655 temp2 = step[ 6]*C6;

	1656 temp1 -= temp2;

	1657 output[stride*5] = DownshiftMultiply(temp1);

	1658

	1659 temp1 = step[ 5]*C6;

	1660 temp2 = step[ 6]*C10;

	1661 temp1 += temp2;

	1662 output[stride*6] = DownshiftMultiply(temp1);

	1663

	1664 output[stride*8] = step[ 8] + step[11];

	1665 output[stride*9] = step[ 9] + step[10];

	1666 output[stride*10] = step[ 9] - step[10];

	1667 output[stride*11] = step[ 8] - step[11];

	1668 output[stride*12] = step[12] + step[15];

	1669 output[stride*13] = step[13] + step[14];

	1670 output[stride*14] = step[13] - step[14];

	1671 output[stride*15] = step[12] - step[15];

	1672

	1673 // output 4

	1674 step[ 0] = output[stride*0] + output[stride*7];

	1675 step[ 1] = output[stride*1] + output[stride*6];

	1676 step[ 2] = output[stride*2] + output[stride*5];

	1677 step[ 3] = output[stride*3] + output[stride*4];

	1678 step[ 4] = output[stride*3] - output[stride*4];

	1679 step[ 5] = output[stride*2] - output[stride*5];

	1680 step[ 6] = output[stride*1] - output[stride*6];

	1681 step[ 7] = output[stride*0] - output[stride*7];

	1682

	1683 temp1 = output[stride*8]*C7;

	1684 temp2 = output[stride*15]*C9;

	1685 temp1 -= temp2;

	1686 step[ 8] = DownshiftMultiply(temp1);

	1687

	1688 temp1 = output[stride*9]*C11;

	1689 temp2 = output[stride*14]*C5;

	1690 temp1 += temp2;

	1691 step[ 9] = DownshiftMultiply(temp1);

	1692

	1693 temp1 = output[stride*10]*C3;

	1694 temp2 = output[stride*13]*C13;

	1695 temp1 -= temp2;

	1696 step[10] = DownshiftMultiply(temp1);

	1697

	1698 temp1 = output[stride*11]*C15;

	1699 temp2 = output[stride*12]*C1;

	1700 temp1 += temp2;

	1701 step[11] = DownshiftMultiply(temp1);

	1702

	1703 temp1 = output[stride*11]*C1;

	1704 temp2 = output[stride*12]*C15;

	1705 temp2 -= temp1;

	1706 step[12] = DownshiftMultiply(temp2);

	1707

	1708 temp1 = output[stride*10]*C13;

	1709 temp2 = output[stride*13]*C3;

	1710 temp1 += temp2;

	1711 step[13] = DownshiftMultiply(temp1);

	1712

	1713 temp1 = output[stride*9]*C5;

	1714 temp2 = output[stride*14]*C11;

	1715 temp2 -= temp1;

	1716 step[14] = DownshiftMultiply(temp2);

	1717

	1718 temp1 = output[stride*8]*C9;

	1719 temp2 = output[stride*15]*C7;

	1720 temp1 += temp2;

	1721 step[15] = DownshiftMultiply(temp1);

	1722

	1723 // step 5

	1724 output[stride*0] = step[0] + step[15];

	1725 output[stride*1] = step[1] + step[14];

	1726 output[stride*2] = step[2] + step[13];

	1727 output[stride*3] = step[3] + step[12];

	1728 output[stride*4] = step[4] + step[11];

	1729 output[stride*5] = step[5] + step[10];

	1730 output[stride*6] = step[6] + step[ 9];

	1731 output[stride*7] = step[7] + step[ 8];

	1732

	1733 output[stride*15] = step[0] - step[15];

	1734 output[stride*14] = step[1] - step[14];

	1735 output[stride*13] = step[2] - step[13];

	1736 output[stride*12] = step[3] - step[12];

	1737 output[stride*11] = step[4] - step[11];

	1738 output[stride*10] = step[5] - step[10];

	1739 output[stride*9] = step[6] - step[ 9];

	1740 output[stride*8] = step[7] - step[ 8];

	1741 }

	1742

	1743 static void butterfly_32_idct_1d(double *input, double *output, int stride) {

	1744 static const double C1 = 0.998795456205; // cos(pi * 1 / 64)

	1745 static const double C3 = 0.989176509965; // cos(pi * 3 / 64)

	1746 static const double C5 = 0.970031253195; // cos(pi * 5 / 64)

	1747 static const double C7 = 0.941544065183; // cos(pi * 7 / 64)

	1748 static const double C9 = 0.903989293123; // cos(pi * 9 / 64)

	1749 static const double C11 = 0.857728610000; // cos(pi * 11 / 64)

	1750 static const double C13 = 0.803207531481; // cos(pi * 13 / 64)

	1751 static const double C15 = 0.740951125355; // cos(pi * 15 / 64)

	1752 static const double C16 = 0.707106781187; // cos(pi * 16 / 64)

	1753 static const double C17 = 0.671558954847; // cos(pi * 17 / 64)

	1754 static const double C19 = 0.595699304492; // cos(pi * 19 / 64)

	1755 static const double C21 = 0.514102744193; // cos(pi * 21 / 64)

	1756 static const double C23 = 0.427555093430; // cos(pi * 23 / 64)

	1757 static const double C25 = 0.336889853392; // cos(pi * 25 / 64)

	1758 static const double C27 = 0.242980179903; // cos(pi * 27 / 64)

	1759 static const double C29 = 0.146730474455; // cos(pi * 29 / 64)

	1760 static const double C31 = 0.049067674327; // cos(pi * 31 / 64)

	1761

	1762 double step1[32];

	1763 double step2[32];

	1764

	1765 step1[ 0] = input[stride*0];

	1766 step1[ 1] = input[stride*2];

	1767 step1[ 2] = input[stride*4];

	1768 step1[ 3] = input[stride*6];

	1769 step1[ 4] = input[stride*8];

	1770 step1[ 5] = input[stride*10];

	1771 step1[ 6] = input[stride*12];

	1772 step1[ 7] = input[stride*14];

	1773 step1[ 8] = input[stride*16];

	1774 step1[ 9] = input[stride*18];

	1775 step1[10] = input[stride*20];

	1776 step1[11] = input[stride*22];

	1777 step1[12] = input[stride*24];

	1778 step1[13] = input[stride*26];

	1779 step1[14] = input[stride*28];

	1780 step1[15] = input[stride*30];

	1781

	1782 step1[16] = DownshiftMultiplyBy2(input[stride*1]*C16);

	1783 step1[17] = (input[stride*3] + input[stride*1]);

	1784 step1[18] = (input[stride*5] + input[stride*3]);

	1785 step1[19] = (input[stride*7] + input[stride*5]);

	1786 step1[20] = (input[stride*9] + input[stride*7]);

	1787 step1[21] = (input[stride*11] + input[stride*9]);

	1788 step1[22] = (input[stride*13] + input[stride*11]);

	1789 step1[23] = (input[stride*15] + input[stride*13]);

	1790 step1[24] = (input[stride*17] + input[stride*15]);

	1791 step1[25] = (input[stride*19] + input[stride*17]);

	1792 step1[26] = (input[stride*21] + input[stride*19]);

	1793 step1[27] = (input[stride*23] + input[stride*21]);

	1794 step1[28] = (input[stride*25] + input[stride*23]);

	1795 step1[29] = (input[stride*27] + input[stride*25]);

	1796 step1[30] = (input[stride*29] + input[stride*27]);

	1797 step1[31] = (input[stride*31] + input[stride*29]);

	1798

	1799 idct16(step1, step2, 1);

	1800 idct16(step1 + 16, step2 + 16, 1);

	1801

	1802 step2[16] = DownshiftMultiply(step2[16] / (2*C1));

	1803 step2[17] = DownshiftMultiply(step2[17] / (2*C3));

	1804 step2[18] = DownshiftMultiply(step2[18] / (2*C5));

	1805 step2[19] = DownshiftMultiply(step2[19] / (2*C7));

	1806 step2[20] = DownshiftMultiply(step2[20] / (2*C9));

	1807 step2[21] = DownshiftMultiply(step2[21] / (2*C11));

	1808 step2[22] = DownshiftMultiply(step2[22] / (2*C13));

	1809 step2[23] = DownshiftMultiply(step2[23] / (2*C15));

	1810 step2[24] = DownshiftMultiply(step2[24] / (2*C17));

	1811 step2[25] = DownshiftMultiply(step2[25] / (2*C19));

	1812 step2[26] = DownshiftMultiply(step2[26] / (2*C21));

	1813 step2[27] = DownshiftMultiply(step2[27] / (2*C23));

	1814 step2[28] = DownshiftMultiply(step2[28] / (2*C25));

	1815 step2[29] = DownshiftMultiply(step2[29] / (2*C27));

	1816 step2[30] = DownshiftMultiply(step2[30] / (2*C29));

	1817 step2[31] = DownshiftMultiply(step2[31] / (2*C31));

	1818

	1819 output[stride* 0] = step2[ 0] + step2[16];

	1820 output[stride* 1] = step2[ 1] + step2[17];

	1821 output[stride* 2] = step2[ 2] + step2[18];

	1822 output[stride* 3] = step2[ 3] + step2[19];

	1823 output[stride* 4] = step2[ 4] + step2[20];

	1824 output[stride* 5] = step2[ 5] + step2[21];

	1825 output[stride* 6] = step2[ 6] + step2[22];

	1826 output[stride* 7] = step2[ 7] + step2[23];

	1827 output[stride* 8] = step2[ 8] + step2[24];

	1828 output[stride* 9] = step2[ 9] + step2[25];

	1829 output[stride*10] = step2[10] + step2[26];

	1830 output[stride*11] = step2[11] + step2[27];

	1831 output[stride*12] = step2[12] + step2[28];

	1832 output[stride*13] = step2[13] + step2[29];

	1833 output[stride*14] = step2[14] + step2[30];

	1834 output[stride*15] = step2[15] + step2[31];

	1835 output[stride*16] = step2[15] - step2[(31 - 0)];

	1836 output[stride*17] = step2[14] - step2[(31 - 1)];

	1837 output[stride*18] = step2[13] - step2[(31 - 2)];

	1838 output[stride*19] = step2[12] - step2[(31 - 3)];

	1839 output[stride*20] = step2[11] - step2[(31 - 4)];

	1840 output[stride*21] = step2[10] - step2[(31 - 5)];

	1841 output[stride*22] = step2[ 9] - step2[(31 - 6)];

	1842 output[stride*23] = step2[ 8] - step2[(31 - 7)];

	1843 output[stride*24] = step2[ 7] - step2[(31 - 8)];

	1844 output[stride*25] = step2[ 6] - step2[(31 - 9)];

	1845 output[stride*26] = step2[ 5] - step2[(31 - 10)];

	1846 output[stride*27] = step2[ 4] - step2[(31 - 11)];

	1847 output[stride*28] = step2[ 3] - step2[(31 - 12)];

	1848 output[stride*29] = step2[ 2] - step2[(31 - 13)];

	1849 output[stride*30] = step2[ 1] - step2[(31 - 14)];

	1850 output[stride*31] = step2[ 0] - step2[(31 - 15)];

	1851 }

	1852

	1853 void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) {

	1854 vp9_clear_system_state(); // Make it simd safe : __asm emms;

	1855 {

	1856 double out[32*32], out2[32*32];

	1857 const int short_pitch = pitch >> 1;

	1858 int i, j;

	1859 // First transform rows

	1860 for (i = 0; i < 32; ++i) {

	1861 double temp_in[32], temp_out[32];

	1862 for (j = 0; j < 32; ++j)

	1863 temp_in[j] = input[j + i*short_pitch];

	1864 butterfly_32_idct_1d(temp_in, temp_out, 1);

	1865 for (j = 0; j < 32; ++j)

	1866 out[j + i*32] = temp_out[j];

	1867 }

	1868 // Then transform columns

	1869 for (i = 0; i < 32; ++i) {

	1870 double temp_in[32], temp_out[32];

	1871 for (j = 0; j < 32; ++j)

	1872 temp_in[j] = out[j*32 + i];

	1873 butterfly_32_idct_1d(temp_in, temp_out, 1);

	1874 for (j = 0; j < 32; ++j)

	1875 out2[j*32 + i] = temp_out[j];

	1876 }

	1877 for (i = 0; i < 32*32; ++i)

	1878 output[i] = round(out2[i]/128);

	1879 }

	1880 vp9_clear_system_state(); // Make it simd safe : __asm emms;

	1881 }

	1882

	1883 #else // !CONFIG_DWTDCTHYBRID

	1884

	1885 #if DWT_TYPE == 53

	1886

	1887 // Note: block length must be even for this implementation

	1888 static void synthesis_53_row(int length, int16_t *lowpass, int16_t *highpass,

	1889 int16_t *x) {

	1890 int16_t r, *a, *b;

	1891 int n;

	1892

	1893 n = length >> 1;

	1894 b = highpass;

	1895 a = lowpass;

	1896 r = *highpass;

	1897 while (n--) {

	1898 *a++ -= (r + (*b) + 1) >> 1;

	1899 r = *b++;

	1900 }

	1901

	1902 n = length >> 1;

	1903 b = highpass;

	1904 a = lowpass;

	1905 while (--n) {

	1906 *x++ = ((r = *a++) + 1) >> 1;

	1907 *x++ = *b++ + ((r + (*a) + 2) >> 2);

	1908 }

	1909 *x++ = ((r = *a) + 1) >> 1;

	1910 *x++ = *b + ((r + 1) >> 1);

	1911 }

	1912

	1913 static void synthesis_53_col(int length, int16_t *lowpass, int16_t *highpass,

	1914 int16_t *x) {

	1915 int16_t r, *a, *b;

	1916 int n;

	1917

	1918 n = length >> 1;

	1919 b = highpass;

	1920 a = lowpass;

	1921 r = *highpass;

	1922 while (n--) {

	1923 *a++ -= (r + (*b) + 1) >> 1;

	1924 r = *b++;

	1925 }

	1926

	1927 n = length >> 1;

	1928 b = highpass;

	1929 a = lowpass;

	1930 while (--n) {

	1931 r = *a++;

	1932 *x++ = r;

	1933 *x++ = ((*b++) << 1) + ((r + (*a) + 1) >> 1);

	1934 }

	1935 *x++ = *a;

	1936 *x++ = ((*b) << 1) + *a;

	1937 }

	1938

	1939 static void dyadic_synthesize_53(int levels, int width, int height, int16_t *c,

	1940 int pitch_c, int16_t *x, int pitch_x) {

	1941 int th[16], tw[16], lv, i, j, nh, nw, hh = height, hw = width;

	1942 short buffer[2 * DWT_MAX_LENGTH];

	1943

	1944 th[0] = hh;

	1945 tw[0] = hw;

	1946 for (i = 1; i <= levels; i++) {

	1947 th[i] = (th[i - 1] + 1) >> 1;

	1948 tw[i] = (tw[i - 1] + 1) >> 1;

	1949 }

	1950 for (lv = levels - 1; lv >= 0; lv--) {

	1951 nh = th[lv];

	1952 nw = tw[lv];

	1953 hh = th[lv + 1];

	1954 hw = tw[lv + 1];

	1955 if ((nh < 2) || (nw < 2)) continue;

	1956 for (j = 0; j < nw; j++) {

	1957 for (i = 0; i < nh; i++)

	1958 buffer[i] = c[i * pitch_c + j];

	1959 synthesis_53_col(nh, buffer, buffer + hh, buffer + nh);

	1960 for (i = 0; i < nh; i++)

	1961 c[i * pitch_c + j] = buffer[i + nh];

	1962 }

	1963 for (i = 0; i < nh; i++) {

	1964 memcpy(buffer, &c[i * pitch_c], nw * sizeof(*buffer));

	1965 synthesis_53_row(nw, buffer, buffer + hw, &c[i * pitch_c]);

	1966 }

	1967 }

	1968 for (i = 0; i < height; i++) {

	1969 for (j = 0; j < width; j++) {

	1970 x[i * pitch_x + j] = c[i * pitch_c + j] >= 0 ?

	1971 ((c[i * pitch_c + j] + DWT_PRECISION_RND) >> DWT_PRECISION_BITS) :

	1972 -((-c[i * pitch_c + j] + DWT_PRECISION_RND) >> DWT_PRECISION_BITS);

	1973 }

	1974 }

	1975 }

	1976

	1977 #elif DWT_TYPE == 26

	1978

	1979 // Note: block length must be even for this implementation

	1980 static void synthesis_26_row(int length, int16_t *lowpass, int16_t *highpass,

	1981 int16_t *x) {

	1982 int16_t r, s, *a, *b;

	1983 int i, n = length >> 1;

	1984

	1985 if (n >= 4) {

	1986 a = lowpass;

	1987 b = highpass;

	1988 r = *lowpass;

	1989 while (--n) {

	1990 *b++ += (r - a[1] + 4) >> 3;

	1991 r = *a++;

	1992 }

	1993 *b += (r - *a + 4) >> 3;

	1994 }

	1995 a = lowpass;

	1996 b = highpass;

	1997 for (i = length >> 1; i; i--) {

	1998 s = *b++;

	1999 r = *a++;

	2000 *x++ = (r + s + 1) >> 1;

	2001 *x++ = (r - s + 1) >> 1;

	2002 }

	2003 }

	2004

	2005 static void synthesis_26_col(int length, int16_t *lowpass, int16_t *highpass,

	2006 int16_t *x) {

	2007 int16_t r, s, *a, *b;

	2008 int i, n = length >> 1;

	2009

	2010 if (n >= 4) {

	2011 a = lowpass;

	2012 b = highpass;

	2013 r = *lowpass;

	2014 while (--n) {

	2015 *b++ += (r - a[1] + 4) >> 3;

	2016 r = *a++;

	2017 }

	2018 *b += (r - *a + 4) >> 3;

	2019 }

	2020 a = lowpass;

	2021 b = highpass;

	2022 for (i = length >> 1; i; i--) {

	2023 s = *b++;

	2024 r = *a++;

	2025 *x++ = r + s;

	2026 *x++ = r - s;

	2027 }

	2028 }

	2029

	2030 static void dyadic_synthesize_26(int levels, int width, int height, int16_t *c,

	2031 int pitch_c, int16_t *x, int pitch_x) {

	2032 int th[16], tw[16], lv, i, j, nh, nw, hh = height, hw = width;

	2033 int16_t buffer[2 * DWT_MAX_LENGTH];

	2034

	2035 th[0] = hh;

	2036 tw[0] = hw;

	2037 for (i = 1; i <= levels; i++) {

	2038 th[i] = (th[i - 1] + 1) >> 1;

	2039 tw[i] = (tw[i - 1] + 1) >> 1;

	2040 }

	2041 for (lv = levels - 1; lv >= 0; lv--) {

	2042 nh = th[lv];

	2043 nw = tw[lv];

	2044 hh = th[lv + 1];

	2045 hw = tw[lv + 1];

	2046 if ((nh < 2) || (nw < 2)) continue;

	2047 for (j = 0; j < nw; j++) {

	2048 for (i = 0; i < nh; i++)

	2049 buffer[i] = c[i * pitch_c + j];

	2050 synthesis_26_col(nh, buffer, buffer + hh, buffer + nh);

	2051 for (i = 0; i < nh; i++)

	2052 c[i * pitch_c + j] = buffer[i + nh];

	2053 }

	2054 for (i = 0; i < nh; i++) {

	2055 memcpy(buffer, &c[i * pitch_c], nw * sizeof(*buffer));

	2056 synthesis_26_row(nw, buffer, buffer + hw, &c[i * pitch_c]);

	2057 }

	2058 }

	2059 for (i = 0; i < height; i++) {

	2060 for (j = 0; j < width; j++) {

	2061 x[i * pitch_x + j] = c[i * pitch_c + j] >= 0 ?

	2062 ((c[i * pitch_c + j] + DWT_PRECISION_RND) >> DWT_PRECISION_BITS) :

	2063 -((-c[i * pitch_c + j] + DWT_PRECISION_RND) >> DWT_PRECISION_BITS);

	2064 }

	2065 }

	2066 }

	2067

	2068 #elif DWT_TYPE == 97

	2069

	2070 static void synthesis_97(int length, double *lowpass, double *highpass,

	2071 double *x) {

	2072 static const double a_predict1 = -1.586134342;

	2073 static const double a_update1 = -0.05298011854;

	2074 static const double a_predict2 = 0.8829110762;

	2075 static const double a_update2 = 0.4435068522;

	2076 static const double s_low = 1.149604398;

	2077 static const double s_high = 1/1.149604398;

	2078 static const double inv_s_low = 1 / s_low;

	2079 static const double inv_s_high = 1 / s_high;

	2080 int i;

	2081 double y[DWT_MAX_LENGTH];

	2082 // Undo pack and scale

	2083 for (i = 0; i < length / 2; i++) {

	2084 y[i * 2] = lowpass[i] * inv_s_low;

	2085 y[i * 2 + 1] = highpass[i] * inv_s_high;

	2086 }

	2087 memcpy(x, y, sizeof(*y) * length);

	2088 // Undo update 2

	2089 for (i = 2; i < length; i += 2) {

	2090 x[i] -= a_update2 * (x[i-1] + x[i+1]);

	2091 }

	2092 x[0] -= 2 * a_update2 * x[1];

	2093 // Undo predict 2

	2094 for (i = 1; i < length - 2; i += 2) {

	2095 x[i] -= a_predict2 * (x[i - 1] + x[i + 1]);

	2096 }

	2097 x[length - 1] -= 2 * a_predict2 * x[length - 2];

	2098 // Undo update 1

	2099 for (i = 2; i < length; i += 2) {

	2100 x[i] -= a_update1 * (x[i - 1] + x[i + 1]);

	2101 }

	2102 x[0] -= 2 * a_update1 * x[1];

	2103 // Undo predict 1

	2104 for (i = 1; i < length - 2; i += 2) {

	2105 x[i] -= a_predict1 * (x[i - 1] + x[i + 1]);

	2106 }

	2107 x[length - 1] -= 2 * a_predict1 * x[length - 2];

	2108 }

	2109

	2110 static void dyadic_synthesize_97(int levels, int width, int height, int16_t *c,

	2111 int pitch_c, int16_t *x, int pitch_x) {

	2112 int th[16], tw[16], lv, i, j, nh, nw, hh = height, hw = width;

	2113 double buffer[2 * DWT_MAX_LENGTH];

	2114 double y[DWT_MAX_LENGTH * DWT_MAX_LENGTH];

	2115

	2116 th[0] = hh;

	2117 tw[0] = hw;

	2118 for (i = 1; i <= levels; i++) {

	2119 th[i] = (th[i - 1] + 1) >> 1;

	2120 tw[i] = (tw[i - 1] + 1) >> 1;

	2121 }

	2122 for (lv = levels - 1; lv >= 0; lv--) {

	2123 nh = th[lv];

	2124 nw = tw[lv];

	2125 hh = th[lv + 1];

	2126 hw = tw[lv + 1];

	2127 if ((nh < 2) || (nw < 2)) continue;

	2128 for (j = 0; j < nw; j++) {

	2129 for (i = 0; i < nh; i++)

	2130 buffer[i] = c[i * pitch_c + j];

	2131 synthesis_97(nh, buffer, buffer + hh, buffer + nh);

	2132 for (i = 0; i < nh; i++)

	2133 y[i * DWT_MAX_LENGTH + j] = buffer[i + nh];

	2134 }

	2135 for (i = 0; i < nh; i++) {

	2136 memcpy(buffer, &y[i * DWT_MAX_LENGTH], nw * sizeof(*buffer));

	2137 synthesis_97(nw, buffer, buffer + hw, &y[i * DWT_MAX_LENGTH]);

	2138 }

	2139 }

	2140 for (i = 0; i < height; i++)

	2141 for (j = 0; j < width; j++)

	2142 x[i * pitch_x + j] = round(y[i * DWT_MAX_LENGTH + j] /

	2143 (1 << DWT_PRECISION_BITS));

	2144 }

	2145

	2146 #endif // DWT_TYPE

	2147

	2148 // TODO(debargha): Implement scaling differently so as not to have to use the

	2149 // floating point 16x16 dct

	2150 static void butterfly_16x16_idct_1d_f(double input[16], double output[16]) {

	2151 static const double C1 = 0.995184726672197;

	2152 static const double C2 = 0.98078528040323;

	2153 static const double C3 = 0.956940335732209;

	2154 static const double C4 = 0.923879532511287;

	2155 static const double C5 = 0.881921264348355;

	2156 static const double C6 = 0.831469612302545;

	2157 static const double C7 = 0.773010453362737;

	2158 static const double C8 = 0.707106781186548;

	2159 static const double C9 = 0.634393284163646;

	2160 static const double C10 = 0.555570233019602;

	2161 static const double C11 = 0.471396736825998;

	2162 static const double C12 = 0.38268343236509;

	2163 static const double C13 = 0.290284677254462;

	2164 static const double C14 = 0.195090322016128;

	2165 static const double C15 = 0.098017140329561;

	2166

	2167 vp9_clear_system_state(); // Make it simd safe : __asm emms;

	2168 {

	2169 double step[16];

	2170 double intermediate[16];

	2171 double temp1, temp2;

	2172

	2173

	2174 // step 1 and 2

	2175 step[ 0] = input[0] + input[8];

	2176 step[ 1] = input[0] - input[8];

	2177

	2178 temp1 = input[4]*C12;

	2179 temp2 = input[12]*C4;

	2180

	2181 temp1 -= temp2;

	2182 temp1 *= C8;

	2183

	2184 step[ 2] = 2*(temp1);

	2185

	2186 temp1 = input[4]*C4;

	2187 temp2 = input[12]*C12;

	2188 temp1 += temp2;

	2189 temp1 = (temp1);

	2190 temp1 *= C8;

	2191 step[ 3] = 2*(temp1);

	2192

	2193 temp1 = input[2]*C8;

	2194 temp1 = 2*(temp1);

	2195 temp2 = input[6] + input[10];

	2196

	2197 step[ 4] = temp1 + temp2;

	2198 step[ 5] = temp1 - temp2;

	2199

	2200 temp1 = input[14]*C8;

	2201 temp1 = 2*(temp1);

	2202 temp2 = input[6] - input[10];

	2203

	2204 step[ 6] = temp2 - temp1;

	2205 step[ 7] = temp2 + temp1;

	2206

	2207 // for odd input

	2208 temp1 = input[3]*C12;

	2209 temp2 = input[13]*C4;

	2210 temp1 += temp2;

	2211 temp1 = (temp1);

	2212 temp1 *= C8;

	2213 intermediate[ 8] = 2*(temp1);

	2214

	2215 temp1 = input[3]*C4;

	2216 temp2 = input[13]*C12;

	2217 temp2 -= temp1;

	2218 temp2 = (temp2);

	2219 temp2 *= C8;

	2220 intermediate[ 9] = 2*(temp2);

	2221

	2222 intermediate[10] = 2*(input[9]*C8);

	2223 intermediate[11] = input[15] - input[1];

	2224 intermediate[12] = input[15] + input[1];

	2225 intermediate[13] = 2*((input[7]*C8));

	2226

	2227 temp1 = input[11]*C12;

	2228 temp2 = input[5]*C4;

	2229 temp2 -= temp1;

	2230 temp2 = (temp2);

	2231 temp2 *= C8;

	2232 intermediate[14] = 2*(temp2);

	2233

	2234 temp1 = input[11]*C4;

	2235 temp2 = input[5]*C12;

	2236 temp1 += temp2;

	2237 temp1 = (temp1);

	2238 temp1 *= C8;

	2239 intermediate[15] = 2*(temp1);

	2240

	2241 step[ 8] = intermediate[ 8] + intermediate[14];

	2242 step[ 9] = intermediate[ 9] + intermediate[15];

	2243 step[10] = intermediate[10] + intermediate[11];

	2244 step[11] = intermediate[10] - intermediate[11];

	2245 step[12] = intermediate[12] + intermediate[13];

	2246 step[13] = intermediate[12] - intermediate[13];

	2247 step[14] = intermediate[ 8] - intermediate[14];

	2248 step[15] = intermediate[ 9] - intermediate[15];

	2249

	2250 // step 3

	2251 output[0] = step[ 0] + step[ 3];

	2252 output[1] = step[ 1] + step[ 2];

	2253 output[2] = step[ 1] - step[ 2];

	2254 output[3] = step[ 0] - step[ 3];

	2255

	2256 temp1 = step[ 4]*C14;

	2257 temp2 = step[ 7]*C2;

	2258 temp1 -= temp2;

	2259 output[4] = (temp1);

	2260

	2261 temp1 = step[ 4]*C2;

	2262 temp2 = step[ 7]*C14;

	2263 temp1 += temp2;

	2264 output[7] = (temp1);

	2265

	2266 temp1 = step[ 5]*C10;

	2267 temp2 = step[ 6]*C6;

	2268 temp1 -= temp2;

	2269 output[5] = (temp1);

	2270

	2271 temp1 = step[ 5]*C6;

	2272 temp2 = step[ 6]*C10;

	2273 temp1 += temp2;

	2274 output[6] = (temp1);

	2275

	2276 output[8] = step[ 8] + step[11];

	2277 output[9] = step[ 9] + step[10];

	2278 output[10] = step[ 9] - step[10];

	2279 output[11] = step[ 8] - step[11];

	2280 output[12] = step[12] + step[15];

	2281 output[13] = step[13] + step[14];

	2282 output[14] = step[13] - step[14];

	2283 output[15] = step[12] - step[15];

	2284

	2285 // output 4

	2286 step[ 0] = output[0] + output[7];

	2287 step[ 1] = output[1] + output[6];

	2288 step[ 2] = output[2] + output[5];

	2289 step[ 3] = output[3] + output[4];

	2290 step[ 4] = output[3] - output[4];

	2291 step[ 5] = output[2] - output[5];

	2292 step[ 6] = output[1] - output[6];

	2293 step[ 7] = output[0] - output[7];

	2294

	2295 temp1 = output[8]*C7;

	2296 temp2 = output[15]*C9;

	2297 temp1 -= temp2;

	2298 step[ 8] = (temp1);

	2299

	2300 temp1 = output[9]*C11;

	2301 temp2 = output[14]*C5;

	2302 temp1 += temp2;

	2303 step[ 9] = (temp1);

	2304

	2305 temp1 = output[10]*C3;

	2306 temp2 = output[13]*C13;

	2307 temp1 -= temp2;

	2308 step[10] = (temp1);

	2309

	2310 temp1 = output[11]*C15;

	2311 temp2 = output[12]*C1;

	2312 temp1 += temp2;

	2313 step[11] = (temp1);

	2314

	2315 temp1 = output[11]*C1;

	2316 temp2 = output[12]*C15;

	2317 temp2 -= temp1;

	2318 step[12] = (temp2);

	2319

	2320 temp1 = output[10]*C13;

	2321 temp2 = output[13]*C3;

	2322 temp1 += temp2;

	2323 step[13] = (temp1);

	2324

	2325 temp1 = output[9]*C5;

	2326 temp2 = output[14]*C11;

	2327 temp2 -= temp1;

	2328 step[14] = (temp2);

	2329

	2330 temp1 = output[8]*C9;

	2331 temp2 = output[15]*C7;

	2332 temp1 += temp2;

	2333 step[15] = (temp1);

	2334

	2335 // step 5

	2336 output[0] = (step[0] + step[15]);

	2337 output[1] = (step[1] + step[14]);

	2338 output[2] = (step[2] + step[13]);

	2339 output[3] = (step[3] + step[12]);

	2340 output[4] = (step[4] + step[11]);

	2341 output[5] = (step[5] + step[10]);

	2342 output[6] = (step[6] + step[ 9]);

	2343 output[7] = (step[7] + step[ 8]);

	2344

	2345 output[15] = (step[0] - step[15]);

	2346 output[14] = (step[1] - step[14]);

	2347 output[13] = (step[2] - step[13]);

	2348 output[12] = (step[3] - step[12]);

	2349 output[11] = (step[4] - step[11]);

	2350 output[10] = (step[5] - step[10]);

	2351 output[9] = (step[6] - step[ 9]);

	2352 output[8] = (step[7] - step[ 8]);

	2353 }

	2354 vp9_clear_system_state(); // Make it simd safe : __asm emms;

	2355 }

	2356

	2357 static void vp9_short_idct16x16_c_f(int16_t *input, int16_t *output, int pitch,

	2358 int scale) {

	2359 vp9_clear_system_state(); // Make it simd safe : __asm emms;

	2360 {

	2361 double out[16*16], out2[16*16];

	2362 const int short_pitch = pitch >> 1;

	2363 int i, j;

	2364 // First transform rows

	2365 for (i = 0; i < 16; ++i) {

	2366 double temp_in[16], temp_out[16];

	2367 for (j = 0; j < 16; ++j)

	2368 temp_in[j] = input[j + i*short_pitch];

	2369 butterfly_16x16_idct_1d_f(temp_in, temp_out);

	2370 for (j = 0; j < 16; ++j)

	2371 out[j + i*16] = temp_out[j];

	2372 }

	2373 // Then transform columns

	2374 for (i = 0; i < 16; ++i) {

	2375 double temp_in[16], temp_out[16];

	2376 for (j = 0; j < 16; ++j)

	2377 temp_in[j] = out[j*16 + i];

	2378 butterfly_16x16_idct_1d_f(temp_in, temp_out);

	2379 for (j = 0; j < 16; ++j)

	2380 out2[j*16 + i] = temp_out[j];

	2381 }

	2382 for (i = 0; i < 16*16; ++i)

	2383 output[i] = round(out2[i] / (128 >> scale));

	2384 }

	2385 vp9_clear_system_state(); // Make it simd safe : __asm emms;

	2386 }

	2387

	2388 static void idct8_1d(double *x) {

	2389 int i, j;

	2390 double t[8];

	2391 static const double idctmat[64] = {

	2392 0.35355339059327, 0.49039264020162, 0.46193976625564, 0.41573480615127,

	2393 0.35355339059327, 0.2777851165098, 0.19134171618254, 0.097545161008064,

	2394 0.35355339059327, 0.41573480615127, 0.19134171618254, -0.097545161008064,

	2395 -0.35355339059327, -0.49039264020161, -0.46193976625564, -0.2777851165098,

	2396 0.35355339059327, 0.2777851165098, -0.19134171618254, -0.49039264020162,

	2397 -0.35355339059327, 0.097545161008064, 0.46193976625564, 0.41573480615127,

	2398 0.35355339059327, 0.097545161008063, -0.46193976625564, -0.2777851165098,

	2399 0.35355339059327, 0.41573480615127, -0.19134171618254, -0.49039264020162,

	2400 0.35355339059327, -0.097545161008063, -0.46193976625564, 0.2777851165098,

	2401 0.35355339059327, -0.41573480615127, -0.19134171618255, 0.49039264020162,

	2402 0.35355339059327, -0.2777851165098, -0.19134171618254, 0.49039264020161,

	2403 -0.35355339059327, -0.097545161008064, 0.46193976625564, -0.41573480615127,

	2404 0.35355339059327, -0.41573480615127, 0.19134171618254, 0.097545161008065,

	2405 -0.35355339059327, 0.49039264020162, -0.46193976625564, 0.2777851165098,

	2406 0.35355339059327, -0.49039264020162, 0.46193976625564, -0.41573480615127,

	2407 0.35355339059327, -0.2777851165098, 0.19134171618255, -0.097545161008064

	2408 };

	2409 for (i = 0; i < 8; ++i) {

	2410 t[i] = 0;

	2411 for (j = 0; j < 8; ++j)

	2412 t[i] += idctmat[i * 8 + j] * x[j];

	2413 }

	2414 for (i = 0; i < 8; ++i) {

	2415 x[i] = t[i];

	2416 }

	2417 }

	2418

	2419 static void vp9_short_idct8x8_c_f(int16_t *coefs, int16_t *block, int pitch,

	2420 int scale) {

	2421 double X[8 * 8], Y[8];

	2422 int i, j;

	2423 int shortpitch = pitch >> 1;

	2424

	2425 vp9_clear_system_state(); // Make it simd safe : __asm emms;

	2426 {

	2427 for (i = 0; i < 8; i++) {

	2428 for (j = 0; j < 8; j++) {

	2429 X[i * 8 + j] = (double)coefs[i * shortpitch + j];

	2430 }

	2431 }

	2432 for (i = 0; i < 8; i++)

	2433 idct8_1d(X + 8 * i);

	2434 for (i = 0; i < 8; i++) {

	2435 for (j = 0; j < 8; ++j)

	2436 Y[j] = X[i + 8 * j];

	2437 idct8_1d(Y);

	2438 for (j = 0; j < 8; ++j)

	2439 X[i + 8 * j] = Y[j];

	2440 }

	2441 for (i = 0; i < 8; i++) {

	2442 for (j = 0; j < 8; j++) {

	2443 block[i * 8 + j] = (int16_t)round(X[i * 8 + j] / (8 >> scale));

	2444 }

	2445 }

	2446 }

	2447 vp9_clear_system_state(); // Make it simd safe : __asm emms;

	2448 }

	2449

	2450 #define multiply_bits(d, n) ((n) < 0 ? (d) >> (n) : (d) << (n))

	2451

	2452 #if DWTDCT_TYPE == DWTDCT16X16_LEAN

	2453

	2454 void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) {

	2455 // assume output is a 32x32 buffer

	2456 // Temporary buffer to hold a 16x16 block for 16x16 inverse dct

	2457 int16_t buffer[16 * 16];

	2458 // Temporary buffer to hold a 32x32 block for inverse 32x32 dwt

	2459 int16_t buffer2[32 * 32];

	2460 // Note: pitch is in bytes, short_pitch is in short units

	2461 const int short_pitch = pitch >> 1;

	2462 int i, j;

	2463

	2464 // TODO(debargha): Implement more efficiently by adding output pitch

	2465 // argument to the idct16x16 function

	2466 vp9_short_idct16x16_c_f(input, buffer, pitch,

	2467 1 + DWT_PRECISION_BITS);

	2468 for (i = 0; i < 16; ++i) {

	2469 vpx_memcpy(buffer2 + i * 32, buffer + i * 16, sizeof(*buffer2) * 16);

	2470 }

	2471 for (i = 0; i < 16; ++i) {

	2472 for (j = 16; j < 32; ++j) {

	2473 buffer2[i * 32 + j] =

	2474 multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 2);

	2475 }

	2476 }

	2477 for (i = 16; i < 32; ++i) {

	2478 for (j = 0; j < 32; ++j) {

	2479 buffer2[i * 32 + j] =

	2480 multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 2);

	2481 }

	2482 }

	2483 #if DWT_TYPE == 26

	2484 dyadic_synthesize_26(1, 32, 32, buffer2, 32, output, 32);

	2485 #elif DWT_TYPE == 97

	2486 dyadic_synthesize_97(1, 32, 32, buffer2, 32, output, 32);

	2487 #elif DWT_TYPE == 53

	2488 dyadic_synthesize_53(1, 32, 32, buffer2, 32, output, 32);

	2489 #endif

	2490 }

	2491

	2492 #elif DWTDCT_TYPE == DWTDCT16X16

	2493

	2494 void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) {

	2495 // assume output is a 32x32 buffer

	2496 // Temporary buffer to hold a 16x16 block for 16x16 inverse dct

	2497 int16_t buffer[16 * 16];

	2498 // Temporary buffer to hold a 32x32 block for inverse 32x32 dwt

	2499 int16_t buffer2[32 * 32];

	2500 // Note: pitch is in bytes, short_pitch is in short units

	2501 const int short_pitch = pitch >> 1;

	2502 int i, j;

	2503

	2504 // TODO(debargha): Implement more efficiently by adding output pitch

	2505 // argument to the idct16x16 function

	2506 vp9_short_idct16x16_c_f(input, buffer, pitch,

	2507 1 + DWT_PRECISION_BITS);

	2508 for (i = 0; i < 16; ++i) {

	2509 vpx_memcpy(buffer2 + i * 32, buffer + i * 16, sizeof(*buffer2) * 16);

	2510 }

	2511 vp9_short_idct16x16_c_f(input + 16, buffer, pitch,

	2512 1 + DWT_PRECISION_BITS);

	2513 for (i = 0; i < 16; ++i) {

	2514 vpx_memcpy(buffer2 + i * 32 + 16, buffer + i * 16, sizeof(*buffer2) * 16);

	2515 }

	2516 vp9_short_idct16x16_c_f(input + 16 * short_pitch, buffer, pitch,

	2517 1 + DWT_PRECISION_BITS);

	2518 for (i = 0; i < 16; ++i) {

	2519 vpx_memcpy(buffer2 + i * 32 + 16 * 32, buffer + i * 16,

	2520 sizeof(*buffer2) * 16);

	2521 }

	2522 vp9_short_idct16x16_c_f(input + 16 * short_pitch + 16, buffer, pitch,

	2523 1 + DWT_PRECISION_BITS);

	2524 for (i = 0; i < 16; ++i) {

	2525 vpx_memcpy(buffer2 + i * 32 + 16 * 33, buffer + i * 16,

	2526 sizeof(*buffer2) * 16);

	2527 }

	2528 #if DWT_TYPE == 26

	2529 dyadic_synthesize_26(1, 32, 32, buffer2, 32, output, 32);

	2530 #elif DWT_TYPE == 97

	2531 dyadic_synthesize_97(1, 32, 32, buffer2, 32, output, 32);

	2532 #elif DWT_TYPE == 53

	2533 dyadic_synthesize_53(1, 32, 32, buffer2, 32, output, 32);

	2534 #endif

	2535 }

	2536

	2537 #elif DWTDCT_TYPE == DWTDCT8X8

	2538

	2539 void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) {

	2540 // assume output is a 32x32 buffer

	2541 // Temporary buffer to hold a 16x16 block for 16x16 inverse dct

	2542 int16_t buffer[8 * 8];

	2543 // Temporary buffer to hold a 32x32 block for inverse 32x32 dwt

	2544 int16_t buffer2[32 * 32];

	2545 // Note: pitch is in bytes, short_pitch is in short units

	2546 const int short_pitch = pitch >> 1;

	2547 int i, j;

	2548

	2549 // TODO(debargha): Implement more efficiently by adding output pitch

	2550 // argument to the idct16x16 function

	2551 vp9_short_idct8x8_c_f(input, buffer, pitch,

	2552 1 + DWT_PRECISION_BITS);

	2553 for (i = 0; i < 8; ++i) {

	2554 vpx_memcpy(buffer2 + i * 32, buffer + i * 8, sizeof(*buffer2) * 8);

	2555 }

	2556 vp9_short_idct8x8_c_f(input + 8, buffer, pitch,

	2557 1 + DWT_PRECISION_BITS);

	2558 for (i = 0; i < 8; ++i) {

	2559 vpx_memcpy(buffer2 + i * 32 + 8, buffer + i * 8, sizeof(*buffer2) * 8);

	2560 }

	2561 vp9_short_idct8x8_c_f(input + 8 * short_pitch, buffer, pitch,

	2562 1 + DWT_PRECISION_BITS);

	2563 for (i = 0; i < 8; ++i) {

	2564 vpx_memcpy(buffer2 + i * 32 + 8 * 32, buffer + i * 8,

	2565 sizeof(*buffer2) * 8);

	2566 }

	2567 vp9_short_idct8x8_c_f(input + 8 * short_pitch + 8, buffer, pitch,

	2568 1 + DWT_PRECISION_BITS);

	2569 for (i = 0; i < 8; ++i) {

	2570 vpx_memcpy(buffer2 + i * 32 + 8 * 33, buffer + i * 8,

	2571 sizeof(*buffer2) * 8);

	2572 }

	2573 for (i = 0; i < 16; ++i) {

	2574 for (j = 16; j < 32; ++j) {

	2575 buffer2[i * 32 + j] =

	2576 multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 2);

	2577 }

	2578 }

	2579 for (i = 16; i < 32; ++i) {

	2580 for (j = 0; j < 32; ++j) {

	2581 buffer2[i * 32 + j] =

	2582 multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 2);

	2583 }

	2584 }

	2585 #if DWT_TYPE == 26

	2586 dyadic_synthesize_26(2, 32, 32, buffer2, 32, output, 32);

	2587 #elif DWT_TYPE == 97

	2588 dyadic_synthesize_97(2, 32, 32, buffer2, 32, output, 32);

	2589 #elif DWT_TYPE == 53

	2590 dyadic_synthesize_53(2, 32, 32, buffer2, 32, output, 32);

	2591 #endif

	2592 }

	2593

	2594 #endif

	2595

	2596 #if CONFIG_TX64X64

	2597 void vp9_short_idct64x64_c(int16_t *input, int16_t *output, int pitch) {

	2598 // assume output is a 64x64 buffer

	2599 // Temporary buffer to hold a 16x16 block for 16x16 inverse dct

	2600 int16_t buffer[16 * 16];

	2601 // Temporary buffer to hold a 32x32 block for inverse 32x32 dwt

	2602 int16_t buffer2[64 * 64];

	2603 // Note: pitch is in bytes, short_pitch is in short units

	2604 const int short_pitch = pitch >> 1;

	2605 int i, j;

	2606

	2607 // TODO(debargha): Implement more efficiently by adding output pitch

	2608 // argument to the idct16x16 function

	2609 vp9_short_idct16x16_c_f(input, buffer, pitch,

	2610 2 + DWT_PRECISION_BITS);

	2611 for (i = 0; i < 16; ++i) {

	2612 vpx_memcpy(buffer2 + i * 64, buffer + i * 16, sizeof(*buffer2) * 16);

	2613 }

	2614 #if DWTDCT_TYPE == DWTDCT16X16_LEAN

	2615 for (i = 0; i < 16; ++i) {

	2616 for (j = 16; j < 64; ++j) {

	2617 buffer2[i * 64 + j] =

	2618 multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 1);

	2619 }

	2620 }

	2621 for (i = 16; i < 64; ++i) {

	2622 for (j = 0; j < 64; ++j) {

	2623 buffer2[i * 64 + j] =

	2624 multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 1);

	2625 }

	2626 }

	2627 #elif DWTDCT_TYPE == DWTDCT16X16

	2628 vp9_short_idct16x16_c_f(input + 16, buffer, pitch,

	2629 2 + DWT_PRECISION_BITS);

	2630 for (i = 0; i < 16; ++i) {

	2631 vpx_memcpy(buffer2 + i * 64 + 16, buffer + i * 16, sizeof(*buffer2) * 16);

	2632 }

	2633 vp9_short_idct16x16_c_f(input + 16 * short_pitch, buffer, pitch,

	2634 2 + DWT_PRECISION_BITS);

	2635 for (i = 0; i < 16; ++i) {

	2636 vpx_memcpy(buffer2 + i * 64 + 16 * 64, buffer + i * 16,

	2637 sizeof(*buffer2) * 16);

	2638 }

	2639 vp9_short_idct16x16_c_f(input + 16 * short_pitch + 16, buffer, pitch,

	2640 2 + DWT_PRECISION_BITS);

	2641 for (i = 0; i < 16; ++i) {

	2642 vpx_memcpy(buffer2 + i * 64 + 16 * 65, buffer + i * 16,

	2643 sizeof(*buffer2) * 16);

	2644 }

	2645

	2646 // Copying and scaling highest bands into buffer2

	2647 for (i = 0; i < 32; ++i) {

	2648 for (j = 32; j < 64; ++j) {

	2649 buffer2[i * 64 + j] =

	2650 multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 1);

	2651 }

	2652 }

	2653 for (i = 32; i < 64; ++i) {

	2654 for (j = 0; j < 64; ++j) {

	2655 buffer2[i * 64 + j] =

	2656 multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 1);

	2657 }

	2658 }

	2659 #endif // DWTDCT_TYPE

	2660

	2661 #if DWT_TYPE == 26

	2662 dyadic_synthesize_26(2, 64, 64, buffer2, 64, output, 64);

	2663 #elif DWT_TYPE == 97

	2664 dyadic_synthesize_97(2, 64, 64, buffer2, 64, output, 64);

	2665 #elif DWT_TYPE == 53

	2666 dyadic_synthesize_53(2, 64, 64, buffer2, 64, output, 64);

	2667 #endif

	2668 }

	2669 #endif // CONFIG_TX64X64

	2670 #endif // !CONFIG_DWTDCTHYBRID

OLD	NEW

« no previous file with comments | « source/libvpx/vp9/common/vp9_header.h ('k') | source/libvpx/vp9/common/vp9_implicit_segmentation.c » ('j') | no next file with comments »