patched-ffmpeg-mt/libavcodec/ppc/dsputil_altivec.c - Issue 789004: ffmpeg roll of source to mar 9 version...

Side by Side Diff: patched-ffmpeg-mt/libavcodec/ppc/dsputil_altivec.c

Issue 789004: ffmpeg roll of source to mar 9 version... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/ffmpeg/

Patch Set: '' Created 10 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« patched-ffmpeg-mt/libavcodec/mpeg4video_es_bsf.c ('K') | « patched-ffmpeg-mt/libavcodec/ppc/dsputil_altivec.h ('k') | patched-ffmpeg-mt/libavcodec/ppc/dsputil_ppc.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2002 Brian Foley	2 * Copyright (c) 2002 Brian Foley

3 * Copyright (c) 2002 Dieter Shirley	3 * Copyright (c) 2002 Dieter Shirley

4 * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>	4 * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>

5 *	5 *

6 * This file is part of FFmpeg.	6 * This file is part of FFmpeg.

7 *	7 *

8 * FFmpeg is free software; you can redistribute it and/or	8 * FFmpeg is free software; you can redistribute it and/or

9 * modify it under the terms of the GNU Lesser General Public	9 * modify it under the terms of the GNU Lesser General Public

10 * License as published by the Free Software Foundation; either	10 * License as published by the Free Software Foundation; either

(...skipping 10 matching lines...) Expand all Loading...
21 */	21 */

22	22

23 #include "config.h"	23 #include "config.h"

24 #if HAVE_ALTIVEC_H	24 #if HAVE_ALTIVEC_H

25 #include <altivec.h>	25 #include <altivec.h>

26 #endif	26 #endif

27 #include "libavcodec/dsputil.h"	27 #include "libavcodec/dsputil.h"

28 #include "dsputil_ppc.h"	28 #include "dsputil_ppc.h"

29 #include "util_altivec.h"	29 #include "util_altivec.h"

30 #include "types_altivec.h"	30 #include "types_altivec.h"

	31 #include "dsputil_altivec.h"

31	32

32 int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)	33 static int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)

33 {	34 {

34 int i;	35 int i;

35 int s;	36 int s;

36 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0 );	37 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0 );

37 vector unsigned char *tv;	38 vector unsigned char *tv;

38 vector unsigned char pix1v, pix2v, pix2iv, avgv, t5;	39 vector unsigned char pix1v, pix2v, pix2iv, avgv, t5;

39 vector unsigned int sad;	40 vector unsigned int sad;

40 vector signed int sumdiffs;	41 vector signed int sumdiffs;

41	42

42 s = 0;	43 s = 0;

(...skipping 24 matching lines...) Expand all Loading...
67 pix2 += line_size;	68 pix2 += line_size;

68 }	69 }

69 /* Sum up the four partial sums, and put the result into s */	70 /* Sum up the four partial sums, and put the result into s */

70 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);	71 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);

71 sumdiffs = vec_splat(sumdiffs, 3);	72 sumdiffs = vec_splat(sumdiffs, 3);

72 vec_ste(sumdiffs, 0, &s);	73 vec_ste(sumdiffs, 0, &s);

73	74

74 return s;	75 return s;

75 }	76 }

76	77

77 int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)	78 static int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)

78 {	79 {

79 int i;	80 int i;

80 int s;	81 int s;

81 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0 );	82 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0 );

82 vector unsigned char *tv;	83 vector unsigned char *tv;

83 vector unsigned char pix1v, pix2v, pix3v, avgv, t5;	84 vector unsigned char pix1v, pix2v, pix3v, avgv, t5;

84 vector unsigned int sad;	85 vector unsigned int sad;

85 vector signed int sumdiffs;	86 vector signed int sumdiffs;

86 uint8_t *pix3 = pix2 + line_size;	87 uint8_t *pix3 = pix2 + line_size;

87	88

(...skipping 35 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
123	124

124 }	125 }

125	126

126 /* Sum up the four partial sums, and put the result into s */	127 /* Sum up the four partial sums, and put the result into s */

127 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);	128 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);

128 sumdiffs = vec_splat(sumdiffs, 3);	129 sumdiffs = vec_splat(sumdiffs, 3);

129 vec_ste(sumdiffs, 0, &s);	130 vec_ste(sumdiffs, 0, &s);

130 return s;	131 return s;

131 }	132 }

132	133

133 int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)	134 static int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)

134 {	135 {

135 int i;	136 int i;

136 int s;	137 int s;

137 uint8_t *pix3 = pix2 + line_size;	138 uint8_t *pix3 = pix2 + line_size;

138 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0 );	139 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0 );

139 const vector unsigned short two = (const vector unsigned short)vec_splat_u16 (2);	140 const vector unsigned short two = (const vector unsigned short)vec_splat_u16 (2);

140 vector unsigned char *tv, avgv, t5;	141 vector unsigned char *tv, avgv, t5;

141 vector unsigned char pix1v, pix2v, pix3v, pix2iv, pix3iv;	142 vector unsigned char pix1v, pix2v, pix3v, pix2iv, pix3iv;

142 vector unsigned short pix2lv, pix2hv, pix2ilv, pix2ihv;	143 vector unsigned short pix2lv, pix2hv, pix2ilv, pix2ihv;

143 vector unsigned short pix3lv, pix3hv, pix3ilv, pix3ihv;	144 vector unsigned short pix3lv, pix3hv, pix3ilv, pix3ihv;

(...skipping 74 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
218 t2 = t4;	219 t2 = t4;

219 }	220 }

220 /* Sum up the four partial sums, and put the result into s */	221 /* Sum up the four partial sums, and put the result into s */

221 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);	222 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);

222 sumdiffs = vec_splat(sumdiffs, 3);	223 sumdiffs = vec_splat(sumdiffs, 3);

223 vec_ste(sumdiffs, 0, &s);	224 vec_ste(sumdiffs, 0, &s);

224	225

225 return s;	226 return s;

226 }	227 }

227	228

228 int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)	229 static int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)

229 {	230 {

230 int i;	231 int i;

231 int s;	232 int s;

232 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0) ;	233 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0) ;

233 vector unsigned char perm1, perm2, pix1v, pix2v;	234 vector unsigned char perm1, perm2, pix1v, pix2v;

234 vector unsigned char t1, t2, t3,t4, t5;	235 vector unsigned char t1, t2, t3,t4, t5;

235 vector unsigned int sad;	236 vector unsigned int sad;

236 vector signed int sumdiffs;	237 vector signed int sumdiffs;

237	238

238 sad = (vector unsigned int)vec_splat_u32(0);	239 sad = (vector unsigned int)vec_splat_u32(0);

(...skipping 21 matching lines...) Expand all Loading...
260 }	261 }

261	262

262 /* Sum up the four partial sums, and put the result into s */	263 /* Sum up the four partial sums, and put the result into s */

263 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);	264 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);

264 sumdiffs = vec_splat(sumdiffs, 3);	265 sumdiffs = vec_splat(sumdiffs, 3);

265 vec_ste(sumdiffs, 0, &s);	266 vec_ste(sumdiffs, 0, &s);

266	267

267 return s;	268 return s;

268 }	269 }

269	270

270 int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)	271 static int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)

271 {	272 {

272 int i;	273 int i;

273 int s;	274 int s;

274 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0) ;	275 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0) ;

275 vector unsigned char perm1, perm2, permclear, pix1v, pix2v;	276 vector unsigned char perm1, perm2, permclear, pix1v, pix2v;

276 vector unsigned char t1, t2, t3,t4, t5;	277 vector unsigned char t1, t2, t3,t4, t5;

277 vector unsigned int sad;	278 vector unsigned int sad;

278 vector signed int sumdiffs;	279 vector signed int sumdiffs;

279	280

280 sad = (vector unsigned int)vec_splat_u32(0);	281 sad = (vector unsigned int)vec_splat_u32(0);

(...skipping 24 matching lines...) Expand all Loading...
305 }	306 }

306	307

307 /* Sum up the four partial sums, and put the result into s */	308 /* Sum up the four partial sums, and put the result into s */

308 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);	309 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);

309 sumdiffs = vec_splat(sumdiffs, 3);	310 sumdiffs = vec_splat(sumdiffs, 3);

310 vec_ste(sumdiffs, 0, &s);	311 vec_ste(sumdiffs, 0, &s);

311	312

312 return s;	313 return s;

313 }	314 }

314	315

315 int pix_norm1_altivec(uint8_t *pix, int line_size)	316 static int pix_norm1_altivec(uint8_t *pix, int line_size)

316 {	317 {

317 int i;	318 int i;

318 int s;	319 int s;

319 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0) ;	320 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0) ;

320 vector unsigned char *tv;	321 vector unsigned char *tv;

321 vector unsigned char pixv;	322 vector unsigned char pixv;

322 vector unsigned int sv;	323 vector unsigned int sv;

323 vector signed int sum;	324 vector signed int sum;

324	325

325 sv = (vector unsigned int)vec_splat_u32(0);	326 sv = (vector unsigned int)vec_splat_u32(0);

(...skipping 15 matching lines...) Expand all Loading...
341 vec_ste(sum, 0, &s);	342 vec_ste(sum, 0, &s);

342	343

343 return s;	344 return s;

344 }	345 }

345	346

346 /**	347 /**

347 * Sum of Squared Errors for a 8x8 block.	348 * Sum of Squared Errors for a 8x8 block.

348 * AltiVec-enhanced.	349 * AltiVec-enhanced.

349 * It's the sad8_altivec code above w/ squaring added.	350 * It's the sad8_altivec code above w/ squaring added.

350 */	351 */

351 int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)	352 static int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)

352 {	353 {

353 int i;	354 int i;

354 int s;	355 int s;

355 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0) ;	356 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0) ;

356 vector unsigned char perm1, perm2, permclear, pix1v, pix2v;	357 vector unsigned char perm1, perm2, permclear, pix1v, pix2v;

357 vector unsigned char t1, t2, t3,t4, t5;	358 vector unsigned char t1, t2, t3,t4, t5;

358 vector unsigned int sum;	359 vector unsigned int sum;

359 vector signed int sumsqr;	360 vector signed int sumsqr;

360	361

361 sum = (vector unsigned int)vec_splat_u32(0);	362 sum = (vector unsigned int)vec_splat_u32(0);

(...skipping 33 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
395 vec_ste(sumsqr, 0, &s);	396 vec_ste(sumsqr, 0, &s);

396	397

397 return s;	398 return s;

398 }	399 }

399	400

400 /**	401 /**

401 * Sum of Squared Errors for a 16x16 block.	402 * Sum of Squared Errors for a 16x16 block.

402 * AltiVec-enhanced.	403 * AltiVec-enhanced.

403 * It's the sad16_altivec code above w/ squaring added.	404 * It's the sad16_altivec code above w/ squaring added.

404 */	405 */

405 int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)	406 static int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)

406 {	407 {

407 int i;	408 int i;

408 int s;	409 int s;

409 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0) ;	410 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0) ;

410 vector unsigned char perm1, perm2, pix1v, pix2v;	411 vector unsigned char perm1, perm2, pix1v, pix2v;

411 vector unsigned char t1, t2, t3,t4, t5;	412 vector unsigned char t1, t2, t3,t4, t5;

412 vector unsigned int sum;	413 vector unsigned int sum;

413 vector signed int sumsqr;	414 vector signed int sumsqr;

414	415

415 sum = (vector unsigned int)vec_splat_u32(0);	416 sum = (vector unsigned int)vec_splat_u32(0);

(...skipping 23 matching lines...) Expand all Loading...
439 }	440 }

440	441

441 /* Sum up the four partial sums, and put the result into s */	442 /* Sum up the four partial sums, and put the result into s */

442 sumsqr = vec_sums((vector signed int) sum, (vector signed int) zero);	443 sumsqr = vec_sums((vector signed int) sum, (vector signed int) zero);

443 sumsqr = vec_splat(sumsqr, 3);	444 sumsqr = vec_splat(sumsqr, 3);

444 vec_ste(sumsqr, 0, &s);	445 vec_ste(sumsqr, 0, &s);

445	446

446 return s;	447 return s;

447 }	448 }

448	449

449 int pix_sum_altivec(uint8_t * pix, int line_size)	450 static int pix_sum_altivec(uint8_t * pix, int line_size)

450 {	451 {

451 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0) ;	452 const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0) ;

452 vector unsigned char perm, *pixv;	453 vector unsigned char perm, *pixv;

453 vector unsigned char t1;	454 vector unsigned char t1;

454 vector unsigned int sad;	455 vector unsigned int sad;

455 vector signed int sumdiffs;	456 vector signed int sumdiffs;

456	457

457 int i;	458 int i;

458 int s;	459 int s;

459	460

(...skipping 12 matching lines...) Expand all Loading...
472 }	473 }

473	474

474 /* Sum up the four partial sums, and put the result into s */	475 /* Sum up the four partial sums, and put the result into s */

475 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);	476 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);

476 sumdiffs = vec_splat(sumdiffs, 3);	477 sumdiffs = vec_splat(sumdiffs, 3);

477 vec_ste(sumdiffs, 0, &s);	478 vec_ste(sumdiffs, 0, &s);

478	479

479 return s;	480 return s;

480 }	481 }

481	482

482 void get_pixels_altivec(DCTELEM *restrict block, const uint8_t *pixels, int line_size)	483 static void get_pixels_altivec(DCTELEM *restrict block, const uint8_t *pixels, int line_size)

483 {	484 {

484 int i;	485 int i;

485 vector unsigned char perm, bytes, *pixv;	486 vector unsigned char perm, bytes, *pixv;

486 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0 );	487 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0 );

487 vector signed short shorts;	488 vector signed short shorts;

488	489

489 for (i = 0; i < 8; i++) {	490 for (i = 0; i < 8; i++) {

490 // Read potentially unaligned pixels.	491 // Read potentially unaligned pixels.

491 // We're reading 16 pixels, and actually only want 8,	492 // We're reading 16 pixels, and actually only want 8,

492 // but we simply ignore the extras.	493 // but we simply ignore the extras.

493 perm = vec_lvsl(0, pixels);	494 perm = vec_lvsl(0, pixels);

494 pixv = (vector unsigned char *) pixels;	495 pixv = (vector unsigned char *) pixels;

495 bytes = vec_perm(pixv[0], pixv[1], perm);	496 bytes = vec_perm(pixv[0], pixv[1], perm);

496	497

497 // convert the bytes into shorts	498 // convert the bytes into shorts

498 shorts = (vector signed short)vec_mergeh(zero, bytes);	499 shorts = (vector signed short)vec_mergeh(zero, bytes);

499	500

500 // save the data to the block, we assume the block is 16-byte aligned	501 // save the data to the block, we assume the block is 16-byte aligned

501 vec_st(shorts, i*16, (vector signed short*)block);	502 vec_st(shorts, i*16, (vector signed short*)block);

502	503

503 pixels += line_size;	504 pixels += line_size;

504 }	505 }

505 }	506 }

506	507

507 void diff_pixels_altivec(DCTELEM *restrict block, const uint8_t *s1,	508 static void diff_pixels_altivec(DCTELEM *restrict block, const uint8_t *s1,

508 const uint8_t *s2, int stride)	509 const uint8_t *s2, int stride)

509 {	510 {

510 int i;	511 int i;

511 vector unsigned char perm, bytes, *pixv;	512 vector unsigned char perm, bytes, *pixv;

512 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0 );	513 const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0 );

513 vector signed short shorts1, shorts2;	514 vector signed short shorts1, shorts2;

514	515

515 for (i = 0; i < 4; i++) {	516 for (i = 0; i < 4; i++) {

516 // Read potentially unaligned pixels	517 // Read potentially unaligned pixels

517 // We're reading 16 pixels, and actually only want 8,	518 // We're reading 16 pixels, and actually only want 8,

(...skipping 64 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
582 vec_st(zero_s16v, 16, block);	583 vec_st(zero_s16v, 16, block);

583 vec_st(zero_s16v, 32, block);	584 vec_st(zero_s16v, 32, block);

584 vec_st(zero_s16v, 48, block);	585 vec_st(zero_s16v, 48, block);

585 vec_st(zero_s16v, 64, block);	586 vec_st(zero_s16v, 64, block);

586 vec_st(zero_s16v, 80, block);	587 vec_st(zero_s16v, 80, block);

587 vec_st(zero_s16v, 96, block);	588 vec_st(zero_s16v, 96, block);

588 vec_st(zero_s16v, 112, block);	589 vec_st(zero_s16v, 112, block);

589 }	590 }

590	591

591	592

592 void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w) {	593 static void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w) {

593 register int i;	594 register int i;

594 register vector unsigned char vdst, vsrc;	595 register vector unsigned char vdst, vsrc;

595	596

596 /* dst and src are 16 bytes-aligned (guaranteed) */	597 /* dst and src are 16 bytes-aligned (guaranteed) */

597 for (i = 0 ; (i + 15) < w ; i+=16) {	598 for (i = 0 ; (i + 15) < w ; i+=16) {

598 vdst = vec_ld(i, (unsigned char*)dst);	599 vdst = vec_ld(i, (unsigned char*)dst);

599 vsrc = vec_ld(i, (unsigned char*)src);	600 vsrc = vec_ld(i, (unsigned char*)src);

600 vdst = vec_add(vsrc, vdst);	601 vdst = vec_add(vsrc, vdst);

601 vec_st(vdst, i, (unsigned char*)dst);	602 vec_st(vdst, i, (unsigned char*)dst);

602 }	603 }

(...skipping 77 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
680 blockv = vec_avg(blockv,pixelsv);	681 blockv = vec_avg(blockv,pixelsv);

681 vec_st(blockv, 0, (unsigned char*)block);	682 vec_st(blockv, 0, (unsigned char*)block);

682 pixels+=line_size;	683 pixels+=line_size;

683 block +=line_size;	684 block +=line_size;

684 }	685 }

685	686

686 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels16_num, 1);	687 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels16_num, 1);

687 }	688 }

688	689

689 /* next one assumes that ((line_size % 8) == 0) */	690 /* next one assumes that ((line_size % 8) == 0) */

690 void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h)	691 static void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h)

691 {	692 {

692 POWERPC_PERF_DECLARE(altivec_avg_pixels8_num, 1);	693 POWERPC_PERF_DECLARE(altivec_avg_pixels8_num, 1);

693 register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv;	694 register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv;

694 int i;	695 int i;

695	696

696 POWERPC_PERF_START_COUNT(altivec_avg_pixels8_num, 1);	697 POWERPC_PERF_START_COUNT(altivec_avg_pixels8_num, 1);

697	698

698 for (i = 0; i < h; i++) {	699 for (i = 0; i < h; i++) {

699 /* block is 8 bytes-aligned, so we're either in the	700 /* block is 8 bytes-aligned, so we're either in the

700 left block (16 bytes-aligned) or in the right block (not) */	701 left block (16 bytes-aligned) or in the right block (not) */

(...skipping 15 matching lines...) Expand all Loading...
716 vec_st(blockv, 0, block);	717 vec_st(blockv, 0, block);

717	718

718 pixels += line_size;	719 pixels += line_size;

719 block += line_size;	720 block += line_size;

720 }	721 }

721	722

722 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_num, 1);	723 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_num, 1);

723 }	724 }

724	725

725 /* next one assumes that ((line_size % 8) == 0) */	726 /* next one assumes that ((line_size % 8) == 0) */

726 void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)	727 static void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)

727 {	728 {

728 POWERPC_PERF_DECLARE(altivec_put_pixels8_xy2_num, 1);	729 POWERPC_PERF_DECLARE(altivec_put_pixels8_xy2_num, 1);

729 register int i;	730 register int i;

730 register vector unsigned char pixelsv1, pixelsv2, pixelsavg;	731 register vector unsigned char pixelsv1, pixelsv2, pixelsavg;

731 register vector unsigned char blockv, temp1, temp2;	732 register vector unsigned char blockv, temp1, temp2;

732 register vector unsigned short pixelssum1, pixelssum2, temp3;	733 register vector unsigned short pixelssum1, pixelssum2, temp3;

733 register const vector unsigned char vczero = (const vector unsigned char)vec _splat_u8(0);	734 register const vector unsigned char vczero = (const vector unsigned char)vec _splat_u8(0);

734 register const vector unsigned short vctwo = (const vector unsigned short)ve c_splat_u16(2);	735 register const vector unsigned short vctwo = (const vector unsigned short)ve c_splat_u16(2);

735	736

736 temp1 = vec_ld(0, pixels);	737 temp1 = vec_ld(0, pixels);

(...skipping 42 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
779 vec_st(blockv, 0, block);	780 vec_st(blockv, 0, block);

780	781

781 block += line_size;	782 block += line_size;

782 pixels += line_size;	783 pixels += line_size;

783 }	784 }

784	785

785 POWERPC_PERF_STOP_COUNT(altivec_put_pixels8_xy2_num, 1);	786 POWERPC_PERF_STOP_COUNT(altivec_put_pixels8_xy2_num, 1);

786 }	787 }

787	788

788 /* next one assumes that ((line_size % 8) == 0) */	789 /* next one assumes that ((line_size % 8) == 0) */

789 void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)	790 static void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)

790 {	791 {

791 POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels8_xy2_num, 1);	792 POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels8_xy2_num, 1);

792 register int i;	793 register int i;

793 register vector unsigned char pixelsv1, pixelsv2, pixelsavg;	794 register vector unsigned char pixelsv1, pixelsv2, pixelsavg;

794 register vector unsigned char blockv, temp1, temp2;	795 register vector unsigned char blockv, temp1, temp2;

795 register vector unsigned short pixelssum1, pixelssum2, temp3;	796 register vector unsigned short pixelssum1, pixelssum2, temp3;

796 register const vector unsigned char vczero = (const vector unsigned char)vec _splat_u8(0);	797 register const vector unsigned char vczero = (const vector unsigned char)vec _splat_u8(0);

797 register const vector unsigned short vcone = (const vector unsigned short)ve c_splat_u16(1);	798 register const vector unsigned short vcone = (const vector unsigned short)ve c_splat_u16(1);

798 register const vector unsigned short vctwo = (const vector unsigned short)ve c_splat_u16(2);	799 register const vector unsigned short vctwo = (const vector unsigned short)ve c_splat_u16(2);

799	800

(...skipping 43 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
843 vec_st(blockv, 0, block);	844 vec_st(blockv, 0, block);

844	845

845 block += line_size;	846 block += line_size;

846 pixels += line_size;	847 pixels += line_size;

847 }	848 }

848	849

849 POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);	850 POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);

850 }	851 }

851	852

852 /* next one assumes that ((line_size % 16) == 0) */	853 /* next one assumes that ((line_size % 16) == 0) */

853 void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h)	854 static void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h)

854 {	855 {

855 POWERPC_PERF_DECLARE(altivec_put_pixels16_xy2_num, 1);	856 POWERPC_PERF_DECLARE(altivec_put_pixels16_xy2_num, 1);

856 register int i;	857 register int i;

857 register vector unsigned char pixelsv1, pixelsv2, pixelsv3, pixelsv4;	858 register vector unsigned char pixelsv1, pixelsv2, pixelsv3, pixelsv4;

858 register vector unsigned char blockv, temp1, temp2;	859 register vector unsigned char blockv, temp1, temp2;

859 register vector unsigned short temp3, temp4,	860 register vector unsigned short temp3, temp4,

860 pixelssum1, pixelssum2, pixelssum3, pixelssum4;	861 pixelssum1, pixelssum2, pixelssum3, pixelssum4;

861 register const vector unsigned char vczero = (const vector unsigned char)vec _splat_u8(0);	862 register const vector unsigned char vczero = (const vector unsigned char)vec _splat_u8(0);

862 register const vector unsigned short vctwo = (const vector unsigned short)ve c_splat_u16(2);	863 register const vector unsigned short vctwo = (const vector unsigned short)ve c_splat_u16(2);

863	864

(...skipping 52 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
916 vec_st(blockv, 0, block);	917 vec_st(blockv, 0, block);

917	918

918 block += line_size;	919 block += line_size;

919 pixels += line_size;	920 pixels += line_size;

920 }	921 }

921	922

922 POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_xy2_num, 1);	923 POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_xy2_num, 1);

923 }	924 }

924	925

925 /* next one assumes that ((line_size % 16) == 0) */	926 /* next one assumes that ((line_size % 16) == 0) */

926 void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h)	927 static void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h)

927 {	928 {

928 POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels16_xy2_num, 1);	929 POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels16_xy2_num, 1);

929 register int i;	930 register int i;

930 register vector unsigned char pixelsv1, pixelsv2, pixelsv3, pixelsv4;	931 register vector unsigned char pixelsv1, pixelsv2, pixelsv3, pixelsv4;

931 register vector unsigned char blockv, temp1, temp2;	932 register vector unsigned char blockv, temp1, temp2;

932 register vector unsigned short temp3, temp4,	933 register vector unsigned short temp3, temp4,

933 pixelssum1, pixelssum2, pixelssum3, pixelssum4;	934 pixelssum1, pixelssum2, pixelssum3, pixelssum4;

934 register const vector unsigned char vczero = (const vector unsigned char)vec _splat_u8(0);	935 register const vector unsigned char vczero = (const vector unsigned char)vec _splat_u8(0);

935 register const vector unsigned short vcone = (const vector unsigned short)ve c_splat_u16(1);	936 register const vector unsigned short vcone = (const vector unsigned short)ve c_splat_u16(1);

936 register const vector unsigned short vctwo = (const vector unsigned short)ve c_splat_u16(2);	937 register const vector unsigned short vctwo = (const vector unsigned short)ve c_splat_u16(2);

(...skipping 52 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
989	990

990 vec_st(blockv, 0, block);	991 vec_st(blockv, 0, block);

991	992

992 block += line_size;	993 block += line_size;

993 pixels += line_size;	994 pixels += line_size;

994 }	995 }

995	996

996 POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);	997 POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);

997 }	998 }

998	999

999 int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){	1000 static int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){

1000 POWERPC_PERF_DECLARE(altivec_hadamard8_diff8x8_num, 1);	1001 POWERPC_PERF_DECLARE(altivec_hadamard8_diff8x8_num, 1);

1001 int sum;	1002 int sum;

1002 register const vector unsigned char vzero =	1003 register const vector unsigned char vzero =

1003 (const vector unsigned char)vec_splat_u8(0);	1004 (const vector unsigned char)vec_splat_u8(0);

1004 register vector signed short temp0, temp1, temp2, temp3, temp4,	1005 register vector signed short temp0, temp1, temp2, temp3, temp4,

1005 temp5, temp6, temp7;	1006 temp5, temp6, temp7;

1006 POWERPC_PERF_START_COUNT(altivec_hadamard8_diff8x8_num, 1);	1007 POWERPC_PERF_START_COUNT(altivec_hadamard8_diff8x8_num, 1);

1007 {	1008 {

1008 register const vector signed short vprod1 =(const vector signed short)	1009 register const vector signed short vprod1 =(const vector signed short)

1009 { 1,-1, 1,-1, 1,-1, 1,-1 };	1010 { 1,-1, 1,-1, 1,-1, 1,-1 };

(...skipping 300 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1310 vsum = vec_sum4s(vec_abs(line5CS), vsum);	1311 vsum = vec_sum4s(vec_abs(line5CS), vsum);

1311 vsum = vec_sum4s(vec_abs(line6CS), vsum);	1312 vsum = vec_sum4s(vec_abs(line6CS), vsum);

1312 vsum = vec_sum4s(vec_abs(line7CS), vsum);	1313 vsum = vec_sum4s(vec_abs(line7CS), vsum);

1313 vsum = vec_sums(vsum, (vector signed int)vzero);	1314 vsum = vec_sums(vsum, (vector signed int)vzero);

1314 vsum = vec_splat(vsum, 3);	1315 vsum = vec_splat(vsum, 3);

1315 vec_ste(vsum, 0, &sum);	1316 vec_ste(vsum, 0, &sum);

1316 }	1317 }

1317 return sum;	1318 return sum;

1318 }	1319 }

1319	1320

1320 int hadamard8_diff16_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){	1321 static int hadamard8_diff16_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){

1321 POWERPC_PERF_DECLARE(altivec_hadamard8_diff16_num, 1);	1322 POWERPC_PERF_DECLARE(altivec_hadamard8_diff16_num, 1);

1322 int score;	1323 int score;

1323 POWERPC_PERF_START_COUNT(altivec_hadamard8_diff16_num, 1);	1324 POWERPC_PERF_START_COUNT(altivec_hadamard8_diff16_num, 1);

1324 score = hadamard8_diff16x8_altivec(s, dst, src, stride, 8);	1325 score = hadamard8_diff16x8_altivec(s, dst, src, stride, 8);

1325 if (h==16) {	1326 if (h==16) {

1326 dst += 8*stride;	1327 dst += 8*stride;

1327 src += 8*stride;	1328 src += 8*stride;

1328 score += hadamard8_diff16x8_altivec(s, dst, src, stride, 8);	1329 score += hadamard8_diff16x8_altivec(s, dst, src, stride, 8);

1329 }	1330 }

1330 POWERPC_PERF_STOP_COUNT(altivec_hadamard8_diff16_num, 1);	1331 POWERPC_PERF_STOP_COUNT(altivec_hadamard8_diff16_num, 1);

(...skipping 17 matching lines...) Expand all Loading...
1348 t0 = (vector bool int)vec_and(a, t1);	1349 t0 = (vector bool int)vec_and(a, t1);

1349 t1 = (vector bool int)vec_andc(a, t1);	1350 t1 = (vector bool int)vec_andc(a, t1);

1350 a = vec_sub(m, (vector float)t1);	1351 a = vec_sub(m, (vector float)t1);

1351 m = vec_add(m, (vector float)t0);	1352 m = vec_add(m, (vector float)t0);

1352 vec_stl(a, 0, ang+i);	1353 vec_stl(a, 0, ang+i);

1353 vec_stl(m, 0, mag+i);	1354 vec_stl(m, 0, mag+i);

1354 }	1355 }

1355 }	1356 }

1356	1357

1357 /* next one assumes that ((line_size % 8) == 0) */	1358 /* next one assumes that ((line_size % 8) == 0) */

1358 void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)	1359 static void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)

1359 {	1360 {

1360 POWERPC_PERF_DECLARE(altivec_avg_pixels8_xy2_num, 1);	1361 POWERPC_PERF_DECLARE(altivec_avg_pixels8_xy2_num, 1);

1361 register int i;	1362 register int i;

1362 register vector unsigned char pixelsv1, pixelsv2, pixelsavg;	1363 register vector unsigned char pixelsv1, pixelsv2, pixelsavg;

1363 register vector unsigned char blockv, temp1, temp2, blocktemp;	1364 register vector unsigned char blockv, temp1, temp2, blocktemp;

1364 register vector unsigned short pixelssum1, pixelssum2, temp3;	1365 register vector unsigned short pixelssum1, pixelssum2, temp3;

1365	1366

1366 register const vector unsigned char vczero = (const vector unsigned char)	1367 register const vector unsigned char vczero = (const vector unsigned char)

1367 vec_splat_u8(0);	1368 vec_splat_u8(0);

1368 register const vector unsigned short vctwo = (const vector unsigned short)	1369 register const vector unsigned short vctwo = (const vector unsigned short)

(...skipping 78 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1447 c->put_pixels_tab[1][3] = put_pixels8_xy2_altivec;	1448 c->put_pixels_tab[1][3] = put_pixels8_xy2_altivec;

1448 c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_altivec;	1449 c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_altivec;

1449 c->put_pixels_tab[0][3] = put_pixels16_xy2_altivec;	1450 c->put_pixels_tab[0][3] = put_pixels16_xy2_altivec;

1450 c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_altivec;	1451 c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_altivec;

1451	1452

1452 c->hadamard8_diff[0] = hadamard8_diff16_altivec;	1453 c->hadamard8_diff[0] = hadamard8_diff16_altivec;

1453 c->hadamard8_diff[1] = hadamard8_diff8x8_altivec;	1454 c->hadamard8_diff[1] = hadamard8_diff8x8_altivec;

1454 if (CONFIG_VORBIS_DECODER)	1455 if (CONFIG_VORBIS_DECODER)

1455 c->vorbis_inverse_coupling = vorbis_inverse_coupling_altivec;	1456 c->vorbis_inverse_coupling = vorbis_inverse_coupling_altivec;

1456 }	1457 }

OLD	NEW