source/libvpx/vp9/common/mips/msa/vp9_macros_msa.h - Issue 1169543007: libvpx: Pull from upstream

Side by Side Diff: source/libvpx/vp9/common/mips/msa/vp9_macros_msa.h

Issue 1169543007: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master

Patch Set: Created 5 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.	2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 210 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
221 uint32_t val0_m, val1_m; \	221 uint32_t val0_m, val1_m; \

222 \	222 \

223 val0_m = (uint32_t)((val) & 0x00000000FFFFFFFF); \	223 val0_m = (uint32_t)((val) & 0x00000000FFFFFFFF); \

224 val1_m = (uint32_t)(((val) >> 32) & 0x00000000FFFFFFFF); \	224 val1_m = (uint32_t)(((val) >> 32) & 0x00000000FFFFFFFF); \

225 \	225 \

226 SW(val0_m, pdst_m1); \	226 SW(val0_m, pdst_m1); \

227 SW(val1_m, pdst_m1 + 4); \	227 SW(val1_m, pdst_m1 + 4); \

228 }	228 }

229 #endif // (__mips_isa_rev >= 6)	229 #endif // (__mips_isa_rev >= 6)

230	230

	231 /* Description : Load 4 words with stride

	232 Arguments : Inputs - psrc (source pointer to load from)

	233 - stride

	234 Outputs - out0, out1, out2, out3

	235 Details : Loads word in 'out0' from (psrc)

	236 Loads word in 'out1' from (psrc + stride)

	237 Loads word in 'out2' from (psrc + 2 * stride)

	238 Loads word in 'out3' from (psrc + 3 * stride)

	239 */

	240 #define LW4(psrc, stride, out0, out1, out2, out3) { \

	241 out0 = LW((psrc)); \

	242 out1 = LW((psrc) + stride); \

	243 out2 = LW((psrc) + 2 * stride); \

	244 out3 = LW((psrc) + 3 * stride); \

	245 }

	246

231 /* Description : Store 4 words with stride	247 /* Description : Store 4 words with stride

232 Arguments : Inputs - in0, in1, in2, in3, pdst, stride	248 Arguments : Inputs - in0, in1, in2, in3, pdst, stride

233 Details : Stores word from 'in0' to (pdst)	249 Details : Stores word from 'in0' to (pdst)

234 Stores word from 'in1' to (pdst + stride)	250 Stores word from 'in1' to (pdst + stride)

235 Stores word from 'in2' to (pdst + 2 * stride)	251 Stores word from 'in2' to (pdst + 2 * stride)

236 Stores word from 'in3' to (pdst + 3 * stride)	252 Stores word from 'in3' to (pdst + 3 * stride)

237 */	253 */

238 #define SW4(in0, in1, in2, in3, pdst, stride) { \	254 #define SW4(in0, in1, in2, in3, pdst, stride) { \

239 SW(in0, (pdst)) \	255 SW(in0, (pdst)) \

240 SW(in1, (pdst) + stride); \	256 SW(in1, (pdst) + stride); \

(...skipping 88 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
329 #define LD_H16(RTYPE, psrc, stride, \	345 #define LD_H16(RTYPE, psrc, stride, \

330 out0, out1, out2, out3, out4, out5, out6, out7, \	346 out0, out1, out2, out3, out4, out5, out6, out7, \

331 out8, out9, out10, out11, out12, out13, out14, out15) { \	347 out8, out9, out10, out11, out12, out13, out14, out15) { \

332 LD_H8(RTYPE, (psrc), stride, \	348 LD_H8(RTYPE, (psrc), stride, \

333 out0, out1, out2, out3, out4, out5, out6, out7); \	349 out0, out1, out2, out3, out4, out5, out6, out7); \

334 LD_H8(RTYPE, (psrc) + 8 * stride, stride, \	350 LD_H8(RTYPE, (psrc) + 8 * stride, stride, \

335 out8, out9, out10, out11, out12, out13, out14, out15); \	351 out8, out9, out10, out11, out12, out13, out14, out15); \

336 }	352 }

337 #define LD_SH16(...) LD_H16(v8i16, __VA_ARGS__)	353 #define LD_SH16(...) LD_H16(v8i16, __VA_ARGS__)

338	354

	355 /* Description : Load as 4x4 block of signed halfword elements from 1D source

	356 data into 4 vectors (Each vector with 4 signed halfwords)

	357 Arguments : Inputs - psrc

	358 Outputs - out0, out1, out2, out3

	359 */

	360 #define LD4x4_SH(psrc, out0, out1, out2, out3) { \

	361 out0 = LD_SH(psrc); \

	362 out2 = LD_SH(psrc + 8); \

	363 out1 = (v8i16)__msa_ilvl_d((v2i64)out0, (v2i64)out0); \

	364 out3 = (v8i16)__msa_ilvl_d((v2i64)out2, (v2i64)out2); \

	365 }

	366

339 /* Description : Store vectors of 16 byte elements with stride	367 /* Description : Store vectors of 16 byte elements with stride

340 Arguments : Inputs - in0, in1, stride	368 Arguments : Inputs - in0, in1, stride

341 Outputs - pdst (destination pointer to store to)	369 Outputs - pdst (destination pointer to store to)

342 Details : Stores 16 byte elements from 'in0' to (pdst)	370 Details : Stores 16 byte elements from 'in0' to (pdst)

343 Stores 16 byte elements from 'in1' to (pdst + stride)	371 Stores 16 byte elements from 'in1' to (pdst + stride)

344 */	372 */

345 #define ST_B2(RTYPE, in0, in1, pdst, stride) { \	373 #define ST_B2(RTYPE, in0, in1, pdst, stride) { \

346 ST_B(RTYPE, in0, (pdst)); \	374 ST_B(RTYPE, in0, (pdst)); \

347 ST_B(RTYPE, in1, (pdst) + stride); \	375 ST_B(RTYPE, in1, (pdst) + stride); \

348 }	376 }

(...skipping 29 matching lines...) Expand all Loading...
378 ST_H2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \	406 ST_H2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \

379 }	407 }

380 #define ST_SH4(...) ST_H4(v8i16, __VA_ARGS__)	408 #define ST_SH4(...) ST_H4(v8i16, __VA_ARGS__)

381	409

382 #define ST_H8(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride) { \	410 #define ST_H8(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride) { \

383 ST_H4(RTYPE, in0, in1, in2, in3, (pdst), stride); \	411 ST_H4(RTYPE, in0, in1, in2, in3, (pdst), stride); \

384 ST_H4(RTYPE, in4, in5, in6, in7, (pdst) + 4 * stride, stride); \	412 ST_H4(RTYPE, in4, in5, in6, in7, (pdst) + 4 * stride, stride); \

385 }	413 }

386 #define ST_SH8(...) ST_H8(v8i16, __VA_ARGS__)	414 #define ST_SH8(...) ST_H8(v8i16, __VA_ARGS__)

387	415

	416 /* Description : Store as 2x4 byte block to destination memory from input vector

	417 Arguments : Inputs - in, stidx, pdst, stride

	418 Return Type - unsigned byte

	419 Details : Index stidx halfword element from 'in' vector is copied and

	420 stored on first line

	421 Index stidx+1 halfword element from 'in' vector is copied and

	422 stored on second line

	423 Index stidx+2 halfword element from 'in' vector is copied and

	424 stored on third line

	425 Index stidx+3 halfword element from 'in' vector is copied and

	426 stored on fourth line

	427 */

	428 #define ST2x4_UB(in, stidx, pdst, stride) { \

	429 uint16_t out0_m, out1_m, out2_m, out3_m; \

	430 uint8_t *pblk_2x4_m = (uint8_t *)(pdst); \

	431 \

	432 out0_m = __msa_copy_u_h((v8i16)in, (stidx)); \

	433 out1_m = __msa_copy_u_h((v8i16)in, (stidx + 1)); \

	434 out2_m = __msa_copy_u_h((v8i16)in, (stidx + 2)); \

	435 out3_m = __msa_copy_u_h((v8i16)in, (stidx + 3)); \

	436 \

	437 SH(out0_m, pblk_2x4_m); \

	438 SH(out1_m, pblk_2x4_m + stride); \

	439 SH(out2_m, pblk_2x4_m + 2 * stride); \

	440 SH(out3_m, pblk_2x4_m + 3 * stride); \

	441 }

	442

388 /* Description : Store as 4x4 byte block to destination memory from input vector	443 /* Description : Store as 4x4 byte block to destination memory from input vector

389 Arguments : Inputs - in0, in1, pdst, stride	444 Arguments : Inputs - in0, in1, pdst, stride

390 Return Type - unsigned byte	445 Return Type - unsigned byte

391 Details : Idx0 word element from input vector 'in0' is copied and stored	446 Details : Idx0 word element from input vector 'in0' is copied and stored

392 on first line	447 on first line

393 Idx1 word element from input vector 'in0' is copied and stored	448 Idx1 word element from input vector 'in0' is copied and stored

394 on second line	449 on second line

395 Idx2 word element from input vector 'in1' is copied and stored	450 Idx2 word element from input vector 'in1' is copied and stored

396 on third line	451 on third line

397 Idx3 word element from input vector 'in1' is copied and stored	452 Idx3 word element from input vector 'in1' is copied and stored

(...skipping 85 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
483 Details : Byte elements from 'zero_m' vector are slid into 'in0' by	538 Details : Byte elements from 'zero_m' vector are slid into 'in0' by

484 number of elements specified by 'slide_val'	539 number of elements specified by 'slide_val'

485 */	540 */

486 #define SLDI_B2_0(RTYPE, in0, in1, out0, out1, slide_val) { \	541 #define SLDI_B2_0(RTYPE, in0, in1, out0, out1, slide_val) { \

487 v16i8 zero_m = { 0 }; \	542 v16i8 zero_m = { 0 }; \

488 out0 = (RTYPE)__msa_sldi_b((v16i8)zero_m, (v16i8)in0, slide_val); \	543 out0 = (RTYPE)__msa_sldi_b((v16i8)zero_m, (v16i8)in0, slide_val); \

489 out1 = (RTYPE)__msa_sldi_b((v16i8)zero_m, (v16i8)in1, slide_val); \	544 out1 = (RTYPE)__msa_sldi_b((v16i8)zero_m, (v16i8)in1, slide_val); \

490 }	545 }

491 #define SLDI_B2_0_SW(...) SLDI_B2_0(v4i32, __VA_ARGS__)	546 #define SLDI_B2_0_SW(...) SLDI_B2_0(v4i32, __VA_ARGS__)

492	547

	548 #define SLDI_B4_0(RTYPE, in0, in1, in2, in3, \

	549 out0, out1, out2, out3, slide_val) { \

	550 SLDI_B2_0(RTYPE, in0, in1, out0, out1, slide_val); \

	551 SLDI_B2_0(RTYPE, in2, in3, out2, out3, slide_val); \

	552 }

	553 #define SLDI_B4_0_UB(...) SLDI_B4_0(v16u8, __VA_ARGS__)

	554

493 /* Description : Immediate number of columns to slide	555 /* Description : Immediate number of columns to slide

494 Arguments : Inputs - in0_0, in0_1, in1_0, in1_1, slide_val	556 Arguments : Inputs - in0_0, in0_1, in1_0, in1_1, slide_val

495 Outputs - out0, out1	557 Outputs - out0, out1

496 Return Type - as per RTYPE	558 Return Type - as per RTYPE

497 Details : Byte elements from 'in0_0' vector are slid into 'in1_0' by	559 Details : Byte elements from 'in0_0' vector are slid into 'in1_0' by

498 number of elements specified by 'slide_val'	560 number of elements specified by 'slide_val'

499 */	561 */

500 #define SLDI_B2(RTYPE, in0_0, in0_1, in1_0, in1_1, out0, out1, slide_val) { \	562 #define SLDI_B2(RTYPE, in0_0, in0_1, in1_0, in1_1, out0, out1, slide_val) { \

501 out0 = (RTYPE)__msa_sldi_b((v16i8)in0_0, (v16i8)in1_0, slide_val); \	563 out0 = (RTYPE)__msa_sldi_b((v16i8)in0_0, (v16i8)in1_0, slide_val); \

502 out1 = (RTYPE)__msa_sldi_b((v16i8)in0_1, (v16i8)in1_1, slide_val); \	564 out1 = (RTYPE)__msa_sldi_b((v16i8)in0_1, (v16i8)in1_1, slide_val); \

(...skipping 171 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
674 })	736 })

675 #define CLIP_SH2_0_255(in0, in1) { \	737 #define CLIP_SH2_0_255(in0, in1) { \

676 in0 = CLIP_SH_0_255(in0); \	738 in0 = CLIP_SH_0_255(in0); \

677 in1 = CLIP_SH_0_255(in1); \	739 in1 = CLIP_SH_0_255(in1); \

678 }	740 }

679 #define CLIP_SH4_0_255(in0, in1, in2, in3) { \	741 #define CLIP_SH4_0_255(in0, in1, in2, in3) { \

680 CLIP_SH2_0_255(in0, in1); \	742 CLIP_SH2_0_255(in0, in1); \

681 CLIP_SH2_0_255(in2, in3); \	743 CLIP_SH2_0_255(in2, in3); \

682 }	744 }

683	745

	746 /* Description : Insert specified word elements from input vectors to 1

	747 destination vector

	748 Arguments : Inputs - in0, in1 (2 word values to insert)

	749 Outputs - out (output vector)

	750 Return Type - as per RTYPE

	751 */

	752 #define INSERT_W2(RTYPE, in0, in1, out) { \

	753 out = (RTYPE)__msa_insert_w((v4i32)out, 0, in0); \

	754 out = (RTYPE)__msa_insert_w((v4i32)out, 1, in1); \

	755 }

	756 #define INSERT_W2_SB(...) INSERT_W2(v16i8, __VA_ARGS__)

	757

684 /* Description : Interleave even byte elements from vectors	758 /* Description : Interleave even byte elements from vectors

685 Arguments : Inputs - in0, in1, in2, in3	759 Arguments : Inputs - in0, in1, in2, in3

686 Outputs - out0, out1	760 Outputs - out0, out1

687 Return Type - as per RTYPE	761 Return Type - as per RTYPE

688 Details : Even byte elements of 'in0' and even byte	762 Details : Even byte elements of 'in0' and even byte

689 elements of 'in1' are interleaved and copied to 'out0'	763 elements of 'in1' are interleaved and copied to 'out0'

690 Even byte elements of 'in2' and even byte	764 Even byte elements of 'in2' and even byte

691 elements of 'in3' are interleaved and copied to 'out1'	765 elements of 'in3' are interleaved and copied to 'out1'

692 */	766 */

693 #define ILVEV_B2(RTYPE, in0, in1, in2, in3, out0, out1) { \	767 #define ILVEV_B2(RTYPE, in0, in1, in2, in3, out0, out1) { \

(...skipping 13 matching lines...) Expand all Loading...
707 elements of 'in3' are interleaved and copied to 'out1'	781 elements of 'in3' are interleaved and copied to 'out1'

708 */	782 */

709 #define ILVEV_H2(RTYPE, in0, in1, in2, in3, out0, out1) { \	783 #define ILVEV_H2(RTYPE, in0, in1, in2, in3, out0, out1) { \

710 out0 = (RTYPE)__msa_ilvev_h((v8i16)in1, (v8i16)in0); \	784 out0 = (RTYPE)__msa_ilvev_h((v8i16)in1, (v8i16)in0); \

711 out1 = (RTYPE)__msa_ilvev_h((v8i16)in3, (v8i16)in2); \	785 out1 = (RTYPE)__msa_ilvev_h((v8i16)in3, (v8i16)in2); \

712 }	786 }

713 #define ILVEV_H2_UB(...) ILVEV_H2(v16u8, __VA_ARGS__)	787 #define ILVEV_H2_UB(...) ILVEV_H2(v16u8, __VA_ARGS__)

714 #define ILVEV_H2_SH(...) ILVEV_H2(v8i16, __VA_ARGS__)	788 #define ILVEV_H2_SH(...) ILVEV_H2(v8i16, __VA_ARGS__)

715 #define ILVEV_H2_SW(...) ILVEV_H2(v4i32, __VA_ARGS__)	789 #define ILVEV_H2_SW(...) ILVEV_H2(v4i32, __VA_ARGS__)

716	790

	791 /* Description : Interleave even double word elements from vectors

	792 Arguments : Inputs - in0, in1, in2, in3

	793 Outputs - out0, out1

	794 Return Type - as per RTYPE

	795 Details : Even double word elements of 'in0' and even double word

	796 elements of 'in1' are interleaved and copied to 'out0'

	797 Even double word elements of 'in2' and even double word

	798 elements of 'in3' are interleaved and copied to 'out1'

	799 */

	800 #define ILVEV_D2(RTYPE, in0, in1, in2, in3, out0, out1) { \

	801 out0 = (RTYPE)__msa_ilvev_d((v2i64)in1, (v2i64)in0); \

	802 out1 = (RTYPE)__msa_ilvev_d((v2i64)in3, (v2i64)in2); \

	803 }

	804 #define ILVEV_D2_UB(...) ILVEV_D2(v16u8, __VA_ARGS__)

	805

717 /* Description : Interleave left half of byte elements from vectors	806 /* Description : Interleave left half of byte elements from vectors

718 Arguments : Inputs - in0, in1, in2, in3	807 Arguments : Inputs - in0, in1, in2, in3

719 Outputs - out0, out1	808 Outputs - out0, out1

720 Return Type - as per RTYPE	809 Return Type - as per RTYPE

721 Details : Left half of byte elements of in0 and left half of byte	810 Details : Left half of byte elements of in0 and left half of byte

722 elements of in1 are interleaved and copied to out0.	811 elements of in1 are interleaved and copied to out0.

723 Left half of byte elements of in2 and left half of byte	812 Left half of byte elements of in2 and left half of byte

724 elements of in3 are interleaved and copied to out1.	813 elements of in3 are interleaved and copied to out1.

725 */	814 */

726 #define ILVL_B2(RTYPE, in0, in1, in2, in3, out0, out1) { \	815 #define ILVL_B2(RTYPE, in0, in1, in2, in3, out0, out1) { \

(...skipping 66 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
793 #define ILVR_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \	882 #define ILVR_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \

794 out0, out1, out2, out3) { \	883 out0, out1, out2, out3) { \

795 ILVR_B2(RTYPE, in0, in1, in2, in3, out0, out1); \	884 ILVR_B2(RTYPE, in0, in1, in2, in3, out0, out1); \

796 ILVR_B2(RTYPE, in4, in5, in6, in7, out2, out3); \	885 ILVR_B2(RTYPE, in4, in5, in6, in7, out2, out3); \

797 }	886 }

798 #define ILVR_B4_UB(...) ILVR_B4(v16u8, __VA_ARGS__)	887 #define ILVR_B4_UB(...) ILVR_B4(v16u8, __VA_ARGS__)

799 #define ILVR_B4_SB(...) ILVR_B4(v16i8, __VA_ARGS__)	888 #define ILVR_B4_SB(...) ILVR_B4(v16i8, __VA_ARGS__)

800 #define ILVR_B4_UH(...) ILVR_B4(v8u16, __VA_ARGS__)	889 #define ILVR_B4_UH(...) ILVR_B4(v8u16, __VA_ARGS__)

801 #define ILVR_B4_SH(...) ILVR_B4(v8i16, __VA_ARGS__)	890 #define ILVR_B4_SH(...) ILVR_B4(v8i16, __VA_ARGS__)

802	891

	892 #define ILVR_B8(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \

	893 in8, in9, in10, in11, in12, in13, in14, in15, \

	894 out0, out1, out2, out3, out4, out5, out6, out7) { \

	895 ILVR_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \

	896 out0, out1, out2, out3); \

	897 ILVR_B4(RTYPE, in8, in9, in10, in11, in12, in13, in14, in15, \

	898 out4, out5, out6, out7); \

	899 }

	900 #define ILVR_B8_UH(...) ILVR_B8(v8u16, __VA_ARGS__)

	901

803 /* Description : Interleave right half of halfword elements from vectors	902 /* Description : Interleave right half of halfword elements from vectors

804 Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7	903 Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7

805 Outputs - out0, out1, out2, out3	904 Outputs - out0, out1, out2, out3

806 Return Type - signed halfword	905 Return Type - signed halfword

807 Details : Right half of halfword elements of in0 and right half of	906 Details : Right half of halfword elements of in0 and right half of

808 halfword elements of in1 are interleaved and copied to out0.	907 halfword elements of in1 are interleaved and copied to out0.

809 Right half of halfword elements of in2 and right half of	908 Right half of halfword elements of in2 and right half of

810 halfword elements of in3 are interleaved and copied to out1.	909 halfword elements of in3 are interleaved and copied to out1.

811 Similar for other pairs	910 Similar for other pairs

812 */	911 */

(...skipping 10 matching lines...) Expand all Loading...
823 }	922 }

824 #define ILVR_H4_SH(...) ILVR_H4(v8i16, __VA_ARGS__)	923 #define ILVR_H4_SH(...) ILVR_H4(v8i16, __VA_ARGS__)

825	924

826 #define ILVR_W2(RTYPE, in0, in1, in2, in3, out0, out1) { \	925 #define ILVR_W2(RTYPE, in0, in1, in2, in3, out0, out1) { \

827 out0 = (RTYPE)__msa_ilvr_w((v4i32)in0, (v4i32)in1); \	926 out0 = (RTYPE)__msa_ilvr_w((v4i32)in0, (v4i32)in1); \

828 out1 = (RTYPE)__msa_ilvr_w((v4i32)in2, (v4i32)in3); \	927 out1 = (RTYPE)__msa_ilvr_w((v4i32)in2, (v4i32)in3); \

829 }	928 }

830 #define ILVR_W2_UB(...) ILVR_W2(v16u8, __VA_ARGS__)	929 #define ILVR_W2_UB(...) ILVR_W2(v16u8, __VA_ARGS__)

831 #define ILVR_W2_SH(...) ILVR_W2(v8i16, __VA_ARGS__)	930 #define ILVR_W2_SH(...) ILVR_W2(v8i16, __VA_ARGS__)

832	931

	932 #define ILVR_W4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \

	933 out0, out1, out2, out3) { \

	934 ILVR_W2(RTYPE, in0, in1, in2, in3, out0, out1); \

	935 ILVR_W2(RTYPE, in4, in5, in6, in7, out2, out3); \

	936 }

	937 #define ILVR_W4_UB(...) ILVR_W4(v16u8, __VA_ARGS__)

	938

833 /* Description : Interleave right half of double word elements from vectors	939 /* Description : Interleave right half of double word elements from vectors

834 Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7	940 Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7

835 Outputs - out0, out1, out2, out3	941 Outputs - out0, out1, out2, out3

836 Return Type - unsigned double word	942 Return Type - unsigned double word

837 Details : Right half of double word elements of in0 and right half of	943 Details : Right half of double word elements of in0 and right half of

838 double word elements of in1 are interleaved and copied to out0.	944 double word elements of in1 are interleaved and copied to out0.

839 Right half of double word elements of in2 and right half of	945 Right half of double word elements of in2 and right half of

840 double word elements of in3 are interleaved and copied to out1.	946 double word elements of in3 are interleaved and copied to out1.

841 */	947 */

842 #define ILVR_D2(RTYPE, in0, in1, in2, in3, out0, out1) { \	948 #define ILVR_D2(RTYPE, in0, in1, in2, in3, out0, out1) { \

(...skipping 248 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1091 }	1197 }

1092 #define ADDS_SH2_SH(...) ADDS_SH2(v8i16, __VA_ARGS__)	1198 #define ADDS_SH2_SH(...) ADDS_SH2(v8i16, __VA_ARGS__)

1093	1199

1094 #define ADDS_SH4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \	1200 #define ADDS_SH4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \

1095 out0, out1, out2, out3) { \	1201 out0, out1, out2, out3) { \

1096 ADDS_SH2(RTYPE, in0, in1, in2, in3, out0, out1); \	1202 ADDS_SH2(RTYPE, in0, in1, in2, in3, out0, out1); \

1097 ADDS_SH2(RTYPE, in4, in5, in6, in7, out2, out3); \	1203 ADDS_SH2(RTYPE, in4, in5, in6, in7, out2, out3); \

1098 }	1204 }

1099 #define ADDS_SH4_SH(...) ADDS_SH4(v8i16, __VA_ARGS__)	1205 #define ADDS_SH4_SH(...) ADDS_SH4(v8i16, __VA_ARGS__)

1100	1206

	1207 /* Description : Arithmetic shift right all elements of vector

	1208 (generic for all data types)

	1209 Arguments : Inputs - in0, in1, in2, in3, shift

	1210 Outputs - in0, in1, in2, in3 (in place)

	1211 Return Type - as per input vector RTYPE

	1212 Details : Each element of vector 'in0' is right shifted by 'shift' and

	1213 result is in place written to 'in0'

	1214 Here, 'shift' is GP variable passed in

	1215 Similar for other pairs

	1216 */

	1217 #define SRA_4V(in0, in1, in2, in3, shift) { \

	1218 in0 = in0 >> shift; \

	1219 in1 = in1 >> shift; \

	1220 in2 = in2 >> shift; \

	1221 in3 = in3 >> shift; \

	1222 }

	1223

1101 /* Description : Shift right arithmetic rounded (immediate)	1224 /* Description : Shift right arithmetic rounded (immediate)

1102 Arguments : Inputs - in0, in1, in2, in3, shift	1225 Arguments : Inputs - in0, in1, in2, in3, shift

1103 Outputs - in0, in1, in2, in3 (in place)	1226 Outputs - in0, in1, in2, in3 (in place)

1104 Return Type - as per RTYPE	1227 Return Type - as per RTYPE

1105 Details : Each element of vector 'in0' is shifted right arithmetic by	1228 Details : Each element of vector 'in0' is shifted right arithmetic by

1106 value in 'shift'.	1229 value in 'shift'.

1107 The last discarded bit is added to shifted value for rounding	1230 The last discarded bit is added to shifted value for rounding

1108 and the result is in place written to 'in0'	1231 and the result is in place written to 'in0'

1109 Similar for other pairs	1232 Similar for other pairs

1110 */	1233 */

(...skipping 60 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1171 out1 = in2 - in3; \	1294 out1 = in2 - in3; \

1172 }	1295 }

1173 #define SUB4(in0, in1, in2, in3, in4, in5, in6, in7, \	1296 #define SUB4(in0, in1, in2, in3, in4, in5, in6, in7, \

1174 out0, out1, out2, out3) { \	1297 out0, out1, out2, out3) { \

1175 out0 = in0 - in1; \	1298 out0 = in0 - in1; \

1176 out1 = in2 - in3; \	1299 out1 = in2 - in3; \

1177 out2 = in4 - in5; \	1300 out2 = in4 - in5; \

1178 out3 = in6 - in7; \	1301 out3 = in6 - in7; \

1179 }	1302 }

1180	1303

	1304 /* Description : Sign extend halfword elements from right half of the vector

	1305 Arguments : Inputs - in (input halfword vector)

	1306 Outputs - out (sign extended word vectors)

	1307 Return Type - signed word

	1308 Details : Sign bit of halfword elements from input vector 'in' is

	1309 extracted and interleaved with same vector 'in' to generate

	1310 4 word elements keeping sign intact

	1311 */

	1312 #define UNPCK_R_SH_SW(in, out) { \

	1313 v8i16 sign_m; \

	1314 \

	1315 sign_m = __msa_clti_s_h((v8i16)in, 0); \

	1316 out = (v4i32)__msa_ilvr_h(sign_m, (v8i16)in); \

	1317 }

	1318

1181 /* Description : Zero extend unsigned byte elements to halfword elements	1319 /* Description : Zero extend unsigned byte elements to halfword elements

1182 Arguments : Inputs - in (1 input unsigned byte vector)	1320 Arguments : Inputs - in (1 input unsigned byte vector)

1183 Outputs - out0, out1 (unsigned 2 halfword vectors)	1321 Outputs - out0, out1 (unsigned 2 halfword vectors)

1184 Return Type - signed halfword	1322 Return Type - signed halfword

1185 Details : Zero extended right half of vector is returned in 'out0'	1323 Details : Zero extended right half of vector is returned in 'out0'

1186 Zero extended left half of vector is returned in 'out1'	1324 Zero extended left half of vector is returned in 'out1'

1187 */	1325 */

1188 #define UNPCK_UB_SH(in, out0, out1) { \	1326 #define UNPCK_UB_SH(in, out0, out1) { \

1189 v16i8 zero_m = { 0 }; \	1327 v16i8 zero_m = { 0 }; \

1190 \	1328 \

(...skipping 24 matching lines...) Expand all Loading...
1215 out1 = in1 + in6; \	1353 out1 = in1 + in6; \

1216 out2 = in2 + in5; \	1354 out2 = in2 + in5; \

1217 out3 = in3 + in4; \	1355 out3 = in3 + in4; \

1218 \	1356 \

1219 out4 = in3 - in4; \	1357 out4 = in3 - in4; \

1220 out5 = in2 - in5; \	1358 out5 = in2 - in5; \

1221 out6 = in1 - in6; \	1359 out6 = in1 - in6; \

1222 out7 = in0 - in7; \	1360 out7 = in0 - in7; \

1223 }	1361 }

1224	1362

	1363 /* Description : Transposes input 8x8 byte block

	1364 Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7

	1365 (input 8x8 byte block)

	1366 Outputs - out0, out1, out2, out3, out4, out5, out6, out7

	1367 (output 8x8 byte block)

	1368 Return Type - unsigned byte

	1369 Details :

	1370 */

	1371 #define TRANSPOSE8x8_UB(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \

	1372 out0, out1, out2, out3, out4, out5, out6, out7) { \

	1373 v16i8 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \

	1374 v16i8 tmp4_m, tmp5_m, tmp6_m, tmp7_m; \

	1375 \

	1376 ILVR_B4_SB(in2, in0, in3, in1, in6, in4, in7, in5, \

	1377 tmp0_m, tmp1_m, tmp2_m, tmp3_m); \

	1378 ILVRL_B2_SB(tmp1_m, tmp0_m, tmp4_m, tmp5_m); \

	1379 ILVRL_B2_SB(tmp3_m, tmp2_m, tmp6_m, tmp7_m); \

	1380 ILVRL_W2(RTYPE, tmp6_m, tmp4_m, out0, out2); \

	1381 ILVRL_W2(RTYPE, tmp7_m, tmp5_m, out4, out6); \

	1382 SLDI_B2_0(RTYPE, out0, out2, out1, out3, 8); \

	1383 SLDI_B2_0(RTYPE, out4, out6, out5, out7, 8); \

	1384 }

	1385 #define TRANSPOSE8x8_UB_UB(...) TRANSPOSE8x8_UB(v16u8, __VA_ARGS__)

	1386

	1387 /* Description : Transposes 16x8 block into 8x16 with byte elements in vectors

	1388 Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7,

	1389 in8, in9, in10, in11, in12, in13, in14, in15

	1390 Outputs - out0, out1, out2, out3, out4, out5, out6, out7

	1391 Return Type - unsigned byte

	1392 Details :

	1393 */

	1394 #define TRANSPOSE16x8_UB_UB(in0, in1, in2, in3, in4, in5, in6, in7, \

	1395 in8, in9, in10, in11, in12, in13, in14, in15, \

	1396 out0, out1, out2, out3, out4, out5, out6, out7) { \

	1397 v16u8 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \

	1398 v16u8 tmp4_m, tmp5_m, tmp6_m, tmp7_m; \

	1399 \

	1400 ILVEV_D2_UB(in0, in8, in1, in9, out7, out6); \

	1401 ILVEV_D2_UB(in2, in10, in3, in11, out5, out4); \

	1402 ILVEV_D2_UB(in4, in12, in5, in13, out3, out2); \

	1403 ILVEV_D2_UB(in6, in14, in7, in15, out1, out0); \

	1404 \

	1405 tmp0_m = (v16u8)__msa_ilvev_b((v16i8)out6, (v16i8)out7); \

	1406 tmp4_m = (v16u8)__msa_ilvod_b((v16i8)out6, (v16i8)out7); \

	1407 tmp1_m = (v16u8)__msa_ilvev_b((v16i8)out4, (v16i8)out5); \

	1408 tmp5_m = (v16u8)__msa_ilvod_b((v16i8)out4, (v16i8)out5); \

	1409 out5 = (v16u8)__msa_ilvev_b((v16i8)out2, (v16i8)out3); \

	1410 tmp6_m = (v16u8)__msa_ilvod_b((v16i8)out2, (v16i8)out3); \

	1411 out7 = (v16u8)__msa_ilvev_b((v16i8)out0, (v16i8)out1); \

	1412 tmp7_m = (v16u8)__msa_ilvod_b((v16i8)out0, (v16i8)out1); \

	1413 \

	1414 ILVEV_H2_UB(tmp0_m, tmp1_m, out5, out7, tmp2_m, tmp3_m); \

	1415 out0 = (v16u8)__msa_ilvev_w((v4i32)tmp3_m, (v4i32)tmp2_m); \

	1416 out4 = (v16u8)__msa_ilvod_w((v4i32)tmp3_m, (v4i32)tmp2_m); \

	1417 \

	1418 tmp2_m = (v16u8)__msa_ilvod_h((v8i16)tmp1_m, (v8i16)tmp0_m); \

	1419 tmp3_m = (v16u8)__msa_ilvod_h((v8i16)out7, (v8i16)out5); \

	1420 out2 = (v16u8)__msa_ilvev_w((v4i32)tmp3_m, (v4i32)tmp2_m); \

	1421 out6 = (v16u8)__msa_ilvod_w((v4i32)tmp3_m, (v4i32)tmp2_m); \

	1422 \

	1423 ILVEV_H2_UB(tmp4_m, tmp5_m, tmp6_m, tmp7_m, tmp2_m, tmp3_m); \

	1424 out1 = (v16u8)__msa_ilvev_w((v4i32)tmp3_m, (v4i32)tmp2_m); \

	1425 out5 = (v16u8)__msa_ilvod_w((v4i32)tmp3_m, (v4i32)tmp2_m); \

	1426 \

	1427 tmp2_m = (v16u8)__msa_ilvod_h((v8i16)tmp5_m, (v8i16)tmp4_m); \

	1428 tmp2_m = (v16u8)__msa_ilvod_h((v8i16)tmp5_m, (v8i16)tmp4_m); \

	1429 tmp3_m = (v16u8)__msa_ilvod_h((v8i16)tmp7_m, (v8i16)tmp6_m); \

	1430 tmp3_m = (v16u8)__msa_ilvod_h((v8i16)tmp7_m, (v8i16)tmp6_m); \

	1431 out3 = (v16u8)__msa_ilvev_w((v4i32)tmp3_m, (v4i32)tmp2_m); \

	1432 out7 = (v16u8)__msa_ilvod_w((v4i32)tmp3_m, (v4i32)tmp2_m); \

	1433 }

	1434

	1435 /* Description : Transposes 4x4 block with half word elements in vectors

	1436 Arguments : Inputs - in0, in1, in2, in3

	1437 Outputs - out0, out1, out2, out3

	1438 Return Type - signed halfword

	1439 Details :

	1440 */

	1441 #define TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, out0, out1, out2, out3) { \

	1442 v8i16 s0_m, s1_m; \

	1443 \

	1444 ILVR_H2_SH(in1, in0, in3, in2, s0_m, s1_m); \

	1445 ILVRL_W2_SH(s1_m, s0_m, out0, out2); \

	1446 out1 = (v8i16)__msa_ilvl_d((v2i64)out0, (v2i64)out0); \

	1447 out3 = (v8i16)__msa_ilvl_d((v2i64)out0, (v2i64)out2); \

	1448 }

	1449

1225 /* Description : Transposes 4x8 block with half word elements in vectors	1450 /* Description : Transposes 4x8 block with half word elements in vectors

1226 Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7	1451 Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7

1227 Outputs - out0, out1, out2, out3, out4, out5, out6, out7	1452 Outputs - out0, out1, out2, out3, out4, out5, out6, out7

1228 Return Type - signed halfword	1453 Return Type - signed halfword

1229 Details :	1454 Details :

1230 */	1455 */

1231 #define TRANSPOSE4X8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, \	1456 #define TRANSPOSE4X8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, \

1232 out0, out1, out2, out3, out4, out5, out6, out7) { \	1457 out0, out1, out2, out3, out4, out5, out6, out7) { \

1233 v8i16 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \	1458 v8i16 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \

1234 v8i16 tmp0_n, tmp1_n, tmp2_n, tmp3_n; \	1459 v8i16 tmp0_n, tmp1_n, tmp2_n, tmp3_n; \

(...skipping 52 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1287 ILVRL_H2_SH(s1_m, s0_m, tmp6_m, tmp7_m); \	1512 ILVRL_H2_SH(s1_m, s0_m, tmp6_m, tmp7_m); \

1288 PCKEV_D4(RTYPE, tmp0_m, tmp4_m, tmp1_m, tmp5_m, tmp2_m, tmp6_m, \	1513 PCKEV_D4(RTYPE, tmp0_m, tmp4_m, tmp1_m, tmp5_m, tmp2_m, tmp6_m, \

1289 tmp3_m, tmp7_m, out0, out2, out4, out6); \	1514 tmp3_m, tmp7_m, out0, out2, out4, out6); \

1290 out1 = (RTYPE)__msa_pckod_d((v2i64)tmp0_m, (v2i64)tmp4_m); \	1515 out1 = (RTYPE)__msa_pckod_d((v2i64)tmp0_m, (v2i64)tmp4_m); \

1291 out3 = (RTYPE)__msa_pckod_d((v2i64)tmp1_m, (v2i64)tmp5_m); \	1516 out3 = (RTYPE)__msa_pckod_d((v2i64)tmp1_m, (v2i64)tmp5_m); \

1292 out5 = (RTYPE)__msa_pckod_d((v2i64)tmp2_m, (v2i64)tmp6_m); \	1517 out5 = (RTYPE)__msa_pckod_d((v2i64)tmp2_m, (v2i64)tmp6_m); \

1293 out7 = (RTYPE)__msa_pckod_d((v2i64)tmp3_m, (v2i64)tmp7_m); \	1518 out7 = (RTYPE)__msa_pckod_d((v2i64)tmp3_m, (v2i64)tmp7_m); \

1294 }	1519 }

1295 #define TRANSPOSE8x8_SH_SH(...) TRANSPOSE8x8_H(v8i16, __VA_ARGS__)	1520 #define TRANSPOSE8x8_SH_SH(...) TRANSPOSE8x8_H(v8i16, __VA_ARGS__)

1296	1521

	1522 /* Description : Transposes 4x4 block with word elements in vectors

	1523 Arguments : Inputs - in0, in1, in2, in3

	1524 Outputs - out0, out1, out2, out3

	1525 Return Type - signed word

	1526 Details : The pairs (in0, in1) and (in2, in3) are combined by ILVRL_W2_SW (defined elsewhere in this header) into s0_m..s3_m; out0/out1 are the __msa_ilvr_d / __msa_ilvl_d results of (s2_m, s0_m) and out2/out3 those of (s3_m, s1_m), each cast to v4i32

	1527 */

	1528 #define TRANSPOSE4x4_SW_SW(in0, in1, in2, in3, out0, out1, out2, out3) { \

	1529 v4i32 s0_m, s1_m, s2_m, s3_m; \

	1530 \

	1531 ILVRL_W2_SW(in1, in0, s0_m, s1_m); \

	1532 ILVRL_W2_SW(in3, in2, s2_m, s3_m); \

	1533 \

	1534 out0 = (v4i32)__msa_ilvr_d((v2i64)s2_m, (v2i64)s0_m); \

	1535 out1 = (v4i32)__msa_ilvl_d((v2i64)s2_m, (v2i64)s0_m); \

	1536 out2 = (v4i32)__msa_ilvr_d((v2i64)s3_m, (v2i64)s1_m); \

	1537 out3 = (v4i32)__msa_ilvl_d((v2i64)s3_m, (v2i64)s1_m); \

	1538 }

	1539

	1540 /* Description : Add block 4x4

	1541 Arguments : Inputs - in0, in1, in2, in3, pdst, stride

	1542 Outputs - none (the clipped sums are written back through pdst)

	1543 Return Type - unsigned bytes

	1544 Details : Least significant 4 bytes from each input vector are added to

	1545 the destination bytes, clipped between 0-255 and then stored. pdst and stride are expanded twice (once in LW4, once in SW4), so pass expressions without side effects.

	1546 */

	1547 #define ADDBLK_ST4x4_UB(in0, in1, in2, in3, pdst, stride) { \

	1548 uint32_t src0_m, src1_m, src2_m, src3_m; \

	1549 uint32_t out0_m, out1_m, out2_m, out3_m; \

	1550 v8i16 inp0_m, inp1_m, res0_m, res1_m; \

	1551 v16i8 dst0_m = { 0 }; \

	1552 v16i8 dst1_m = { 0 }; \

	1553 v16i8 zero_m = { 0 }; \

	1554 \

	1555 ILVR_D2_SH(in1, in0, in3, in2, inp0_m, inp1_m); \

	1556 LW4(pdst, stride, src0_m, src1_m, src2_m, src3_m); \

	1557 INSERT_W2_SB(src0_m, src1_m, dst0_m); \

	1558 INSERT_W2_SB(src2_m, src3_m, dst1_m); \

	1559 ILVR_B2_SH(zero_m, dst0_m, zero_m, dst1_m, res0_m, res1_m); \

	1560 ADD2(res0_m, inp0_m, res1_m, inp1_m, res0_m, res1_m); \

	1561 CLIP_SH2_0_255(res0_m, res1_m); \

	1562 PCKEV_B2_SB(res0_m, res0_m, res1_m, res1_m, dst0_m, dst1_m); \

	1563 \

	1564 out0_m = __msa_copy_u_w((v4i32)dst0_m, 0); \

	1565 out1_m = __msa_copy_u_w((v4i32)dst0_m, 1); \

	1566 out2_m = __msa_copy_u_w((v4i32)dst1_m, 0); \

	1567 out3_m = __msa_copy_u_w((v4i32)dst1_m, 1); \

	1568 SW4(out0_m, out1_m, out2_m, out3_m, pdst, stride); \

	1569 }

	1570

1297 /* Description : Pack even elements of input vectors & xor with 128	1571 /* Description : Pack even elements of input vectors & xor with 128

1298 Arguments : Inputs - in0, in1	1572 Arguments : Inputs - in0, in1

1299 Outputs - out_m	1573 Outputs - out_m

1300 Return Type - unsigned byte (the statement expression evaluates to the v16u8 out_m)	1574 Return Type - unsigned byte (the statement expression evaluates to the v16u8 out_m)

1301 Details : Signed byte even elements from 'in0' and 'in1' are packed	1575 Details : Signed byte even elements from 'in0' and 'in1' are packed

1302 together in one vector and the resulting vector is xor'ed with	1576 together in one vector and the resulting vector is xor'ed with

1303 128 to shift the range from signed to unsigned byte; in1 is passed as the first operand of __msa_pckev_b and in0 as the second	1577 128 to shift the range from signed to unsigned byte; in1 is passed as the first operand of __msa_pckev_b and in0 as the second

1304 */	1578 */

1305 #define PCKEV_XORI128_UB(in0, in1) ({ \	1579 #define PCKEV_XORI128_UB(in0, in1) ({ \

1306 v16u8 out_m; \	1580 v16u8 out_m; \

1307 \	1581 \

1308 out_m = (v16u8)__msa_pckev_b((v16i8)in1, (v16i8)in0); \	1582 out_m = (v16u8)__msa_pckev_b((v16i8)in1, (v16i8)in0); \

1309 out_m = (v16u8)__msa_xori_b((v16u8)out_m, 128); \	1583 out_m = (v16u8)__msa_xori_b((v16u8)out_m, 128); \

1310 out_m; \	1584 out_m; \

1311 })	1585 })

1312	1586

	1587 /* Description : Converts inputs to unsigned bytes, interleave, average & store

	1588 as 8x4 unsigned byte block

	1589 Arguments : Inputs - in0, in1, in2, in3, dst0, dst1, dst2, dst3,

	1590 pdst, stride

	1591 Details : (in0, in1) and (in2, in3) are packed and range-shifted by PCKEV_XORI128_UB, (dst0, dst1) and (dst2, dst3) are merged by ILVR_D2_UB, the two sets are combined by AVER_UB2_UB and the result is handed to ST8x4_UB with pdst (as a uint8_t pointer) and stride */

	1592 #define CONVERT_UB_AVG_ST8x4_UB(in0, in1, in2, in3, \

	1593 dst0, dst1, dst2, dst3, pdst, stride) { \

	1594 v16u8 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \

	1595 uint8_t *pdst_m = (uint8_t *)(pdst); \

	1596 \

	1597 tmp0_m = PCKEV_XORI128_UB(in0, in1); \

	1598 tmp1_m = PCKEV_XORI128_UB(in2, in3); \

	1599 ILVR_D2_UB(dst1, dst0, dst3, dst2, tmp2_m, tmp3_m); \

	1600 AVER_UB2_UB(tmp0_m, tmp2_m, tmp1_m, tmp3_m, tmp0_m, tmp1_m); \

	1601 ST8x4_UB(tmp0_m, tmp1_m, pdst_m, stride); \

	1602 }

	1603

1313 /* Description : Pack even byte elements and store byte vector in destination	1604 /* Description : Pack even byte elements and store byte vector in destination

1314 memory	1605 memory

1315 Arguments : Inputs - in0, in1, pdst	1606 Arguments : Inputs - in0, in1, pdst

1316 Details : tmp_m = __msa_pckev_b(in1, in0), both operands cast to v16i8, is passed to ST_SB together with pdst */	1607 Details : tmp_m = __msa_pckev_b(in1, in0), both operands cast to v16i8, is passed to ST_SB together with pdst */

1317 #define PCKEV_ST_SB(in0, in1, pdst) { \	1608 #define PCKEV_ST_SB(in0, in1, pdst) { \

1318 v16i8 tmp_m; \	1609 v16i8 tmp_m; \

1319 \	1610 \

1320 tmp_m = __msa_pckev_b((v16i8)in1, (v16i8)in0); \	1611 tmp_m = __msa_pckev_b((v16i8)in1, (v16i8)in0); \

1321 ST_SB(tmp_m, (pdst)); \	1612 ST_SB(tmp_m, (pdst)); \

1322 }	1613 }

1323	1614

1324 /* Description : Horizontal 2 tap filter kernel code	1615 /* Description : Horizontal 2 tap filter kernel code

1325 Arguments : Inputs - in0, in1, mask, coeff, shift	1616 Arguments : Inputs - in0, in1, mask, coeff, shift

1326 Details : in1 and in0 are shuffled by mask (__msa_vshf_b), the shuffled bytes are combined with coeff by __msa_dotp_u_h, and the halfword sums pass through __msa_srari_h and then __msa_sat_u_h; the statement expression evaluates to the v8u16 result. NOTE(review): shift is used both as the srari amount and as the __msa_sat_u_h argument - confirm this coupling is intended */	1617 Details : in1 and in0 are shuffled by mask (__msa_vshf_b), the shuffled bytes are combined with coeff by __msa_dotp_u_h, and the halfword sums pass through __msa_srari_h and then __msa_sat_u_h; the statement expression evaluates to the v8u16 result. NOTE(review): shift is used both as the srari amount and as the __msa_sat_u_h argument - confirm this coupling is intended */

1327 #define HORIZ_2TAP_FILT_UH(in0, in1, mask, coeff, shift) ({ \	1618 #define HORIZ_2TAP_FILT_UH(in0, in1, mask, coeff, shift) ({ \

1328 v16i8 tmp0_m; \	1619 v16i8 tmp0_m; \

1329 v8u16 tmp1_m; \	1620 v8u16 tmp1_m; \

1330 \	1621 \

1331 tmp0_m = __msa_vshf_b((v16i8)mask, (v16i8)in1, (v16i8)in0); \	1622 tmp0_m = __msa_vshf_b((v16i8)mask, (v16i8)in1, (v16i8)in0); \

1332 tmp1_m = __msa_dotp_u_h((v16u8)tmp0_m, (v16u8)coeff); \	1623 tmp1_m = __msa_dotp_u_h((v16u8)tmp0_m, (v16u8)coeff); \

1333 tmp1_m = (v8u16)__msa_srari_h((v8i16)tmp1_m, shift); \	1624 tmp1_m = (v8u16)__msa_srari_h((v8i16)tmp1_m, shift); \

1334 tmp1_m = __msa_sat_u_h(tmp1_m, shift); \	1625 tmp1_m = __msa_sat_u_h(tmp1_m, shift); \

1335 \	1626 \

1336 tmp1_m; \	1627 tmp1_m; \

1337 })	1628 })

1338 #endif /* VP9_COMMON_MIPS_MSA_VP9_MACROS_MSA_H_ */	1629 #endif /* VP9_COMMON_MIPS_MSA_VP9_MACROS_MSA_H_ */

OLD	NEW

« no previous file with comments | « source/libvpx/vp9/common/mips/msa/vp9_loopfilter_msa.h ('k') | source/libvpx/vp9/common/vp9_blockd.h » ('j') | no next file with comments »