Chromium Code Reviews| Index: include/libyuv/cpu/mips/macros_msa.h |
| diff --git a/include/libyuv/cpu/mips/macros_msa.h b/include/libyuv/cpu/mips/macros_msa.h |
| new file mode 100755 |
| index 0000000000000000000000000000000000000000..33ae293794e1c4c71cb7625be7173578d2c91e65 |
| --- /dev/null |
| +++ b/include/libyuv/cpu/mips/macros_msa.h |
| @@ -0,0 +1,162 @@ |
| +/* |
| + * Copyright 2016 The LibYuv Project Authors. All rights reserved. |
| + * |
| + * Use of this source code is governed by a BSD-style license |
| + * that can be found in the LICENSE file in the root of the source |
| + * tree. An additional intellectual property rights grant can be found |
| + * in the file PATENTS. All contributing project authors may |
| + * be found in the AUTHORS file in the root of the source tree. |
| + */ |
| + |
| +#ifndef __MACROS_MSA_H__ |
| +#define __MACROS_MSA_H__ |
|
fbarchard1
2016/09/14 01:48:04
Could you add:
#if !defined(LIBYUV_DISABLE_MSA) &
manojkumar.bhosale
2016/09/14 12:45:29
Done.
|
| + |
| +#include <stdint.h> |
| +#include <msa.h> |
| + |
| +/* Description : Load / store a single vector through a pointer cast |
| +   Arguments   : LD_B - Inputs  - psrc |
| +                        Return Type - as per RTYPE |
| +                 ST_B - Inputs  - in, pdst |
| +   Details     : LD_B yields the RTYPE object located at (psrc) |
| +                 ST_B assigns 'in' to the RTYPE object located at (pdst) |
| +                 Each expansion is wrapped in parentheses so that it stays |
| +                 a single operand when the call site applies a postfix or |
| +                 binary operator to it. |
| +*/ |
| +#define LD_B(RTYPE, psrc) (*((RTYPE*)(psrc))) |
| +#define LD_UB(...) LD_B(v16u8, __VA_ARGS__) |
| + |
| +#define ST_B(RTYPE, in, pdst) (*((RTYPE*)(pdst)) = (in)) |
| +#define ST_UB(...) ST_B(v16u8, __VA_ARGS__) |
| + |
| +/* Description : Load two vectors with 16 'byte' sized elements |
| +   Arguments   : Inputs  - psrc, stride |
| +                 Outputs - out0, out1 |
| +                 Return Type - as per RTYPE |
| +   Details     : Load 16 byte elements in 'out0' from (psrc) |
| +                 Load 16 byte elements in 'out1' from (psrc + stride) |
| +                 The offset is plain pointer arithmetic on 'psrc', so it |
| +                 is counted in units of whatever 'psrc' points to. |
| +                 'stride' is parenthesized so that an expression argument |
| +                 is added as one operand. 'psrc' and 'stride' may be |
| +                 evaluated more than once; avoid side effects in them. |
| +*/ |
| +#define LD_B2(RTYPE, psrc, stride, out0, out1) \ |
| +{ \ |
| +  out0 = LD_B(RTYPE, (psrc)); \ |
| +  out1 = LD_B(RTYPE, (psrc) + (stride)); \ |
| +} |
| +#define LD_UB2(...) LD_B2(v16u8, __VA_ARGS__) |
| +#define LD_SB2(...) LD_B2(v16i8, __VA_ARGS__) |
| + |
| +/* Description : Load three vectors spaced 'stride' apart |
| +   Arguments   : Inputs  - psrc, stride |
| +                 Outputs - out0, out1, out2 |
| +                 Return Type - as per RTYPE |
| +   Details     : 'out0', 'out1' and 'out2' are loaded from (psrc), |
| +                 (psrc + stride) and (psrc + 2 * stride) |
| +                 'stride' is parenthesized before it is multiplied or |
| +                 forwarded so an expression argument scales as a whole. |
| +*/ |
| +#define LD_B3(RTYPE, psrc, stride, out0, out1, out2) \ |
| +{ \ |
| +  LD_B2(RTYPE, (psrc), (stride), out0, out1); \ |
| +  out2 = LD_B(RTYPE, (psrc) + 2 * (stride)); \ |
| +} |
| +#define LD_UB3(...) LD_B3(v16u8, __VA_ARGS__) |
| +#define LD_SB3(...) LD_B3(v16i8, __VA_ARGS__) |
| + |
| +/* Description : Load four vectors spaced 'stride' apart |
| +   Arguments   : Inputs  - psrc, stride |
| +                 Outputs - out0, out1, out2, out3 |
| +                 Return Type - as per RTYPE |
| +   Details     : 'out0'..'out3' are loaded from (psrc + k * stride) for |
| +                 k = 0..3, as two consecutive two-vector loads |
| +                 'stride' is parenthesized before it is multiplied or |
| +                 forwarded so an expression argument scales as a whole. |
| +*/ |
| +#define LD_B4(RTYPE, psrc, stride, out0, out1, out2, out3) \ |
| +{ \ |
| +  LD_B2(RTYPE, (psrc), (stride), out0, out1); \ |
| +  LD_B2(RTYPE, (psrc) + 2 * (stride), (stride), out2, out3); \ |
| +} |
| +#define LD_UB4(...) LD_B4(v16u8, __VA_ARGS__) |
| +#define LD_SB4(...) LD_B4(v16i8, __VA_ARGS__) |
| + |
| +/* Description : Load five vectors spaced 'stride' apart |
| +   Arguments   : Inputs  - psrc, stride |
| +                 Outputs - out0, out1, out2, out3, out4 |
| +                 Return Type - as per RTYPE |
| +   Details     : 'out0'..'out3' come from a four-vector load at (psrc); |
| +                 'out4' is loaded from (psrc + 4 * stride) |
| +                 'stride' is parenthesized before it is multiplied or |
| +                 forwarded so an expression argument scales as a whole. |
| +*/ |
| +#define LD_B5(RTYPE, psrc, stride, out0, out1, out2, out3, out4) \ |
| +{ \ |
| +  LD_B4(RTYPE, (psrc), (stride), out0, out1, out2, out3); \ |
| +  out4 = LD_B(RTYPE, (psrc) + 4 * (stride)); \ |
| +} |
| +#define LD_UB5(...) LD_B5(v16u8, __VA_ARGS__) |
| +#define LD_SB5(...) LD_B5(v16i8, __VA_ARGS__) |
| + |
| +/* Description : Load six vectors spaced 'stride' apart |
| +   Arguments   : Inputs  - psrc, stride |
| +                 Outputs - out0, out1, out2, out3, out4, out5 |
| +                 Return Type - as per RTYPE |
| +   Details     : 'out0'..'out3' come from a four-vector load at (psrc); |
| +                 'out4', 'out5' from a two-vector load at |
| +                 (psrc + 4 * stride) |
| +                 'stride' is parenthesized before it is multiplied or |
| +                 forwarded so an expression argument scales as a whole. |
| +*/ |
| +#define LD_B6(RTYPE, psrc, stride, out0, out1, out2, out3, out4, out5) \ |
| +{ \ |
| +  LD_B4(RTYPE, (psrc), (stride), out0, out1, out2, out3); \ |
| +  LD_B2(RTYPE, (psrc) + 4 * (stride), (stride), out4, out5); \ |
| +} |
| +#define LD_UB6(...) LD_B6(v16u8, __VA_ARGS__) |
| +#define LD_SB6(...) LD_B6(v16i8, __VA_ARGS__) |
| + |
| +/* Description : Load seven vectors spaced 'stride' apart |
| +   Arguments   : Inputs  - psrc, stride |
| +                 Outputs - out0, out1, out2, out3, out4, out5, out6 |
| +                 Return Type - as per RTYPE |
| +   Details     : 'out0'..'out4' come from a five-vector load at (psrc); |
| +                 'out5', 'out6' from a two-vector load at |
| +                 (psrc + 5 * stride) |
| +                 'stride' is parenthesized before it is multiplied or |
| +                 forwarded so an expression argument scales as a whole. |
| +*/ |
| +#define LD_B7(RTYPE, psrc, stride, \ |
| +              out0, out1, out2, out3, out4, out5, out6) \ |
| +{ \ |
| +  LD_B5(RTYPE, (psrc), (stride), out0, out1, out2, out3, out4); \ |
| +  LD_B2(RTYPE, (psrc) + 5 * (stride), (stride), out5, out6); \ |
| +} |
| +#define LD_UB7(...) LD_B7(v16u8, __VA_ARGS__) |
| +#define LD_SB7(...) LD_B7(v16i8, __VA_ARGS__) |
| + |
| +/* Description : Load eight vectors spaced 'stride' apart |
| +   Arguments   : Inputs  - psrc, stride |
| +                 Outputs - out0, out1, out2, out3, out4, out5, out6, out7 |
| +                 Return Type - as per RTYPE |
| +   Details     : 'out0'..'out3' come from a four-vector load at (psrc); |
| +                 'out4'..'out7' from a four-vector load at |
| +                 (psrc + 4 * stride) |
| +                 'stride' is parenthesized before it is multiplied or |
| +                 forwarded so an expression argument scales as a whole. |
| +*/ |
| +#define LD_B8(RTYPE, psrc, stride, \ |
| +              out0, out1, out2, out3, out4, out5, out6, out7) \ |
| +{ \ |
| +  LD_B4(RTYPE, (psrc), (stride), out0, out1, out2, out3); \ |
| +  LD_B4(RTYPE, (psrc) + 4 * (stride), (stride), out4, out5, out6, out7); \ |
| +} |
| +#define LD_UB8(...) LD_B8(v16u8, __VA_ARGS__) |
| + |
| +/* Description : Store two vectors with stride each having 16 'byte' sized |
| +                 elements |
| +   Arguments   : Inputs - in0, in1, pdst, stride |
| +   Details     : Store 16 byte elements from 'in0' to (pdst) |
| +                 Store 16 byte elements from 'in1' to (pdst + stride) |
| +                 The offset is plain pointer arithmetic on 'pdst', so it |
| +                 is counted in units of whatever 'pdst' points to. |
| +                 'stride' is parenthesized so that an expression argument |
| +                 is added as one operand. 'pdst' and 'stride' may be |
| +                 evaluated more than once; avoid side effects in them. |
| +*/ |
| +#define ST_B2(RTYPE, in0, in1, pdst, stride) \ |
| +{ \ |
| +  ST_B(RTYPE, in0, (pdst)); \ |
| +  ST_B(RTYPE, in1, (pdst) + (stride)); \ |
| +} |
| +#define ST_UB2(...) ST_B2(v16u8, __VA_ARGS__) |
| +#define ST_SB2(...) ST_B2(v16i8, __VA_ARGS__) |
| + |
| +/* Description : Store three vectors spaced 'stride' apart |
| +   Arguments   : Inputs - in0, in1, in2, pdst, stride |
| +   Details     : 'in0', 'in1' and 'in2' are stored to (pdst), |
| +                 (pdst + stride) and (pdst + 2 * stride) |
| +                 'stride' is parenthesized before it is multiplied or |
| +                 forwarded so an expression argument scales as a whole. |
| +*/ |
| +#define ST_B3(RTYPE, in0, in1, in2, pdst, stride) \ |
| +{ \ |
| +  ST_B2(RTYPE, in0, in1, (pdst), (stride)); \ |
| +  ST_B(RTYPE, in2, (pdst) + 2 * (stride)); \ |
| +} |
| +#define ST_UB3(...) ST_B3(v16u8, __VA_ARGS__) |
| + |
| +/* Description : Store four vectors spaced 'stride' apart |
| +   Arguments   : Inputs - in0, in1, in2, in3, pdst, stride |
| +   Details     : 'in0'..'in3' are stored to (pdst + k * stride) for |
| +                 k = 0..3, as two consecutive two-vector stores |
| +                 'stride' is parenthesized before it is multiplied or |
| +                 forwarded so an expression argument scales as a whole. |
| +*/ |
| +#define ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride) \ |
| +{ \ |
| +  ST_B2(RTYPE, in0, in1, (pdst), (stride)); \ |
| +  ST_B2(RTYPE, in2, in3, (pdst) + 2 * (stride), (stride)); \ |
| +} |
| +#define ST_UB4(...) ST_B4(v16u8, __VA_ARGS__) |
| +#define ST_SB4(...) ST_B4(v16i8, __VA_ARGS__) |
| + |
| +/* Description : Store five vectors spaced 'stride' apart |
| +   Arguments   : Inputs - in0, in1, in2, in3, in4, pdst, stride |
| +   Details     : 'in0'..'in3' are stored as two two-vector stores at (pdst) |
| +                 and (pdst + 2 * stride); 'in4' is stored to |
| +                 (pdst + 4 * stride) |
| +                 'stride' is parenthesized before it is multiplied or |
| +                 forwarded so an expression argument scales as a whole. |
| +*/ |
| +#define ST_B5(RTYPE, in0, in1, in2, in3, in4, pdst, stride) \ |
| +{ \ |
| +  ST_B2(RTYPE, in0, in1, (pdst), (stride)); \ |
| +  ST_B2(RTYPE, in2, in3, (pdst) + 2 * (stride), (stride)); \ |
| +  ST_B(RTYPE, in4, (pdst) + 4 * (stride)); \ |
| +} |
| +#define ST_UB5(...) ST_B5(v16u8, __VA_ARGS__) |
| + |
| +/* Description : Store six vectors spaced 'stride' apart |
| +   Arguments   : Inputs - in0, in1, in2, in3, in4, in5, pdst, stride |
| +   Details     : Three two-vector stores at (pdst), (pdst + 2 * stride) |
| +                 and (pdst + 4 * stride) write 'in0'..'in5' in order |
| +                 'stride' is parenthesized before it is multiplied or |
| +                 forwarded so an expression argument scales as a whole. |
| +*/ |
| +#define ST_B6(RTYPE, in0, in1, in2, in3, in4, in5, pdst, stride) \ |
| +{ \ |
| +  ST_B2(RTYPE, in0, in1, (pdst), (stride)); \ |
| +  ST_B2(RTYPE, in2, in3, (pdst) + 2 * (stride), (stride)); \ |
| +  ST_B2(RTYPE, in4, in5, (pdst) + 4 * (stride), (stride)); \ |
| +} |
| +#define ST_UB6(...) ST_B6(v16u8, __VA_ARGS__) |
| + |
| +/* Description : Store seven vectors spaced 'stride' apart |
| +   Arguments   : Inputs - in0, in1, in2, in3, in4, in5, in6, pdst, stride |
| +   Details     : Three two-vector stores at (pdst), (pdst + 2 * stride) |
| +                 and (pdst + 4 * stride) write 'in0'..'in5' in order; |
| +                 'in6' is stored to (pdst + 6 * stride) |
| +                 'stride' is parenthesized before it is multiplied or |
| +                 forwarded so an expression argument scales as a whole. |
| +*/ |
| +#define ST_B7(RTYPE, in0, in1, in2, in3, in4, in5, in6, pdst, stride) \ |
| +{ \ |
| +  ST_B2(RTYPE, in0, in1, (pdst), (stride)); \ |
| +  ST_B2(RTYPE, in2, in3, (pdst) + 2 * (stride), (stride)); \ |
| +  ST_B2(RTYPE, in4, in5, (pdst) + 4 * (stride), (stride)); \ |
| +  ST_B(RTYPE, in6, (pdst) + 6 * (stride)); \ |
| +} |
| +#define ST_UB7(...) ST_B7(v16u8, __VA_ARGS__) |
| + |
| +/* Description : Store eight vectors spaced 'stride' apart |
| +   Arguments   : Inputs - in0, in1, in2, in3, in4, in5, in6, in7, pdst, |
| +                          stride |
| +   Details     : 'in0'..'in3' go to a four-vector store at (pdst); |
| +                 'in4'..'in7' to a four-vector store at |
| +                 (pdst + 4 * stride) |
| +                 'pdst' and 'stride' are parenthesized before they are |
| +                 combined or forwarded so expression arguments are treated |
| +                 as single operands. |
| +*/ |
| +#define ST_B8(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ |
| +              pdst, stride) \ |
| +{ \ |
| +  ST_B4(RTYPE, in0, in1, in2, in3, (pdst), (stride)); \ |
| +  ST_B4(RTYPE, in4, in5, in6, in7, (pdst) + 4 * (stride), (stride)); \ |
| +} |
| +#define ST_UB8(...) ST_B8(v16u8, __VA_ARGS__) |
| + |
| +/* Description : Shuffle byte vector elements as per mask vector |
| +   Arguments   : Inputs  - in0, in1, in2, in3, mask0, mask1 |
| +                 Outputs - out0, out1 |
| +                 Return Type - as per RTYPE |
| +   Details     : Byte elements from 'in0' & 'in1' are copied selectively to |
| +                 'out0' as per control vector 'mask0' |
| +                 Byte elements from 'in2' & 'in3' are copied selectively to |
| +                 'out1' as per control vector 'mask1' |
| +                 Every input is parenthesized before its v16i8 cast so the |
| +                 cast applies to the whole argument expression rather than |
| +                 to its first operand only. |
| +*/ |
| +#define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \ |
| +{ \ |
| +  out0 = (RTYPE) __msa_vshf_b((v16i8)(mask0), (v16i8)(in1), (v16i8)(in0)); \ |
| +  out1 = (RTYPE) __msa_vshf_b((v16i8)(mask1), (v16i8)(in3), (v16i8)(in2)); \ |
| +} |
| +#define VSHF_B2_UB(...) VSHF_B2(v16u8, __VA_ARGS__) |
| +#endif /* __MACROS_MSA_H__ */ |