Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 /* | |
| 2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. | |
| 3 * | |
| 4 * Use of this source code is governed by a BSD-style license | |
| 5 * that can be found in the LICENSE file in the root of the source | |
| 6 * tree. An additional intellectual property rights grant can be found | |
| 7 * in the file PATENTS. All contributing project authors may | |
| 8 * be found in the AUTHORS file in the root of the source tree. | |
| 9 */ | |
| 10 | |
| 11 #ifndef __MACROS_MSA_H__ | |
| 12 #define __MACROS_MSA_H__ | |
|
fbarchard1
2016/09/14 01:48:04
Could you add:
#if !defined(LIBYUV_DISABLE_MSA) &
manojkumar.bhosale
2016/09/14 12:45:29
Done.
| |
| 13 | |
| 14 #include <stdint.h> | |
| 15 #include <msa.h> | |
| 16 | |
/* Description : Load one value of type RTYPE from address 'psrc'
   Arguments   : Inputs      - psrc
                 Return Type - as per RTYPE
   Details     : 'psrc' is cast to (RTYPE*) and dereferenced. The whole
                 expansion is parenthesized so that a postfix operator applied
                 to the result (subscript, member access) binds to the loaded
                 value and not to the pointer cast. The result is still an
                 lvalue.
*/
#define LD_B(RTYPE, psrc) (*((RTYPE*)(psrc)))
#define LD_UB(...) LD_B(v16u8, __VA_ARGS__)

/* Description : Store one value of type RTYPE to address 'pdst'
   Arguments   : Inputs - in, pdst
   Details     : 'pdst' is cast to (RTYPE*) and 'in' is assigned through it.
                 The assignment is parenthesized so the macro expands to a
                 single expression regardless of the surrounding operators.
*/
#define ST_B(RTYPE, in, pdst) (*((RTYPE*)(pdst)) = (in))
#define ST_UB(...) ST_B(v16u8, __VA_ARGS__)
| 22 | |
/* Description : Load N (2..8) vectors, one per row, starting at 'psrc'
   Arguments   : Inputs      - psrc, stride
                 Outputs     - out0 .. out(N-1)
                 Return Type - as per RTYPE
   Details     : 'outK' is loaded with LD_B from (psrc + K * stride), e.g.
                 16 byte elements per vector for the LD_UBn / LD_SBn aliases.
                 The offset is plain pointer arithmetic on 'psrc', so
                 'stride' is counted in units of the type 'psrc' points to.
                 'psrc' and 'stride' are parenthesized at every use so that
                 expression arguments (e.g. a + b) keep their value under the
                 'K * stride' multiplication. Both are evaluated several
                 times: do not pass expressions with side effects.
*/
#define LD_B2(RTYPE, psrc, stride, out0, out1) \
  {                                            \
    out0 = LD_B(RTYPE, (psrc));                \
    out1 = LD_B(RTYPE, (psrc) + (stride));     \
  }
#define LD_UB2(...) LD_B2(v16u8, __VA_ARGS__)
#define LD_SB2(...) LD_B2(v16i8, __VA_ARGS__)

#define LD_B3(RTYPE, psrc, stride, out0, out1, out2) \
  {                                                  \
    LD_B2(RTYPE, (psrc), (stride), out0, out1);      \
    out2 = LD_B(RTYPE, (psrc) + 2 * (stride));       \
  }
#define LD_UB3(...) LD_B3(v16u8, __VA_ARGS__)
#define LD_SB3(...) LD_B3(v16i8, __VA_ARGS__)

#define LD_B4(RTYPE, psrc, stride, out0, out1, out2, out3)       \
  {                                                              \
    LD_B2(RTYPE, (psrc), (stride), out0, out1);                  \
    LD_B2(RTYPE, (psrc) + 2 * (stride), (stride), out2, out3);   \
  }
#define LD_UB4(...) LD_B4(v16u8, __VA_ARGS__)
#define LD_SB4(...) LD_B4(v16i8, __VA_ARGS__)

#define LD_B5(RTYPE, psrc, stride, out0, out1, out2, out3, out4) \
  {                                                              \
    LD_B4(RTYPE, (psrc), (stride), out0, out1, out2, out3);      \
    out4 = LD_B(RTYPE, (psrc) + 4 * (stride));                   \
  }
#define LD_UB5(...) LD_B5(v16u8, __VA_ARGS__)
#define LD_SB5(...) LD_B5(v16i8, __VA_ARGS__)

#define LD_B6(RTYPE, psrc, stride, out0, out1, out2, out3, out4, out5) \
  {                                                                    \
    LD_B4(RTYPE, (psrc), (stride), out0, out1, out2, out3);            \
    LD_B2(RTYPE, (psrc) + 4 * (stride), (stride), out4, out5);         \
  }
#define LD_UB6(...) LD_B6(v16u8, __VA_ARGS__)
#define LD_SB6(...) LD_B6(v16i8, __VA_ARGS__)

#define LD_B7(RTYPE, psrc, stride,                                \
              out0, out1, out2, out3, out4, out5, out6)           \
  {                                                               \
    LD_B5(RTYPE, (psrc), (stride), out0, out1, out2, out3, out4); \
    LD_B2(RTYPE, (psrc) + 5 * (stride), (stride), out5, out6);    \
  }
#define LD_UB7(...) LD_B7(v16u8, __VA_ARGS__)
#define LD_SB7(...) LD_B7(v16i8, __VA_ARGS__)

#define LD_B8(RTYPE, psrc, stride,                                         \
              out0, out1, out2, out3, out4, out5, out6, out7)              \
  {                                                                        \
    LD_B4(RTYPE, (psrc), (stride), out0, out1, out2, out3);                \
    LD_B4(RTYPE, (psrc) + 4 * (stride), (stride), out4, out5, out6, out7); \
  }
#define LD_UB8(...) LD_B8(v16u8, __VA_ARGS__)
| 86 | |
/* Description : Store N (2..8) vectors, one per row, starting at 'pdst'
   Arguments   : Inputs - in0 .. in(N-1), pdst, stride
   Details     : 'inK' is stored with ST_B to (pdst + K * stride), e.g.
                 16 byte elements per vector for the ST_UBn / ST_SBn aliases.
                 The offset is plain pointer arithmetic on 'pdst', so
                 'stride' is counted in units of the type 'pdst' points to.
                 'pdst' and 'stride' are parenthesized at every use so that
                 expression arguments (e.g. a + b) keep their value under the
                 'K * stride' multiplication. Both are evaluated several
                 times: do not pass expressions with side effects.
                 Stores are issued in increasing K order.
*/
#define ST_B2(RTYPE, in0, in1, pdst, stride) \
  {                                          \
    ST_B(RTYPE, in0, (pdst));                \
    ST_B(RTYPE, in1, (pdst) + (stride));     \
  }
#define ST_UB2(...) ST_B2(v16u8, __VA_ARGS__)
#define ST_SB2(...) ST_B2(v16i8, __VA_ARGS__)

#define ST_B3(RTYPE, in0, in1, in2, pdst, stride) \
  {                                               \
    ST_B2(RTYPE, in0, in1, (pdst), (stride));     \
    ST_B(RTYPE, in2, (pdst) + 2 * (stride));      \
  }
#define ST_UB3(...) ST_B3(v16u8, __VA_ARGS__)

#define ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride)       \
  {                                                          \
    ST_B2(RTYPE, in0, in1, (pdst), (stride));                \
    ST_B2(RTYPE, in2, in3, (pdst) + 2 * (stride), (stride)); \
  }
#define ST_UB4(...) ST_B4(v16u8, __VA_ARGS__)
#define ST_SB4(...) ST_B4(v16i8, __VA_ARGS__)

#define ST_B5(RTYPE, in0, in1, in2, in3, in4, pdst, stride) \
  {                                                         \
    ST_B4(RTYPE, in0, in1, in2, in3, (pdst), (stride));     \
    ST_B(RTYPE, in4, (pdst) + 4 * (stride));                \
  }
#define ST_UB5(...) ST_B5(v16u8, __VA_ARGS__)

#define ST_B6(RTYPE, in0, in1, in2, in3, in4, in5, pdst, stride) \
  {                                                              \
    ST_B4(RTYPE, in0, in1, in2, in3, (pdst), (stride));          \
    ST_B2(RTYPE, in4, in5, (pdst) + 4 * (stride), (stride));     \
  }
#define ST_UB6(...) ST_B6(v16u8, __VA_ARGS__)

#define ST_B7(RTYPE, in0, in1, in2, in3, in4, in5, in6, pdst, stride) \
  {                                                                   \
    ST_B4(RTYPE, in0, in1, in2, in3, (pdst), (stride));               \
    ST_B2(RTYPE, in4, in5, (pdst) + 4 * (stride), (stride));          \
    ST_B(RTYPE, in6, (pdst) + 6 * (stride));                          \
  }
#define ST_UB7(...) ST_B7(v16u8, __VA_ARGS__)

#define ST_B8(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7,           \
              pdst, stride)                                            \
  {                                                                    \
    ST_B4(RTYPE, in0, in1, in2, in3, (pdst), (stride));                \
    ST_B4(RTYPE, in4, in5, in6, in7, (pdst) + 4 * (stride), (stride)); \
  }
#define ST_UB8(...) ST_B8(v16u8, __VA_ARGS__)
| 148 | |
/* Description : Shuffle byte vector elements as per mask vector
   Arguments   : Inputs      - in0, in1, in2, in3, mask0, mask1
                 Outputs     - out0, out1
                 Return Type - as per RTYPE
   Details     : Byte elements from 'in0' & 'in1' are copied selectively to
                 'out0' as per control vector 'mask0'.
                 Byte elements from 'in2' & 'in3' are copied selectively to
                 'out1' as per control vector 'mask1'.
                 Each call passes the mask as the first operand of
                 __msa_vshf_b, followed by the odd-numbered input and then the
                 even-numbered input. Every argument is parenthesized before
                 the (v16i8) cast so that the cast applies to the whole
                 argument expression.
*/
#define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1)         \
  {                                                                          \
    out0 = (RTYPE)__msa_vshf_b((v16i8)(mask0), (v16i8)(in1), (v16i8)(in0));  \
    out1 = (RTYPE)__msa_vshf_b((v16i8)(mask1), (v16i8)(in3), (v16i8)(in2));  \
  }
#define VSHF_B2_UB(...) VSHF_B2(v16u8, __VA_ARGS__)
| 162 #endif /* __MACROS_MSA_H__ */ | |
| OLD | NEW |