| OLD | NEW |
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. | 1 // Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef CommonMacrosMSA_h | 5 #ifndef CommonMacrosMSA_h |
| 6 #define CommonMacrosMSA_h | 6 #define CommonMacrosMSA_h |
| 7 | 7 |
| 8 #include <msa.h> | 8 #include <msa.h> |
| 9 #include <stdint.h> | 9 #include <stdint.h> |
| 10 | 10 |
| (...skipping 124 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 135 | 135 |
| 136 #define LD_V4(RTYPE, psrc, stride, out0, out1, out2, out3) \ | 136 #define LD_V4(RTYPE, psrc, stride, out0, out1, out2, out3) \ |
| 137 { \ | 137 { \ |
| 138 LD_V2(RTYPE, psrc, stride, out0, out1); \ | 138 LD_V2(RTYPE, psrc, stride, out0, out1); \ |
| 139 LD_V2(RTYPE, psrc, stride, out2, out3); \ | 139 LD_V2(RTYPE, psrc, stride, out2, out3); \ |
| 140 } | 140 } |
| 141 #define LD_UB4(...) LD_V4(v16u8, __VA_ARGS__) | 141 #define LD_UB4(...) LD_V4(v16u8, __VA_ARGS__) |
| 142 #define LD_UH4(...) LD_V4(v8u16, __VA_ARGS__) | 142 #define LD_UH4(...) LD_V4(v8u16, __VA_ARGS__) |
| 143 #define LD_SP4(...) LD_V4(v4f32, __VA_ARGS__) | 143 #define LD_SP4(...) LD_V4(v4f32, __VA_ARGS__) |
| 144 | 144 |
| 145 #define LD_V5(RTYPE, psrc, stride, out0, out1, out2, out3, out4) \ |
| 146 { \ |
| 147 LD_V4(RTYPE, psrc, stride, out0, out1, out2, out3); \ |
| 148 out4 = LD_V(RTYPE, psrc); \ |
| 149 psrc += stride; \ |
| 150 } |
| 151 #define LD_UB5(...) LD_V5(v16u8, __VA_ARGS__) |
| 152 |
| 145 #define LD_V6(RTYPE, psrc, stride, out0, out1, out2, out3, out4, out5) \ | 153 #define LD_V6(RTYPE, psrc, stride, out0, out1, out2, out3, out4, out5) \ |
| 146 { \ | 154 { \ |
| 147 LD_V4(RTYPE, psrc, stride, out0, out1, out2, out3); \ | 155 LD_V4(RTYPE, psrc, stride, out0, out1, out2, out3); \ |
| 148 LD_V2(RTYPE, psrc, stride, out4, out5); \ | 156 LD_V2(RTYPE, psrc, stride, out4, out5); \ |
| 149 } | 157 } |
| 150 #define LD_UB6(...) LD_V6(v16u8, __VA_ARGS__) | 158 #define LD_UB6(...) LD_V6(v16u8, __VA_ARGS__) |
| 151 #define LD_UH6(...) LD_V6(v8u16, __VA_ARGS__) | 159 #define LD_UH6(...) LD_V6(v8u16, __VA_ARGS__) |
| 152 #define LD_SP6(...) LD_V6(v4f32, __VA_ARGS__) | 160 #define LD_SP6(...) LD_V6(v4f32, __VA_ARGS__) |
| 153 | 161 |
| 162 #define LD_V7(RTYPE, psrc, stride, out0, out1, out2, out3, out4, out5, out6) \ |
| 163 { \ |
| 164 LD_V5(RTYPE, psrc, stride, out0, out1, out2, out3, out4); \ |
| 165 LD_V2(RTYPE, psrc, stride, out5, out6); \ |
| 166 } |
| 167 #define LD_UB7(...) LD_V7(v16u8, __VA_ARGS__) |
| 168 |
| 154 #define LD_V8(RTYPE, psrc, stride, out0, out1, out2, out3, out4, out5, out6, \ | 169 #define LD_V8(RTYPE, psrc, stride, out0, out1, out2, out3, out4, out5, out6, \ |
| 155 out7) \ | 170 out7) \ |
| 156 { \ | 171 { \ |
| 157 LD_V4(RTYPE, psrc, stride, out0, out1, out2, out3); \ | 172 LD_V4(RTYPE, psrc, stride, out0, out1, out2, out3); \ |
| 158 LD_V4(RTYPE, psrc, stride, out4, out5, out6, out7); \ | 173 LD_V4(RTYPE, psrc, stride, out4, out5, out6, out7); \ |
| 159 } | 174 } |
| 160 #define LD_UB8(...) LD_V8(v16u8, __VA_ARGS__) | 175 #define LD_UB8(...) LD_V8(v16u8, __VA_ARGS__) |
| 161 #define LD_UH8(...) LD_V8(v8u16, __VA_ARGS__) | 176 #define LD_UH8(...) LD_V8(v8u16, __VA_ARGS__) |
| 162 #define LD_SP8(...) LD_V8(v4f32, __VA_ARGS__) | 177 #define LD_SP8(...) LD_V8(v4f32, __VA_ARGS__) |
| 163 #define LD_DP8(...) LD_V8(v2f64, __VA_ARGS__) | 178 #define LD_DP8(...) LD_V8(v2f64, __VA_ARGS__) |
| (...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 205 #define ST_SP6(...) ST_V6(v4f32, __VA_ARGS__) | 220 #define ST_SP6(...) ST_V6(v4f32, __VA_ARGS__) |
| 206 | 221 |
| 207 #define ST_V8(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride) \ | 222 #define ST_V8(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride) \ |
| 208 { \ | 223 { \ |
| 209 ST_V4(RTYPE, in0, in1, in2, in3, pdst, stride); \ | 224 ST_V4(RTYPE, in0, in1, in2, in3, pdst, stride); \ |
| 210 ST_V4(RTYPE, in4, in5, in6, in7, pdst, stride); \ | 225 ST_V4(RTYPE, in4, in5, in6, in7, pdst, stride); \ |
| 211 } | 226 } |
| 212 #define ST_UB8(...) ST_V8(v16u8, __VA_ARGS__) | 227 #define ST_UB8(...) ST_V8(v16u8, __VA_ARGS__) |
| 213 #define ST_SP8(...) ST_V8(v4f32, __VA_ARGS__) | 228 #define ST_SP8(...) ST_V8(v4f32, __VA_ARGS__) |
| 214 | 229 |
| 230 /* Description : Store 8x1 byte block to destination memory from input vector |
| 231 Arguments : Inputs - in, pdst |
| 232 Details : Index 0 double word element from 'in' vector is copied to the |
| 233 GP register and stored to (pdst) |
| 234 */ |
| 235 #define ST8x1_UB(in, pdst) \ |
| 236 { \ |
| 237 const uint64_t out0m = __msa_copy_s_d((v2i64)in, 0); \ |
| 238 SD(out0m, pdst); \ |
| 239 } |
| 240 |
| 215 /* Description : Logical AND of input vectors with a mask. | 241 /* Description : Logical AND of input vectors with a mask. |
| 216 Arguments : Inputs - in0, in1, mask | 242 Arguments : Inputs - in0, in1, mask |
| 217 Outputs - out0, out1 | 243 Outputs - out0, out1 |
| 218 Return Type - as per RTYPE | 244 Return Type - as per RTYPE |
| 219 Details : Each element of 'in0' is bitwise ANDed with the | 245 Details : Each element of 'in0' is bitwise ANDed with the |
| 220 corresponding element of 'mask' and written to 'out0'; | 246 corresponding element of 'mask' and written to 'out0'; |
| 221 likewise 'in1' is ANDed with 'mask' into 'out1'. | 247 likewise 'in1' is ANDed with 'mask' into 'out1'. |
| 222 */ | 248 */ |
| 223 #define AND_V2(RTYPE, in0, in1, mask, out0, out1) \ | 249 #define AND_V2(RTYPE, in0, in1, mask, out0, out1) \ |
| 224 { \ | 250 { \ |
| (...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 290 'out0' as per control variable 'shf_val'. | 316 'out0' as per control variable 'shf_val'. |
| 291 */ | 317 */ |
| 292 #define SHF_B2(RTYPE, in0, in1, shf_val) \ | 318 #define SHF_B2(RTYPE, in0, in1, shf_val) \ |
| 293 { \ | 319 { \ |
| 294 in0 = (RTYPE)__msa_shf_b((v16i8)in0, shf_val); \ | 320 in0 = (RTYPE)__msa_shf_b((v16i8)in0, shf_val); \ |
| 295 in1 = (RTYPE)__msa_shf_b((v16i8)in1, shf_val); \ | 321 in1 = (RTYPE)__msa_shf_b((v16i8)in1, shf_val); \ |
| 296 } | 322 } |
| 297 #define SHF_B2_UB(...) SHF_B2(v16u8, __VA_ARGS__) | 323 #define SHF_B2_UB(...) SHF_B2(v16u8, __VA_ARGS__) |
| 298 #define SHF_B2_UH(...) SHF_B2(v8u16, __VA_ARGS__) | 324 #define SHF_B2_UH(...) SHF_B2(v8u16, __VA_ARGS__) |
| 299 | 325 |
| 326 #define SHF_B3(RTYPE, in0, in1, in2, shf_val) \ |
| 327 { \ |
| 328 SHF_B2(RTYPE, in0, in1, shf_val); \ |
| 329 in2 = (RTYPE)__msa_shf_b((v16i8)in2, shf_val); \ |
| 330 } |
| 331 #define SHF_B3_UB(...) SHF_B3(v16u8, __VA_ARGS__) |
| 332 #define SHF_B3_UH(...) SHF_B3(v8u16, __VA_ARGS__) |
| 333 |
| 300 #define SHF_B4(RTYPE, in0, in1, in2, in3, shf_val) \ | 334 #define SHF_B4(RTYPE, in0, in1, in2, in3, shf_val) \ |
| 301 { \ | 335 { \ |
| 302 SHF_B2(RTYPE, in0, in1, shf_val); \ | 336 SHF_B2(RTYPE, in0, in1, shf_val); \ |
| 303 SHF_B2(RTYPE, in2, in3, shf_val); \ | 337 SHF_B2(RTYPE, in2, in3, shf_val); \ |
| 304 } | 338 } |
| 305 #define SHF_B4_UB(...) SHF_B4(v16u8, __VA_ARGS__) | 339 #define SHF_B4_UB(...) SHF_B4(v16u8, __VA_ARGS__) |
| 306 #define SHF_B4_UH(...) SHF_B4(v8u16, __VA_ARGS__) | 340 #define SHF_B4_UH(...) SHF_B4(v8u16, __VA_ARGS__) |
| 307 | 341 |
| 342 /* Description : Shuffle byte vector elements as per mask vector |
| 343 Arguments : Inputs - in0, in1, mask |
| 344 Return Type - as per RTYPE |
| 345 Details : Byte elements from 'in0' & 'in1' are copied selectively |
| 346 to the result as per control vector 'mask'; the macro |
| 347 evaluates to the shuffled vector (no output arguments) |
| 348 */ |
| 349 #define VSHF_B(RTYPE, in0, in1, mask) \ |
| 350 (RTYPE) __msa_vshf_b((v16i8)mask, (v16i8)in1, (v16i8)in0); |
| 351 #define VSHF_UB(...) VSHF_B(v16u8, __VA_ARGS__) |
| 352 |
| 308 /* Description : Interleave even byte elements from vectors | 353 /* Description : Interleave even byte elements from vectors |
| 309 Arguments : Inputs - in0, in1, in2, in3 | 354 Arguments : Inputs - in0, in1, in2, in3 |
| 310 Outputs - out0, out1 | 355 Outputs - out0, out1 |
| 311 Return Type - as per RTYPE | 356 Return Type - as per RTYPE |
| 312 Details : Even byte elements of 'in0' and 'in1' are interleaved | 357 Details : Even byte elements of 'in0' and 'in1' are interleaved |
| 313 and written to 'out0' | 358 and written to 'out0' |
| 314 */ | 359 */ |
| 315 #define ILVEV_B2(RTYPE, in0, in1, in2, in3, out0, out1) \ | 360 #define ILVEV_B2(RTYPE, in0, in1, in2, in3, out0, out1) \ |
| 316 { \ | 361 { \ |
| 317 out0 = (RTYPE)__msa_ilvev_b((v16i8)in1, (v16i8)in0); \ | 362 out0 = (RTYPE)__msa_ilvev_b((v16i8)in1, (v16i8)in0); \ |
| (...skipping 16 matching lines...) Expand all Loading... |
| 334 Details : Even halfword elements of 'in0' and 'in1' are interleaved | 379 Details : Even halfword elements of 'in0' and 'in1' are interleaved |
| 335 and written to 'out0' | 380 and written to 'out0' |
| 336 */ | 381 */ |
| 337 #define ILVEV_H2(RTYPE, in0, in1, in2, in3, out0, out1) \ | 382 #define ILVEV_H2(RTYPE, in0, in1, in2, in3, out0, out1) \ |
| 338 { \ | 383 { \ |
| 339 out0 = (RTYPE)__msa_ilvev_h((v8i16)in1, (v8i16)in0); \ | 384 out0 = (RTYPE)__msa_ilvev_h((v8i16)in1, (v8i16)in0); \ |
| 340 out1 = (RTYPE)__msa_ilvev_h((v8i16)in3, (v8i16)in2); \ | 385 out1 = (RTYPE)__msa_ilvev_h((v8i16)in3, (v8i16)in2); \ |
| 341 } | 386 } |
| 342 #define ILVEV_H2_UB(...) ILVEV_H2(v16u8, __VA_ARGS__) | 387 #define ILVEV_H2_UB(...) ILVEV_H2(v16u8, __VA_ARGS__) |
| 343 | 388 |
| 389 /* Description : Interleave right half of double word elements from vectors |
| 390 * Arguments : Inputs - in0, in1, in2, in3 |
| 391 * Outputs - out0, out1 |
| 392 * Return Type - as per RTYPE |
| 393 * Details : Right half of double word elements of 'in0' and 'in1' are |
| 394 * interleaved and written to 'out0'. |
| 395 */ |
| 396 #define ILVR_D2(RTYPE, in0, in1, in2, in3, out0, out1) \ |
| 397 { \ |
| 398 out0 = (RTYPE)__msa_ilvr_d((v2i64)in0, (v2i64)in1); \ |
| 399 out1 = (RTYPE)__msa_ilvr_d((v2i64)in2, (v2i64)in3); \ |
| 400 } |
| 401 #define ILVR_D2_UB(...) ILVR_D2(v16u8, __VA_ARGS__) |
| 402 |
| 403 #define ILVR_D3(RTYPE, in0, in1, in2, in3, in4, in5, out0, out1, out2) \ |
| 404 { \ |
| 405 ILVR_D2(RTYPE, in0, in1, in2, in3, out0, out1); \ |
| 406 out2 = (RTYPE)__msa_ilvr_d((v2i64)in4, (v2i64)in5); \ |
| 407 } |
| 408 #define ILVR_D3_UB(...) ILVR_D3(v16u8, __VA_ARGS__) |
| 409 |
| 410 #define ILVR_D4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \ |
| 411 out2, out3) \ |
| 412 { \ |
| 413 ILVR_D2(RTYPE, in0, in1, in2, in3, out0, out1); \ |
| 414 ILVR_D2(RTYPE, in4, in5, in6, in7, out2, out3); \ |
| 415 } |
| 416 #define ILVR_D4_UB(...) ILVR_D4(v16u8, __VA_ARGS__) |
| 417 |
| 344 /* Description : Interleave both left and right half of input vectors | 418 /* Description : Interleave both left and right half of input vectors |
| 345 Arguments : Inputs - in0, in1 | 419 Arguments : Inputs - in0, in1 |
| 346 Outputs - out0, out1 | 420 Outputs - out0, out1 |
| 347 Return Type - as per RTYPE | 421 Return Type - as per RTYPE |
| 348 Details : Right half of byte elements from 'in0' and 'in1' are | 422 Details : Right half of byte elements from 'in0' and 'in1' are |
| 349 interleaved and written to 'out0' | 423 interleaved and written to 'out0' |
| 350 */ | 424 */ |
| 351 #define ILVRL_B2(RTYPE, in0, in1, out0, out1) \ | 425 #define ILVRL_B2(RTYPE, in0, in1, out0, out1) \ |
| 352 { \ | 426 { \ |
| 353 out0 = (RTYPE)__msa_ilvr_b((v16i8)in0, (v16i8)in1); \ | 427 out0 = (RTYPE)__msa_ilvr_b((v16i8)in0, (v16i8)in1); \ |
| (...skipping 15 matching lines...) Expand all Loading... |
| 369 Details : Odd half of byte elements from 'in0' and 'in1' are | 443 Details : Odd half of byte elements from 'in0' and 'in1' are |
| 370 interleaved and written to 'out0' | 444 interleaved and written to 'out0' |
| 371 */ | 445 */ |
| 372 #define ILVODEV_B2(RTYPE, in0, in1, out0, out1) \ | 446 #define ILVODEV_B2(RTYPE, in0, in1, out0, out1) \ |
| 373 { \ | 447 { \ |
| 374 out0 = (RTYPE)__msa_ilvod_b((v16i8)in0, (v16i8)in1); \ | 448 out0 = (RTYPE)__msa_ilvod_b((v16i8)in0, (v16i8)in1); \ |
| 375 out1 = (RTYPE)__msa_ilvev_b((v16i8)in0, (v16i8)in1); \ | 449 out1 = (RTYPE)__msa_ilvev_b((v16i8)in0, (v16i8)in1); \ |
| 376 } | 450 } |
| 377 #define ILVODEV_B2_UB(...) ILVODEV_B2(v16u8, __VA_ARGS__) | 451 #define ILVODEV_B2_UB(...) ILVODEV_B2(v16u8, __VA_ARGS__) |
| 378 | 452 |
| 453 /* Description : Pack even byte elements of vector pairs |
| 454 * Arguments : Inputs - in0, in1, in2, in3 |
| 455 * Outputs - out0, out1 |
| 456 * Return Type - as per RTYPE |
| 457 * Details : Even byte elements of 'in0' are copied to the left half of |
| 458 * 'out0' & even byte elements of 'in1' are copied to the right |
| 459 * half of 'out0'. |
| 460 */ |
| 461 #define PCKEV_B2(RTYPE, in0, in1, in2, in3, out0, out1) \ |
| 462 { \ |
| 463 out0 = (RTYPE)__msa_pckev_b((v16i8)in0, (v16i8)in1); \ |
| 464 out1 = (RTYPE)__msa_pckev_b((v16i8)in2, (v16i8)in3); \ |
| 465 } |
| 466 #define PCKEV_B2_UB(...) PCKEV_B2(v16u8, __VA_ARGS__) |
| 467 #define PCKEV_B2_UH(...) PCKEV_B2(v8u16, __VA_ARGS__) |
| 468 |
| 469 #define PCKEV_B3(RTYPE, in0, in1, in2, in3, in4, in5, out0, out1, out2) \ |
| 470 { \ |
| 471 PCKEV_B2(RTYPE, in0, in1, in2, in3, out0, out1); \ |
| 472 out2 = (RTYPE)__msa_pckev_b((v16i8)in4, (v16i8)in5); \ |
| 473 } |
| 474 #define PCKEV_B3_UH(...) PCKEV_B3(v8u16, __VA_ARGS__) |
| 475 |
| 476 #define PCKEV_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \ |
| 477 out2, out3) \ |
| 478 { \ |
| 479 PCKEV_B2(RTYPE, in0, in1, in2, in3, out0, out1); \ |
| 480 PCKEV_B2(RTYPE, in4, in5, in6, in7, out2, out3); \ |
| 481 } |
| 482 #define PCKEV_B4_UH(...) PCKEV_B4(v8u16, __VA_ARGS__) |
| 483 |
| 379 /* Description : Pack even halfword elements of vector pairs | 484 /* Description : Pack even halfword elements of vector pairs |
| 380 Arguments : Inputs - in0, in1, in2, in3 | 485 Arguments : Inputs - in0, in1, in2, in3 |
| 381 Outputs - out0, out1 | 486 Outputs - out0, out1 |
| 382 Return Type - as per RTYPE | 487 Return Type - as per RTYPE |
| 383 Details : Even halfword elements of 'in0' are copied to the left half of | 488 Details : Even halfword elements of 'in0' are copied to the left half of |
| 384 'out0' & even halfword elements of 'in1' are copied to the | 489 'out0' & even halfword elements of 'in1' are copied to the |
| 385 right half of 'out0'. | 490 right half of 'out0'. |
| 386 */ | 491 */ |
| 387 #define PCKEV_H2(RTYPE, in0, in1, in2, in3, out0, out1) \ | 492 #define PCKEV_H2(RTYPE, in0, in1, in2, in3, out0, out1) \ |
| 388 { \ | 493 { \ |
| (...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 457 } | 562 } |
| 458 #define SRLI_B3_UB(...) SRLI_B3(v16u8, __VA_ARGS__) | 563 #define SRLI_B3_UB(...) SRLI_B3(v16u8, __VA_ARGS__) |
| 459 | 564 |
| 460 #define SRLI_B4(RTYPE, in0, in1, in2, in3, shift_val) \ | 565 #define SRLI_B4(RTYPE, in0, in1, in2, in3, shift_val) \ |
| 461 { \ | 566 { \ |
| 462 SRLI_B2(RTYPE, in0, in1, shift_val); \ | 567 SRLI_B2(RTYPE, in0, in1, shift_val); \ |
| 463 SRLI_B2(RTYPE, in2, in3, shift_val); \ | 568 SRLI_B2(RTYPE, in2, in3, shift_val); \ |
| 464 } | 569 } |
| 465 #define SRLI_B4_UB(...) SRLI_B4(v16u8, __VA_ARGS__) | 570 #define SRLI_B4_UB(...) SRLI_B4(v16u8, __VA_ARGS__) |
| 466 | 571 |
| 572 /* Description : Logical shift right all elements of vector (immediate) |
| 573 Arguments : Inputs - in0, in1, in2, in3, shift |
| 574 Outputs - out0, out1, out2, out3 |
| 575 Return Type - as per RTYPE |
| 576 Details : Each element of vector 'in0' is right shifted by 'shift' and |
| 577 the result is written in 'out0'. 'shift' is an immediate value. |
| 578 */ |
| 579 #define SRLI_H2(RTYPE, in0, in1, out0, out1, shift) \ |
| 580 { \ |
| 581 out0 = (RTYPE)SRLI_H((v8i16)in0, shift); \ |
| 582 out1 = (RTYPE)SRLI_H((v8i16)in1, shift); \ |
| 583 } |
| 584 #define SRLI_H2_UB(...) SRLI_H2(v16u8, __VA_ARGS__) |
| 585 |
| 586 #define SRLI_H4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3, shift) \ |
| 587 { \ |
| 588 SRLI_H2(RTYPE, in0, in1, out0, out1, shift); \ |
| 589 SRLI_H2(RTYPE, in2, in3, out2, out3, shift); \ |
| 590 } |
| 591 #define SRLI_H4_UB(...) SRLI_H4(v16u8, __VA_ARGS__) |
| 592 |
| 593 /* Description : Immediate Bit Insert Left (immediate) |
| 594 Arguments : Inputs - in0, in1, in2, in3, shift |
| 595 Outputs - out0, out1 |
| 596 Return Type - as per RTYPE |
| 597 Details : Copy most significant (left) bits in each element of vector |
| 598 'in1' to elements in vector in0 while preserving the least |
| 599 significant (right) bits. The number of bits to copy is given |
| 600 by the immediate 'shift + 1'. |
| 601 */ |
| 602 #define BINSLI_B2(RTYPE, in0, in1, in2, in3, out0, out1, shift) \ |
| 603 { \ |
| 604 out0 = (RTYPE)__msa_binsli_b((v16u8)in0, (v16u8)in1, shift); \ |
| 605 out1 = (RTYPE)__msa_binsli_b((v16u8)in2, (v16u8)in3, shift); \ |
| 606 } |
| 607 #define BINSLI_B2_UB(...) BINSLI_B2(v16u8, __VA_ARGS__) |
| 608 |
| 467 /* Description : Immediate Bit Insert Right (immediate) | 609 /* Description : Immediate Bit Insert Right (immediate) |
| 468 Arguments : Inputs - in0, in1, in2, in3, shift | 610 Arguments : Inputs - in0, in1, in2, in3, shift |
| 469 Outputs - out0, out1 | 611 Outputs - out0, out1 |
| 470 Return Type - as per RTYPE | 612 Return Type - as per RTYPE |
| 471 Details : Copy least significant (right) bits in each element of vector | 613 Details : Copy least significant (right) bits in each element of vector |
| 472 'in1' to elements in vector in0 while preserving the most | 614 'in1' to elements in vector in0 while preserving the most |
| 473 significant (left) bits. The number of bits to copy is given | 615 significant (left) bits. The number of bits to copy is given |
| 474 by the immediate 'shift + 1'. | 616 by the immediate 'shift + 1'. |
| 475 */ | 617 */ |
| 476 #define BINSRI_B2(RTYPE, in0, in1, in2, in3, out0, out1, shift) \ | 618 #define BINSRI_B2(RTYPE, in0, in1, in2, in3, out0, out1, shift) \ |
| (...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 554 #define FTRUNCU_W2_UB(...) FTRUNCU_W2(v16u8, __VA_ARGS__) | 696 #define FTRUNCU_W2_UB(...) FTRUNCU_W2(v16u8, __VA_ARGS__) |
| 555 | 697 |
| 556 #define FTRUNCU_W4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) \ | 698 #define FTRUNCU_W4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) \ |
| 557 { \ | 699 { \ |
| 558 FTRUNCU_W2(RTYPE, in0, in1, out0, out1); \ | 700 FTRUNCU_W2(RTYPE, in0, in1, out0, out1); \ |
| 559 FTRUNCU_W2(RTYPE, in2, in3, out2, out3); \ | 701 FTRUNCU_W2(RTYPE, in2, in3, out2, out3); \ |
| 560 } | 702 } |
| 561 #define FTRUNCU_W4_UB(...) FTRUNCU_W4(v16u8, __VA_ARGS__) | 703 #define FTRUNCU_W4_UB(...) FTRUNCU_W4(v16u8, __VA_ARGS__) |
| 562 | 704 |
| 563 #endif // CommonMacrosMSA_h | 705 #endif // CommonMacrosMSA_h |
| OLD | NEW |