| OLD | NEW |
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. | 1 // Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef CommonMacrosMSA_h | 5 #ifndef CommonMacrosMSA_h |
| 6 #define CommonMacrosMSA_h | 6 #define CommonMacrosMSA_h |
| 7 | 7 |
| 8 #include <msa.h> | 8 #include <msa.h> |
| 9 #include <stdint.h> | 9 #include <stdint.h> |
| 10 | 10 |
| (...skipping 504 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 515 { \ | 515 { \ |
| 516 out0 = in0 / in1; \ | 516 out0 = in0 / in1; \ |
| 517 out1 = in2 / in3; \ | 517 out1 = in2 / in3; \ |
| 518 } | 518 } |
/* Description : Division of 4 pairs of operands
   Arguments   : Inputs  - in0, in1, in2, in3, in4, in5, in6, in7
                 Outputs - out0, out1, out2, out3
   Details     : 'in0' is divided by 'in1' and result is written to 'out0'
                 'in2' is divided by 'in3' and result is written to 'out1'
                 'in4' is divided by 'in5' and result is written to 'out2'
                 'in6' is divided by 'in7' and result is written to 'out3'
                 Inputs are parenthesized before being forwarded to DIV2 so
                 that an expression argument (e.g. 'a + b') is divided as a
                 whole instead of being split by operator precedence.
                 Expands to a brace-enclosed block, not an expression.
*/
#define DIV4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \
  {                                                                          \
    DIV2((in0), (in1), (in2), (in3), out0, out1);                            \
    DIV2((in4), (in5), (in6), (in7), out2, out3);                            \
  }
| 524 | 524 |
/* Description : Bitwise AND of 4 vectors with a common mask
   Arguments   : Inputs  - in0, in1, in2, in3, mask
                 Outputs - in0, in1, in2, in3 (updated in place)
                 Return Type - as per RTYPE
   Details     : 'in0' is cast to v16i8, bitwise AND'ed with 'mask' (also cast
                 to v16i8) and the result is cast to RTYPE and stored back in
                 'in0'. The same is done for 'in1', 'in2' and 'in3'.
                 Every argument is parenthesized before the cast so that an
                 expression argument is cast and AND'ed as a whole.
                 'mask' is expanded once per vector, i.e. four times.
                 Expands to a brace-enclosed block, not an expression.
*/
#define AND_W4(RTYPE, in0, in1, in2, in3, mask)  \
  {                                              \
    in0 = (RTYPE)((v16i8)(in0) & (v16i8)(mask)); \
    in1 = (RTYPE)((v16i8)(in1) & (v16i8)(mask)); \
    in2 = (RTYPE)((v16i8)(in2) & (v16i8)(mask)); \
    in3 = (RTYPE)((v16i8)(in3) & (v16i8)(mask)); \
  }
/* AND_W4 with the results cast to v4f32. */
#define AND_W4_SP(...) AND_W4(v4f32, __VA_ARGS__)
| 541 |
/* Description : Addition of 2 pairs of vectors
   Arguments   : Inputs  - in0, in1, in2, in3
                 Outputs - out0, out1
   Details     : 'in0' is added to 'in1' and result is written to 'out0'
                 'in2' is added to 'in3' and result is written to 'out1'
                 Inputs are parenthesized so that an expression argument
                 (e.g. 'a << 1') is added as a whole instead of being
                 regrouped by operator precedence.
                 Expands to a brace-enclosed block, not an expression.
*/
#define ADD2(in0, in1, in2, in3, out0, out1) \
  {                                          \
    out0 = (in0) + (in1);                    \
    out1 = (in2) + (in3);                    \
  }
| 555 |
/* Description : Addition of 4 pairs of vectors
   Arguments   : Inputs  - in0, in1, in2, in3, in4, in5, in6, in7
                 Outputs - out0, out1, out2, out3
   Details     : 'in0' is added to 'in1' and result is written to 'out0'
                 'in2' is added to 'in3' and result is written to 'out1'
                 'in4' is added to 'in5' and result is written to 'out2'
                 'in6' is added to 'in7' and result is written to 'out3'
                 Inputs are parenthesized before being forwarded to ADD2 so
                 that an expression argument is added as a whole.
                 Expands to a brace-enclosed block, not an expression.
*/
#define ADD4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \
  {                                                                          \
    ADD2((in0), (in1), (in2), (in3), out0, out1);                            \
    ADD2((in4), (in5), (in6), (in7), out2, out3);                            \
  }
| 573 |
/* Description : Vector Floating-Point Convert from Unsigned Integer
   Arguments   : Inputs  - in0, in1
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : 'in0' is cast to v4u32, converted with __msa_ffint_u_w and
                 the result is cast to RTYPE and written to 'out0'
                 'in1' is converted the same way into 'out1'
                 Inputs are parenthesized before the cast so that an
                 expression argument is cast as a whole.
                 Expands to a brace-enclosed block, not an expression.
*/
#define FFINTU_W2(RTYPE, in0, in1, out0, out1)   \
  {                                              \
    out0 = (RTYPE)__msa_ffint_u_w((v4u32)(in0)); \
    out1 = (RTYPE)__msa_ffint_u_w((v4u32)(in1)); \
  }
/* FFINTU_W2 with the results cast to v4f32. */
#define FFINTU_W2_SP(...) FFINTU_W2(v4f32, __VA_ARGS__)
| 536 | 584 |
/* Description : Vector Floating-Point Convert from Unsigned Integer
   Arguments   : Inputs  - in0, in1, in2, in3
                 Outputs - out0, out1, out2, out3
                 Return Type - as per RTYPE
   Details     : Applies FFINTU_W2 to (in0, in1) -> (out0, out1) and then to
                 (in2, in3) -> (out2, out3)
                 Inputs are parenthesized before being forwarded so that an
                 expression argument reaches the cast in FFINTU_W2 as a whole.
                 Expands to a brace-enclosed block, not an expression.
*/
#define FFINTU_W4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) \
  {                                                                  \
    FFINTU_W2(RTYPE, (in0), (in1), out0, out1);                      \
    FFINTU_W2(RTYPE, (in2), (in3), out2, out3);                      \
  }
/* FFINTU_W4 with the results cast to v4f32. */
#define FFINTU_W4_SP(...) FFINTU_W4(v4f32, __VA_ARGS__)
| 543 | 595 |
/* Description : Vector Floating-Point Truncate and Convert to Unsigned Integer
   Arguments   : Inputs  - in0, in1
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : 'in0' is cast to v4f32, converted with __msa_ftrunc_u_w and
                 the result is cast to RTYPE and written to 'out0'
                 'in1' is converted the same way into 'out1'
                 Inputs are parenthesized before the cast so that an
                 expression argument is cast as a whole.
                 Expands to a brace-enclosed block, not an expression.
*/
#define FTRUNCU_W2(RTYPE, in0, in1, out0, out1)   \
  {                                               \
    out0 = (RTYPE)__msa_ftrunc_u_w((v4f32)(in0)); \
    out1 = (RTYPE)__msa_ftrunc_u_w((v4f32)(in1)); \
  }
/* FTRUNCU_W2 with the results cast to v16u8. */
#define FTRUNCU_W2_UB(...) FTRUNCU_W2(v16u8, __VA_ARGS__)
| 555 | 606 |
/* Description : Vector Floating-Point Truncate and Convert to Unsigned Integer
   Arguments   : Inputs  - in0, in1, in2, in3
                 Outputs - out0, out1, out2, out3
                 Return Type - as per RTYPE
   Details     : Applies FTRUNCU_W2 to (in0, in1) -> (out0, out1) and then to
                 (in2, in3) -> (out2, out3)
                 Inputs are parenthesized before being forwarded so that an
                 expression argument reaches the cast in FTRUNCU_W2 as a
                 whole.
                 Expands to a brace-enclosed block, not an expression.
*/
#define FTRUNCU_W4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) \
  {                                                                   \
    FTRUNCU_W2(RTYPE, (in0), (in1), out0, out1);                      \
    FTRUNCU_W2(RTYPE, (in2), (in3), out2, out3);                      \
  }
/* FTRUNCU_W4 with the results cast to v16u8. */
#define FTRUNCU_W4_UB(...) FTRUNCU_W4(v16u8, __VA_ARGS__)
| 562 | 617 |
/* Description : Vector Floating-Point multiply with scale and accumulate
   Arguments   : Inputs  - in0, in1, in2, in3, out0, out1, out2, out3, scale
                 Outputs - out0, out1, out2, out3
   Details     : 'in0' is multiplied by 'scale' and the product is added to
                 'out0'; likewise 'in1' -> 'out1', 'in2' -> 'out2' and
                 'in3' -> 'out3'
                 'in*' and 'scale' are parenthesized so that expression
                 arguments (e.g. 'a + b') are multiplied as a whole.
                 'scale' is expanded once per output, i.e. four times.
                 Expands to a brace-enclosed block, not an expression.
*/
#define VSMA4(in0, in1, in2, in3, out0, out1, out2, out3, scale) \
  {                                                              \
    out0 += (in0) * (scale);                                     \
    out1 += (in1) * (scale);                                     \
    out2 += (in2) * (scale);                                     \
    out3 += (in3) * (scale);                                     \
  }
| 629 |
/* Description : Vector Floating-Point multiply with scale
   Arguments   : Inputs  - in0, in1, in2, in3, scale
                 Outputs - out0, out1, out2, out3
   Details     : 'in0' is multiplied by 'scale' and the product is written to
                 'out0'; likewise 'in1' -> 'out1', 'in2' -> 'out2' and
                 'in3' -> 'out3'
                 'in*' and 'scale' are parenthesized so that expression
                 arguments (e.g. 'a + b') are multiplied as a whole.
                 'scale' is expanded once per output, i.e. four times.
                 Expands to a brace-enclosed block, not an expression.
*/
#define VSMUL4(in0, in1, in2, in3, out0, out1, out2, out3, scale) \
  {                                                               \
    out0 = (in0) * (scale);                                       \
    out1 = (in1) * (scale);                                       \
    out2 = (in2) * (scale);                                       \
    out3 = (in3) * (scale);                                       \
  }
| 641 |
/* Description : Vector Floating-Point max value
   Arguments   : Inputs  - in0, in1, in2, in3, max
                 Output  - max (updated in place)
                 Return Type - as per RTYPE
   Details     : 'max' is an accumulator: it is replaced by
                 __msa_fmax_w(max, in0), then by __msa_fmax_w(max, in1),
                 __msa_fmax_w(max, in2) and __msa_fmax_w(max, in3) in turn.
                 Operands are cast to v4f32 for the call and the result is
                 cast to RTYPE.
                 Arguments are parenthesized before the cast so that an
                 expression argument is cast as a whole.
                 Expands to a brace-enclosed block, not an expression.
*/
#define VMAX_W4(RTYPE, in0, in1, in2, in3, max)            \
  {                                                        \
    max = (RTYPE)__msa_fmax_w((v4f32)(max), (v4f32)(in0)); \
    max = (RTYPE)__msa_fmax_w((v4f32)(max), (v4f32)(in1)); \
    max = (RTYPE)__msa_fmax_w((v4f32)(max), (v4f32)(in2)); \
    max = (RTYPE)__msa_fmax_w((v4f32)(max), (v4f32)(in3)); \
  }
/* VMAX_W4 with the result cast to v4f32. */
#define VMAX_W4_SP(...) VMAX_W4(v4f32, __VA_ARGS__)
| 654 |
/* Description : Vector Floating-Point clip to the range given by min and max
   Arguments   : Inputs  - in0, in1, in2, in3, min, max
                 Outputs - out0, out1, out2, out3
   Details     : 'in0' is first combined with 'max' through __msa_fmin_w and
                 that result is combined with 'min' through __msa_fmax_w; the
                 outcome is written to 'out0'. 'in1', 'in2' and 'in3' are
                 processed the same way into 'out1', 'out2' and 'out3'.
                 'min' and 'max' are each expanded four times.
                 Expands to a brace-enclosed block, not an expression.
*/
#define VCLIP4(in0, in1, in2, in3, min, max, out0, out1, out2, out3) \
  {                                                                  \
    out0 = __msa_fmax_w(__msa_fmin_w((in0), (max)), (min));          \
    out1 = __msa_fmax_w(__msa_fmin_w((in1), (max)), (min));          \
    out2 = __msa_fmax_w(__msa_fmin_w((in2), (max)), (min));          \
    out3 = __msa_fmax_w(__msa_fmin_w((in3), (max)), (min));          \
  }
| 666 |
| 563 #endif // CommonMacrosMSA_h | 667 #endif // CommonMacrosMSA_h |
| OLD | NEW |