Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. | 1 // Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef CommonMacrosMSA_h | 5 #ifndef CommonMacrosMSA_h |
| 6 #define CommonMacrosMSA_h | 6 #define CommonMacrosMSA_h |
| 7 | 7 |
| 8 #include <msa.h> | 8 #include <msa.h> |
| 9 #include <stdint.h> | 9 #include <stdint.h> |
| 10 | 10 |
| (...skipping 500 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 511 { \ | 511 { \ |
| 512 out0 = in0 / in1; \ | 512 out0 = in0 / in1; \ |
| 513 out1 = in2 / in3; \ | 513 out1 = in2 / in3; \ |
| 514 } | 514 } |
| 515 #define DIV4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \ | 515 #define DIV4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \ |
| 516 { \ | 516 { \ |
| 517 DIV2(in0, in1, in2, in3, out0, out1); \ | 517 DIV2(in0, in1, in2, in3, out0, out1); \ |
| 518 DIV2(in4, in5, in6, in7, out2, out3); \ | 518 DIV2(in4, in5, in6, in7, out2, out3); \ |
| 519 } | 519 } |
| 520 | 520 |
| 521 /* Description : Addition of 2 pairs of vectors | |
| 522 * Arguments : Inputs - in0, in1, in2, in3 | |
| 523 * Outputs - out0, out1 | |
| 524 * Details : Each element in 'in0' is added to 'in1' and written to 'out0'; | |
| 525 * each element in 'in2' is added to 'in3' and written to 'out1'. | |
| 526 */ | |
|
Raymond Toy
2016/10/03 16:47:06
Comment style here seems inconsistent with the nea
Prashant.Patil
2016/10/04 11:47:27
Done.
| |
| 527 #define ADD2(in0, in1, in2, in3, out0, out1) \ | |
| 528 { \ | |
| 529 out0 = in0 + in1; \ | |
| 530 out1 = in2 + in3; \ | |
| 531 } | |
| 532 #define ADD4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \ | |
|
Raymond Toy
2016/10/03 16:47:06
I know the naming here is consistent with the rest
Prashant.Patil
2016/10/04 11:47:28
I will add macro description for better understand
| |
| 533 { \ | |
| 534 ADD2(in0, in1, in2, in3, out0, out1); \ | |
| 535 ADD2(in4, in5, in6, in7, out2, out3); \ | |
| 536 } | |
| 537 | |
| 521 /* Description : Vector Floating-Point Convert from Unsigned Integer | 538 /* Description : Vector Floating-Point Convert from Unsigned Integer |
| 522 Arguments : Inputs - in0, in1 | 539 Arguments : Inputs - in0, in1 |
| 523 Outputs - out0, out1 | 540 Outputs - out0, out1 |
| 524 Details : | 541 Details : |
| 525 */ | 542 */ |
| 526 #define FFINTU_W2(RTYPE, in0, in1, out0, out1) \ | 543 #define FFINTU_W2(RTYPE, in0, in1, out0, out1) \ |
| 527 { \ | 544 { \ |
| 528 out0 = (RTYPE)__msa_ffint_u_w((v4u32)in0); \ | 545 out0 = (RTYPE)__msa_ffint_u_w((v4u32)in0); \ |
| 529 out1 = (RTYPE)__msa_ffint_u_w((v4u32)in1); \ | 546 out1 = (RTYPE)__msa_ffint_u_w((v4u32)in1); \ |
| 530 } | 547 } |
| (...skipping 18 matching lines...) Expand all Loading... | |
| 549 } | 566 } |
| 550 #define FTRUNCU_W2_UB(...) FTRUNCU_W2(v16u8, __VA_ARGS__) | 567 #define FTRUNCU_W2_UB(...) FTRUNCU_W2(v16u8, __VA_ARGS__) |
| 551 | 568 |
| 552 #define FTRUNCU_W4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) \ | 569 #define FTRUNCU_W4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) \ |
| 553 { \ | 570 { \ |
| 554 FTRUNCU_W2(RTYPE, in0, in1, out0, out1); \ | 571 FTRUNCU_W2(RTYPE, in0, in1, out0, out1); \ |
| 555 FTRUNCU_W2(RTYPE, in2, in3, out2, out3); \ | 572 FTRUNCU_W2(RTYPE, in2, in3, out2, out3); \ |
| 556 } | 573 } |
| 557 #define FTRUNCU_W4_UB(...) FTRUNCU_W4(v16u8, __VA_ARGS__) | 574 #define FTRUNCU_W4_UB(...) FTRUNCU_W4(v16u8, __VA_ARGS__) |
| 558 | 575 |
| 576 /* Description : Vector Floating-Point multiply with scale and accumulate | |
| 577 Arguments : Inputs - in0, in1, out0, out1, scale | |
|
Raymond Toy
2016/10/03 16:47:06
Based on the code, out0, and out1 are also inputs.
Prashant.Patil
2016/10/04 11:47:27
Done.
| |
| 578 Outputs - out0, out1 | |
| 579 Details : out0 += in0 * scale; out1 += in1 * scale | |
| 580 */ | |
| 581 #define VSMA2(in0, in1, out0, out1, scale) \ | |
| 582 { \ | |
| 583 out0 += in0 * scale; \ | |
| 584 out1 += in1 * scale; \ | |
| 585 } | |
| 586 | |
| 587 #define VSMA4(in0, in1, in2, in3, out0, out1, out2, out3, scale) \ | |
| 588 { \ | |
| 589 VSMA2(in0, in1, out0, out1, scale); \ | |
| 590 VSMA2(in2, in3, out2, out3, scale); \ | |
| 591 } | |
| 592 | |
| 593 /* Description : Vector Floating-Point multiply with scale | |
| 594 Arguments : Inputs - in0, in1, scale | |
| 595 Outputs - out0, out1 | |
| 596 Details : out0 = in0 * scale; out1 = in1 * scale | |
| 597 */ | |
| 598 #define VSMUL2(in0, in1, out0, out1, scale) \ | |
| 599 { \ | |
| 600 out0 = in0 * scale; \ | |
| 601 out1 = in1 * scale; \ | |
| 602 } | |
| 603 | |
| 604 #define VSMUL4(in0, in1, in2, in3, out0, out1, out2, out3, scale) \ | |
| 605 { \ | |
| 606 VSMUL2(in0, in1, out0, out1, scale); \ | |
| 607 VSMUL2(in2, in3, out2, out3, scale); \ | |
| 608 } | |
| 609 | |
| 610 /* Description : Vector Floating-Point max value with absolute | |
| 611 Arguments : Inputs - in0, in1, mask, max | |
| 612 Outputs - max | |
|
Raymond Toy
2016/10/03 16:47:06
There are not outputs out0, out1, just the one out
Prashant.Patil
2016/10/04 11:47:28
I will update the macro usage for better code read
| |
| 613 Details : max = fmax(max, in0 & mask), then max = fmax(max, in1 & mask); 'max' is read and updated | |
| 614 */ | |
| 615 #define VMAXMGV2(in0, in1, mask, max) \ | |
| 616 { \ | |
| 617 max = __msa_fmax_w(max, (v4f32)((v16i8)in0 & mask)); \ | |
| 618 max = __msa_fmax_w(max, (v4f32)((v16i8)in1 & mask)); \ | |
| 619 } | |
| 620 | |
| 621 #define VMAXMGV4(in0, in1, in2, in3, mask, max) \ | |
| 622 { \ | |
| 623 VMAXMGV2(in0, in1, mask, max); \ | |
| 624 VMAXMGV2(in2, in3, mask, max); \ | |
| 625 } | |
| 626 | |
| 627 /* Description : Vector Floating-Point clip to min max | |
| 628 Arguments : Inputs - in0, in1, min, max | |
|
Raymond Toy
2016/10/03 16:47:06
You forgot min, max as inputs.
Prashant.Patil
2016/10/04 11:47:28
Done.
| |
| 629 Outputs - out0, out1 | |
| 630 Details : out0 = fmax(fmin(in0, max), min); out1 = fmax(fmin(in1, max), min) | |
| 631 */ | |
| 632 #define VCLIP2(in0, in1, min, max, out0, out1) \ | |
| 633 { \ | |
| 634 out0 = __msa_fmax_w(__msa_fmin_w(in0, max), min); \ | |
| 635 out1 = __msa_fmax_w(__msa_fmin_w(in1, max), min); \ | |
| 636 } | |
| 637 | |
| 638 #define VCLIP4(in0, in1, in2, in3, min, max, out0, out1, out2, out3) \ | |
| 639 { \ | |
| 640 VCLIP2(in0, in1, min, max, out0, out1); \ | |
| 641 VCLIP2(in2, in3, min, max, out2, out3); \ | |
| 642 } | |
| 643 | |
| 559 #endif // CommonMacrosMSA_h | 644 #endif // CommonMacrosMSA_h |
| OLD | NEW |