OLD | NEW |
1 // Copyright 2014 Google Inc. All Rights Reserved. | 1 // Copyright 2014 Google Inc. All Rights Reserved. |
2 // | 2 // |
3 // Use of this source code is governed by a BSD-style license | 3 // Use of this source code is governed by a BSD-style license |
4 // that can be found in the COPYING file in the root of the source | 4 // that can be found in the COPYING file in the root of the source |
5 // tree. An additional intellectual property rights grant can be found | 5 // tree. An additional intellectual property rights grant can be found |
6 // in the file PATENTS. All contributing project authors may | 6 // in the file PATENTS. All contributing project authors may |
7 // be found in the AUTHORS file in the root of the source tree. | 7 // be found in the AUTHORS file in the root of the source tree. |
8 // ----------------------------------------------------------------------------- | 8 // ----------------------------------------------------------------------------- |
9 // | 9 // |
10 // MIPS version of speed-critical encoding functions. | 10 // MIPS version of speed-critical encoding functions. |
11 // | 11 // |
12 // Author(s): Djordje Pesut (djordje.pesut@imgtec.com) | 12 // Author(s): Djordje Pesut (djordje.pesut@imgtec.com) |
13 // Jovan Zelincevic (jovan.zelincevic@imgtec.com) | 13 // Jovan Zelincevic (jovan.zelincevic@imgtec.com) |
14 // Slobodan Prijic (slobodan.prijic@imgtec.com) | 14 // Slobodan Prijic (slobodan.prijic@imgtec.com) |
15 | 15 |
16 #include "./dsp.h" | 16 #include "./dsp.h" |
17 | 17 |
18 #if defined(WEBP_USE_MIPS32) | 18 #if defined(WEBP_USE_MIPS32) |
19 | 19 |
20 #include "../enc/vp8enci.h" | 20 #include "../enc/vp8enci.h" |
21 #include "../enc/cost.h" | 21 #include "../enc/cost.h" |
22 | 22 |
| 23 #if defined(__GNUC__) && defined(__ANDROID__) && LOCAL_GCC_VERSION == 0x409 |
| 24 #define WORK_AROUND_GCC |
| 25 #endif |
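Note: LOCAL_GCC_VERSION is the packed (major << 8) | minor value defined in dsp.h, so 0x409 matches gcc 4.9 exactly; the guard reads as sidestepping a suspected miscompilation of the inline-asm SSE helpers by that Android toolchain.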
| 26 |
23 static const int kC1 = 20091 + (1 << 16); | 27 static const int kC1 = 20091 + (1 << 16); |
24 static const int kC2 = 35468; | 28 static const int kC2 = 35468; |
25 | 29 |
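Context for the constants and the inlined MUL: kC1 carries an extra 1 << 16 so that the usual VP8 fixed-point multiply, MUL(a, b) = (a * b) >> 16, folds the "+ a" term of the inverse transform into a single step. A scalar sketch of one column of the vertical pass follows, mirroring the reference C transform rather than the asm itself (the helper names are ours, not the file's):

    static int MUL(int a, int b) { return (a * b) >> 16; }

    // One column of the vertical pass, as in the reference scalar
    // transform; the asm keeps all sixteen tmp values in registers.
    static void VerticalPassColumn(const int16_t* in, int* tmp) {
      const int a = in[0] + in[8];                       // even butterfly
      const int b = in[0] - in[8];
      const int c = MUL(in[4], kC2) - MUL(in[12], kC1);  // odd butterfly
      const int d = MUL(in[4], kC1) + MUL(in[12], kC2);
      tmp[0] = a + d;
      tmp[1] = b + c;
      tmp[2] = b - c;
      tmp[3] = a - d;
    }
    // Callers advance in by one element and tmp by four per column.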
26 // macro for one vertical pass in ITransformOne | 30 // macro for one vertical pass in ITransformOne |
27 // MUL macro inlined | 31 // MUL macro inlined |
28 // temp0..temp15 holds tmp[0]..tmp[15] | 32 // temp0..temp15 holds tmp[0]..tmp[15] |
29 // A..D - offsets in bytes to load from in buffer | 33 // A..D - offsets in bytes to load from in buffer |
30 // TEMP0..TEMP3 - registers for corresponding tmp elements | 34 // TEMP0..TEMP3 - registers for corresponding tmp elements |
31 // TEMP4..TEMP5 - temporary registers | 35 // TEMP4..TEMP5 - temporary registers |
32 #define VERTICAL_PASS(A, B, C, D, TEMP4, TEMP0, TEMP1, TEMP2, TEMP3) \ | 36 #define VERTICAL_PASS(A, B, C, D, TEMP4, TEMP0, TEMP1, TEMP2, TEMP3) \ |
(...skipping 601 matching lines...)
634 "madd %[temp2], %[temp2] \n\t" \ | 638 "madd %[temp2], %[temp2] \n\t" \ |
635 "madd %[temp4], %[temp4] \n\t" \ | 639 "madd %[temp4], %[temp4] \n\t" \ |
636 "madd %[temp6], %[temp6] \n\t" | 640 "madd %[temp6], %[temp6] \n\t" |
637 | 641 |
638 #define GET_SSE(A, B, C, D) \ | 642 #define GET_SSE(A, B, C, D) \ |
639 GET_SSE_INNER(A, A + 1, A + 2, A + 3) \ | 643 GET_SSE_INNER(A, A + 1, A + 2, A + 3) \ |
640 GET_SSE_INNER(B, B + 1, B + 2, B + 3) \ | 644 GET_SSE_INNER(B, B + 1, B + 2, B + 3) \ |
641 GET_SSE_INNER(C, C + 1, C + 2, C + 3) \ | 645 GET_SSE_INNER(C, C + 1, C + 2, C + 3) \ |
642 GET_SSE_INNER(D, D + 1, D + 2, D + 3) | 646 GET_SSE_INNER(D, D + 1, D + 2, D + 3) |
643 | 647 |
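Reading the accumulation idiom: each GET_SSE_INNER loads four byte pairs from a and b, subtracts, and each "madd x, x" adds one squared difference into the HI/LO accumulator pair, so GET_SSE(A, B, C, D) consumes sixteen consecutive bytes. In plain C the whole SSE family computes something like the sketch below (the function name and the stride parameter are ours; the asm hard-codes its offsets):

    static int SumOfSquares(const uint8_t* a, const uint8_t* b,
                            int w, int h, int stride) {
      int count = 0;
      int x, y;
      for (y = 0; y < h; ++y) {
        for (x = 0; x < w; ++x) {
          const int diff = (int)a[x] - (int)b[x];
          count += diff * diff;      // one madd in the asm
        }
        a += stride;
        b += stride;
      }
      return count;                  // read back with mflo
    }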
| 648 #if !defined(WORK_AROUND_GCC) |
644 static int SSE16x16(const uint8_t* a, const uint8_t* b) { | 649 static int SSE16x16(const uint8_t* a, const uint8_t* b) { |
645 int count; | 650 int count; |
646 int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; | 651 int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; |
647 | 652 |
648 __asm__ volatile( | 653 __asm__ volatile( |
649 "mult $zero, $zero \n\t" | 654 "mult $zero, $zero \n\t" |
650 | 655 |
651 GET_SSE( 0, 4, 8, 12) | 656 GET_SSE( 0, 4, 8, 12) |
652 GET_SSE( 16, 20, 24, 28) | 657 GET_SSE( 16, 20, 24, 28) |
653 GET_SSE( 32, 36, 40, 44) | 658 GET_SSE( 32, 36, 40, 44) |
(...skipping 81 matching lines...)
735 "mflo %[count] \n\t" | 740 "mflo %[count] \n\t" |
736 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), | 741 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), |
737 [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), | 742 [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), |
738 [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [count]"=&r"(count) | 743 [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [count]"=&r"(count) |
739 : [a]"r"(a), [b]"r"(b) | 744 : [a]"r"(a), [b]"r"(b) |
740 : "memory", "hi" , "lo" | 745 : "memory", "hi" , "lo" |
741 ); | 746 ); |
742 return count; | 747 return count; |
743 } | 748 } |
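The frame around the macros is worth noting: the leading "mult $zero, $zero" clears the HI/LO accumulator pair before any madd runs, the final "mflo %[count]" reads back the low 32 bits of the accumulated sum, and listing "hi" and "lo" as clobbers keeps the compiler from assuming the accumulator survives the asm.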
744 | 749 |
| 750 #endif // WORK_AROUND_GCC |
| 751 |
745 #undef GET_SSE | 752 #undef GET_SSE |
746 #undef GET_SSE_INNER | 753 #undef GET_SSE_INNER |
747 | 754 |
748 #endif // WEBP_USE_MIPS32 | 755 #endif // WEBP_USE_MIPS32 |
749 | 756 |
750 //------------------------------------------------------------------------------ | 757 //------------------------------------------------------------------------------ |
751 // Entry point | 758 // Entry point |
752 | 759 |
753 extern void VP8EncDspInitMIPS32(void); | 760 extern void VP8EncDspInitMIPS32(void); |
754 | 761 |
755 void VP8EncDspInitMIPS32(void) { | 762 void VP8EncDspInitMIPS32(void) { |
756 #if defined(WEBP_USE_MIPS32) | 763 #if defined(WEBP_USE_MIPS32) |
757 VP8ITransform = ITransform; | 764 VP8ITransform = ITransform; |
758 VP8EncQuantizeBlock = QuantizeBlock; | 765 VP8EncQuantizeBlock = QuantizeBlock; |
759 VP8TDisto4x4 = Disto4x4; | 766 VP8TDisto4x4 = Disto4x4; |
760 VP8TDisto16x16 = Disto16x16; | 767 VP8TDisto16x16 = Disto16x16; |
761 VP8FTransform = FTransform; | 768 VP8FTransform = FTransform; |
| 769 #if !defined(WORK_AROUND_GCC) |
762 VP8SSE16x16 = SSE16x16; | 770 VP8SSE16x16 = SSE16x16; |
763 VP8SSE8x8 = SSE8x8; | 771 VP8SSE8x8 = SSE8x8; |
764 VP8SSE16x8 = SSE16x8; | 772 VP8SSE16x8 = SSE16x8; |
765 VP8SSE4x4 = SSE4x4; | 773 VP8SSE4x4 = SSE4x4; |
| 774 #endif |
766 #endif // WEBP_USE_MIPS32 | 775 #endif // WEBP_USE_MIPS32 |
767 } | 776 } |
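When WORK_AROUND_GCC is defined, the four VP8SSE* pointers are simply left untouched here, so the portable C implementations installed earlier by the generic VP8EncDspInit() presumably stay in effect on the affected toolchain.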