OLD | NEW |
(Empty) | |
| 1 // Copyright 2014 Google Inc. All Rights Reserved. |
| 2 // |
| 3 // Use of this source code is governed by a BSD-style license |
| 4 // that can be found in the COPYING file in the root of the source |
| 5 // tree. An additional intellectual property rights grant can be found |
| 6 // in the file PATENTS. All contributing project authors may |
| 7 // be found in the AUTHORS file in the root of the source tree. |
| 8 // ----------------------------------------------------------------------------- |
| 9 // |
| 10 // MIPS common macros |
| 11 |
| 12 #ifndef WEBP_DSP_MIPS_MACRO_H_ |
| 13 #define WEBP_DSP_MIPS_MACRO_H_ |
| 14 |
// Defines WORK_AROUND_GCC when building with GCC for Android and
// LOCAL_GCC_VERSION equals 0x409. LOCAL_GCC_VERSION is not defined in this
// header; if it is still undefined when this test is evaluated, the
// preprocessor treats it as 0 and WORK_AROUND_GCC stays undefined.
// NOTE(review): 0x409 presumably encodes GCC 4.9 -- confirm against the
// definition of LOCAL_GCC_VERSION.
| 15 #if defined(__GNUC__) && defined(__ANDROID__) && LOCAL_GCC_VERSION == 0x409 |
| 16 #define WORK_AROUND_GCC |
| 17 #endif |
| 18 |
// STR(s) turns its argument into a string literal exactly as written.
// XSTR(s) macro-expands its argument first and then stringifies the result;
// the asm macros below use it to paste the value of a macro into an
// instruction string.
| 19 #define STR(s) #s |
| 20 #define XSTR(s) STR(s) |
| 21 |
| 22 // O0[31..16 | 15..0] = I0[31..16 | 15..0] + I1[31..16 | 15..0] |
| 23 // O1[31..16 | 15..0] = I0[31..16 | 15..0] - I1[31..16 | 15..0] |
| 24 // O - output (name of an asm operand); O0 is written before I0/I1 are read again, so it must not alias them |
| 25 // I - input (name of an asm operand; macro doesn't change it) |
| 26 #define ADD_SUB_HALVES(O0, O1, \ |
| 27 I0, I1) \ |
| 28 "addq.ph %[" #O0 "], %[" #I0 "], %[" #I1 "] \n\t" \ |
| 29 "subq.ph %[" #O1 "], %[" #I0 "], %[" #I1 "] \n\t" |
| 30 |
| 31 // O - output (name of an asm operand); receives a halfword loaded with lh |
| 32 // I - input (macro doesn't change it) |
| 33 // I[0/1] - offset in bytes from the asm operand named "in"; pasted literally into the instruction |
| 34 #define LOAD_IN_X2(O0, O1, \ |
| 35 I0, I1) \ |
| 36 "lh %[" #O0 "], " #I0 "(%[in]) \n\t" \ |
| 37 "lh %[" #O1 "], " #I1 "(%[in]) \n\t" |
| 38 |
| 39 // I0 - location (name of the asm operand holding the base address); O0..O3 - outputs loaded with ulw |
| 40 // I1..I4 - offsets in bytes, I5..I8 - multipliers of I9 (I9 is macro-expanded): O(k) is read from I(k+1) + I9 * I(k+5), k = 0..3 |
| 41 #define LOAD_WITH_OFFSET_X4(O0, O1, O2, O3, \ |
| 42 I0, I1, I2, I3, I4, I5, I6, I7, I8, I9) \ |
| 43 "ulw %[" #O0 "], " #I1 "+" XSTR(I9) "*" #I5 "(%[" #I0 "]) \n\t" \ |
| 44 "ulw %[" #O1 "], " #I2 "+" XSTR(I9) "*" #I6 "(%[" #I0 "]) \n\t" \ |
| 45 "ulw %[" #O2 "], " #I3 "+" XSTR(I9) "*" #I7 "(%[" #I0 "]) \n\t" \ |
| 46 "ulw %[" #O3 "], " #I4 "+" XSTR(I9) "*" #I8 "(%[" #I0 "]) \n\t" |
| 47 |
| 48 // O - output: O(2k) = (I(k) * kC2) >> 16 and O(2k+1) = (I(k) * kC1) >> 16 (arithmetic shift), k = 0..3 |
| 49 // IO - input/output: IO0 += I4, IO1 += I5, IO2 -= I6, IO3 -= I7 |
| 50 // I - input (macro doesn't change it); kC1 and kC2 must be asm operands of the enclosing asm statement |
| 51 #define MUL_SHIFT_SUM(O0, O1, O2, O3, O4, O5, O6, O7, \ |
| 52 IO0, IO1, IO2, IO3, \ |
| 53 I0, I1, I2, I3, I4, I5, I6, I7) \ |
| 54 "mul %[" #O0 "], %[" #I0 "], %[kC2] \n\t" \ |
| 55 "mul %[" #O1 "], %[" #I0 "], %[kC1] \n\t" \ |
| 56 "mul %[" #O2 "], %[" #I1 "], %[kC2] \n\t" \ |
| 57 "mul %[" #O3 "], %[" #I1 "], %[kC1] \n\t" \ |
| 58 "mul %[" #O4 "], %[" #I2 "], %[kC2] \n\t" \ |
| 59 "mul %[" #O5 "], %[" #I2 "], %[kC1] \n\t" \ |
| 60 "mul %[" #O6 "], %[" #I3 "], %[kC2] \n\t" \ |
| 61 "mul %[" #O7 "], %[" #I3 "], %[kC1] \n\t" \ |
| 62 "sra %[" #O0 "], %[" #O0 "], 16 \n\t" \ |
| 63 "sra %[" #O1 "], %[" #O1 "], 16 \n\t" \ |
| 64 "sra %[" #O2 "], %[" #O2 "], 16 \n\t" \ |
| 65 "sra %[" #O3 "], %[" #O3 "], 16 \n\t" \ |
| 66 "sra %[" #O4 "], %[" #O4 "], 16 \n\t" \ |
| 67 "sra %[" #O5 "], %[" #O5 "], 16 \n\t" \ |
| 68 "sra %[" #O6 "], %[" #O6 "], 16 \n\t" \ |
| 69 "sra %[" #O7 "], %[" #O7 "], 16 \n\t" \ |
| 70 "addu %[" #IO0 "], %[" #IO0 "], %[" #I4 "] \n\t" \ |
| 71 "addu %[" #IO1 "], %[" #IO1 "], %[" #I5 "] \n\t" \ |
| 72 "subu %[" #IO2 "], %[" #IO2 "], %[" #I6 "] \n\t" \ |
| 73 "subu %[" #IO3 "], %[" #IO3 "], %[" #I7 "] \n\t" |
| 74 |
| 75 // O - output: O[31..16] = I[15..0]; O[15..0] is kept, so O must already hold a valid value |
| 76 // I - input (macro doesn't change it) |
| 77 #define INSERT_HALF_X2(O0, O1, \ |
| 78 I0, I1) \ |
| 79 "ins %[" #O0 "], %[" #I0 "], 16, 16 \n\t" \ |
| 80 "ins %[" #O1 "], %[" #I1 "], 16, 16 \n\t" |
| 81 |
| 82 // O - output: O(k) = I(k) >> 16 (arithmetic shift), k = 0..3 |
| 83 // I - input (macro doesn't change it) |
| 84 #define SRA_16(O0, O1, O2, O3, \ |
| 85 I0, I1, I2, I3) \ |
| 86 "sra %[" #O0 "], %[" #I0 "], 16 \n\t" \ |
| 87 "sra %[" #O1 "], %[" #I1 "], 16 \n\t" \ |
| 88 "sra %[" #O2 "], %[" #I2 "], 16 \n\t" \ |
| 89 "sra %[" #O3 "], %[" #I3 "], 16 \n\t" |
| 90 |
| 91 // O(2k)[31..16 | 15..0] = I(k)[31..16 | 15..0] + I(k+4)[31..16 | 15..0], k = 0..3 |
| 92 // O(2k+1)[31..16 | 15..0] = I(k)[31..16 | 15..0] - I(k+4)[31..16 | 15..0], k = 0..3 |
| 93 // then, for every output: O[31..16 | 15..0] = O[31..16 >> 3 | 15..0 >> 3] (arithmetic shift) |
| 94 // O(2k) is written before I(k) and I(k+4) are read again, so it must not alias them |
| 95 // O - output |
| 96 // I - input (macro doesn't change it) |
| 97 #define SHIFT_R_SUM_X2(O0, O1, O2, O3, O4, O5, O6, O7, \ |
| 98 I0, I1, I2, I3, I4, I5, I6, I7) \ |
| 99 "addq.ph %[" #O0 "], %[" #I0 "], %[" #I4 "] \n\t" \ |
| 100 "subq.ph %[" #O1 "], %[" #I0 "], %[" #I4 "] \n\t" \ |
| 101 "addq.ph %[" #O2 "], %[" #I1 "], %[" #I5 "] \n\t" \ |
| 102 "subq.ph %[" #O3 "], %[" #I1 "], %[" #I5 "] \n\t" \ |
| 103 "addq.ph %[" #O4 "], %[" #I2 "], %[" #I6 "] \n\t" \ |
| 104 "subq.ph %[" #O5 "], %[" #I2 "], %[" #I6 "] \n\t" \ |
| 105 "addq.ph %[" #O6 "], %[" #I3 "], %[" #I7 "] \n\t" \ |
| 106 "subq.ph %[" #O7 "], %[" #I3 "], %[" #I7 "] \n\t" \ |
| 107 "shra.ph %[" #O0 "], %[" #O0 "], 3 \n\t" \ |
| 108 "shra.ph %[" #O1 "], %[" #O1 "], 3 \n\t" \ |
| 109 "shra.ph %[" #O2 "], %[" #O2 "], 3 \n\t" \ |
| 110 "shra.ph %[" #O3 "], %[" #O3 "], 3 \n\t" \ |
| 111 "shra.ph %[" #O4 "], %[" #O4 "], 3 \n\t" \ |
| 112 "shra.ph %[" #O5 "], %[" #O5 "], 3 \n\t" \ |
| 113 "shra.ph %[" #O6 "], %[" #O6 "], 3 \n\t" \ |
| 114 "shra.ph %[" #O7 "], %[" #O7 "], 3 \n\t" |
| 115 |
| 116 // precrq.ph.w O(k), I(k), IO(k) (k = 0..3) |
| 117 // O(k) = I(k)[31..16] | IO(k)[31..16] |
| 118 // ins IO(k), I(k), 16, 16 (executed after the precrq.ph.w that reads the old IO(k)) |
| 119 // IO(k) = I(k)[15..0] | IO(k)[15..0] |
| 120 // O - output |
| 121 // IO - input/output |
| 122 // I - input (macro doesn't change it) |
| 123 #define PACK_2_HALVES_TO_WORD(O0, O1, O2, O3, \ |
| 124 IO0, IO1, IO2, IO3, \ |
| 125 I0, I1, I2, I3) \ |
| 126 "precrq.ph.w %[" #O0 "], %[" #I0 "], %[" #IO0 "] \n\t" \ |
| 127 "precrq.ph.w %[" #O1 "], %[" #I1 "], %[" #IO1 "] \n\t" \ |
| 128 "ins %[" #IO0 "], %[" #I0 "], 16, 16 \n\t" \ |
| 129 "ins %[" #IO1 "], %[" #I1 "], 16, 16 \n\t" \ |
| 130 "precrq.ph.w %[" #O2 "], %[" #I2 "], %[" #IO2 "] \n\t" \ |
| 131 "precrq.ph.w %[" #O3 "], %[" #I3 "], %[" #IO3 "] \n\t" \ |
| 132 "ins %[" #IO2 "], %[" #I2 "], 16, 16 \n\t" \ |
| 133 "ins %[" #IO3 "], %[" #I3 "], 16, 16 \n\t" |
| 134 |
| 135 // preceu.ph.qbr O(2k), I(k) (k = 0..3; fields below are 8 bits wide) |
| 136 // O(2k) = 0 | I(k)[15..8] | 0 | I(k)[7..0] |
| 137 // preceu.ph.qbl O(2k+1), I(k) |
| 138 // O(2k+1) = 0 | I(k)[31..24] | 0 | I(k)[23..16] |
| 139 // O - output |
| 140 // I - input (macro doesn't change it) |
| 141 #define CONVERT_2_BYTES_TO_HALF(O0, O1, O2, O3, O4, O5, O6, O7, \ |
| 142 I0, I1, I2, I3) \ |
| 143 "preceu.ph.qbr %[" #O0 "], %[" #I0 "] \n\t" \ |
| 144 "preceu.ph.qbl %[" #O1 "], %[" #I0 "] \n\t" \ |
| 145 "preceu.ph.qbr %[" #O2 "], %[" #I1 "] \n\t" \ |
| 146 "preceu.ph.qbl %[" #O3 "], %[" #I1 "] \n\t" \ |
| 147 "preceu.ph.qbr %[" #O4 "], %[" #I2 "] \n\t" \ |
| 148 "preceu.ph.qbl %[" #O5 "], %[" #I2 "] \n\t" \ |
| 149 "preceu.ph.qbr %[" #O6 "], %[" #I3 "] \n\t" \ |
| 150 "preceu.ph.qbl %[" #O7 "], %[" #I3 "] \n\t" |
| 151 |
| 152 // IO(k)[31..16 | 15..0] = IO(k)[31..16 | 15..0] + I(k)[31..16 | 15..0], k = 0..7 |
| 153 // IO(k)[31..16 | 15..0] = IO(k)[31..16 <<(s) 7 | 15..0 <<(s) 7] (saturating left shift) |
| 154 // precrqu_s.qb.ph IO0, IO1, IO0 (likewise for the pairs IO2/IO3, IO4/IO5, IO6/IO7): |
| 155 // IO0 = sat8(IO1[31..16]) | sat8(IO1[15..0]) | sat8(IO0[31..16]) | sat8(IO0[15..0]) |
| 156 // sat8(h) = bits 14..7 of the signed halfword h, saturated to the unsigned range 0..255 |
| 157 // store IO0, IO2, IO4, IO6 with usw at I13 * I9, I13 * I10, I13 * I11, I13 * I12 bytes from I8 (I13 is macro-expanded) |
| 158 // IO - input/output |
| 159 // I - input (macro doesn't change it) |
| 160 #define STORE_SAT_SUM_X2(IO0, IO1, IO2, IO3, IO4, IO5, IO6, IO7, \ |
| 161 I0, I1, I2, I3, I4, I5, I6, I7, \ |
| 162 I8, I9, I10, I11, I12, I13) \ |
| 163 "addq.ph %[" #IO0 "], %[" #IO0 "], %[" #I0 "] \n\t" \ |
| 164 "addq.ph %[" #IO1 "], %[" #IO1 "], %[" #I1 "] \n\t" \ |
| 165 "addq.ph %[" #IO2 "], %[" #IO2 "], %[" #I2 "] \n\t" \ |
| 166 "addq.ph %[" #IO3 "], %[" #IO3 "], %[" #I3 "] \n\t" \ |
| 167 "addq.ph %[" #IO4 "], %[" #IO4 "], %[" #I4 "] \n\t" \ |
| 168 "addq.ph %[" #IO5 "], %[" #IO5 "], %[" #I5 "] \n\t" \ |
| 169 "addq.ph %[" #IO6 "], %[" #IO6 "], %[" #I6 "] \n\t" \ |
| 170 "addq.ph %[" #IO7 "], %[" #IO7 "], %[" #I7 "] \n\t" \ |
| 171 "shll_s.ph %[" #IO0 "], %[" #IO0 "], 7 \n\t" \ |
| 172 "shll_s.ph %[" #IO1 "], %[" #IO1 "], 7 \n\t" \ |
| 173 "shll_s.ph %[" #IO2 "], %[" #IO2 "], 7 \n\t" \ |
| 174 "shll_s.ph %[" #IO3 "], %[" #IO3 "], 7 \n\t" \ |
| 175 "shll_s.ph %[" #IO4 "], %[" #IO4 "], 7 \n\t" \ |
| 176 "shll_s.ph %[" #IO5 "], %[" #IO5 "], 7 \n\t" \ |
| 177 "shll_s.ph %[" #IO6 "], %[" #IO6 "], 7 \n\t" \ |
| 178 "shll_s.ph %[" #IO7 "], %[" #IO7 "], 7 \n\t" \ |
| 179 "precrqu_s.qb.ph %[" #IO0 "], %[" #IO1 "], %[" #IO0 "] \n\t" \ |
| 180 "precrqu_s.qb.ph %[" #IO2 "], %[" #IO3 "], %[" #IO2 "] \n\t" \ |
| 181 "precrqu_s.qb.ph %[" #IO4 "], %[" #IO5 "], %[" #IO4 "] \n\t" \ |
| 182 "precrqu_s.qb.ph %[" #IO6 "], %[" #IO7 "], %[" #IO6 "] \n\t" \ |
| 183 "usw %[" #IO0 "], " XSTR(I13) "*" #I9 "(%[" #I8 "]) \n\t" \ |
| 184 "usw %[" #IO2 "], " XSTR(I13) "*" #I10 "(%[" #I8 "]) \n\t" \ |
| 185 "usw %[" #IO4 "], " XSTR(I13) "*" #I11 "(%[" #I8 "]) \n\t" \ |
| 186 "usw %[" #IO6 "], " XSTR(I13) "*" #I12 "(%[" #I8 "]) \n\t" |
| 187 |
// Expands to the ':' that opens the output-operand section of an extended asm
// statement, followed by temp1..temp10 declared as early-clobber register
// outputs ("=&r"). Variables named temp1..temp10 must be in scope at the
// point of use.
| 188 #define OUTPUT_EARLY_CLOBBER_REGS_10() \ |
| 189 : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), \ |
| 190 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6), \ |
| 191 [temp7]"=&r"(temp7), [temp8]"=&r"(temp8), [temp9]"=&r"(temp9), \ |
| 192 [temp10]"=&r"(temp10) |
| 193 |
// Same output-operand list as OUTPUT_EARLY_CLOBBER_REGS_10() (including its
// leading ':'), extended with temp11..temp18 as early-clobber register
// outputs ("=&r"). Variables named temp1..temp18 must be in scope at the
// point of use.
| 194 #define OUTPUT_EARLY_CLOBBER_REGS_18() \ |
| 195 OUTPUT_EARLY_CLOBBER_REGS_10(), \ |
| 196 [temp11]"=&r"(temp11), [temp12]"=&r"(temp12), [temp13]"=&r"(temp13), \ |
| 197 [temp14]"=&r"(temp14), [temp15]"=&r"(temp15), [temp16]"=&r"(temp16), \ |
| 198 [temp17]"=&r"(temp17), [temp18]"=&r"(temp18) |
| 199 |
| 200 #endif // WEBP_DSP_MIPS_MACRO_H_ |
OLD | NEW |