OLD | NEW |
(Empty) | |
| 1 /* |
| 2 * MIPS DSPr2 optimizations for libjpeg-turbo |
| 3 * |
| 4 * Copyright (C) 2013, MIPS Technologies, Inc., California. |
| 5 * All rights reserved. |
| 6 * Authors: Teodora Novkovic (teodora.novkovic@imgtec.com) |
| 7 * Darko Laus (darko.laus@imgtec.com) |
| 8 * This software is provided 'as-is', without any express or implied |
| 9 * warranty. In no event will the authors be held liable for any damages |
| 10 * arising from the use of this software. |
| 11 * |
| 12 * Permission is granted to anyone to use this software for any purpose, |
| 13 * including commercial applications, and to alter it and redistribute it |
| 14 * freely, subject to the following restrictions: |
| 15 * |
| 16 * 1. The origin of this software must not be misrepresented; you must not |
| 17 * claim that you wrote the original software. If you use this software |
| 18 * in a product, an acknowledgment in the product documentation would be |
| 19 * appreciated but is not required. |
| 20 * 2. Altered source versions must be plainly marked as such, and must not be |
| 21 * misrepresented as being the original software. |
| 22 * 3. This notice may not be removed or altered from any source distribution. |
| 23 */ |
| 24 |
| 25 #define zero $0 /* aliases for the general-purpose registers $0-$31 start here */ |
| 26 #define AT $1 |
| 27 #define v0 $2 |
| 28 #define v1 $3 |
| 29 #define a0 $4 |
| 30 #define a1 $5 |
| 31 #define a2 $6 |
| 32 #define a3 $7 |
| 33 #define t0 $8 |
| 34 #define t1 $9 |
| 35 #define t2 $10 |
| 36 #define t3 $11 |
| 37 #define t4 $12 |
| 38 #define t5 $13 |
| 39 #define t6 $14 |
| 40 #define t7 $15 |
| 41 #define s0 $16 |
| 42 #define s1 $17 |
| 43 #define s2 $18 |
| 44 #define s3 $19 |
| 45 #define s4 $20 |
| 46 #define s5 $21 |
| 47 #define s6 $22 |
| 48 #define s7 $23 |
| 49 #define t8 $24 |
| 50 #define t9 $25 |
| 51 #define k0 $26 |
| 52 #define k1 $27 |
| 53 #define gp $28 |
| 54 #define sp $29 |
| 55 #define fp $30 |
| 56 #define s8 $30 /* s8 and fp are two names for the same register, $30 */ |
| 57 #define ra $31 |
| 58 |
| 59 #define f0 $f0 /* aliases for the registers $f0-$f31 start here */ |
| 60 #define f1 $f1 |
| 61 #define f2 $f2 |
| 62 #define f3 $f3 |
| 63 #define f4 $f4 |
| 64 #define f5 $f5 |
| 65 #define f6 $f6 |
| 66 #define f7 $f7 |
| 67 #define f8 $f8 |
| 68 #define f9 $f9 |
| 69 #define f10 $f10 |
| 70 #define f11 $f11 |
| 71 #define f12 $f12 |
| 72 #define f13 $f13 |
| 73 #define f14 $f14 |
| 74 #define f15 $f15 |
| 75 #define f16 $f16 |
| 76 #define f17 $f17 |
| 77 #define f18 $f18 |
| 78 #define f19 $f19 |
| 79 #define f20 $f20 |
| 80 #define f21 $f21 |
| 81 #define f22 $f22 |
| 82 #define f23 $f23 |
| 83 #define f24 $f24 |
| 84 #define f25 $f25 |
| 85 #define f26 $f26 |
| 86 #define f27 $f27 |
| 87 #define f28 $f28 |
| 88 #define f29 $f29 |
| 89 #define f30 $f30 |
| 90 #define f31 $f31 |
| 91 |
| 92 /* |
| 93 * LEAF_MIPS32R2 - declare a global leaf routine for MIPS32r2: emits the symbol directives and the label, then saves the current assembler options (.set push) and selects arch=mips32r2, noreorder and noat. Close the routine with END(symbol). |
| 94 */ |
| 95 #define LEAF_MIPS32R2(symbol) \ |
| 96 .globl symbol; \ |
| 97 .align 2; \ |
| 98 .type symbol, @function; \ |
| 99 .ent symbol, 0; \ |
| 100 symbol: .frame sp, 0, ra; \ |
| 101 .set push; \ |
| 102 .set arch=mips32r2; \ |
| 103 .set noreorder; \ |
| 104 .set noat; |
| 105 |
| 106 /* |
| 107 * LEAF_MIPS_DSPR2 - declare leaf routine for MIPS DSPr2: everything LEAF_MIPS32R2(symbol) emits, followed by ".set dspr2" |
| 108 */ |
| 109 #define LEAF_MIPS_DSPR2(symbol) \ |
| 110 LEAF_MIPS32R2(symbol) \ |
| 111 .set dspr2; |
| 112 |
| 113 /* |
| 114 * END - mark end of function: restores the assembler options with ".set pop", emits ".end" and sets the symbol size to the distance from the function label to this point |
| 115 */ |
| 116 #define END(function) \ |
| 117 .set pop; \ |
| 118 .end function; \ |
| 119 .size function,.-function |
| 120 |
| 121 /* |
| 122 * Checks if stack offset is big enough for storing/restoring regs_num |
| 123 * number of registers to/from stack. Registers need regs_num*4 bytes, but |
| 124 * since MIPS ABI allows usage of first 16 bytes of stack frame (this is |
| 125 * reserved for input arguments of the functions, already stored in a0-a3), |
| 126 * stack offset only has to be greater than or equal to regs_num*4 - 16. |
| 127 * Arguments are parenthesized so that expression arguments are evaluated as a unit. |
| 128 */ |
| 129 .macro CHECK_STACK_OFFSET regs_num, stack_offset |
| 130 .if (\stack_offset) < ((\regs_num) * 4 - 16) |
| 131 .error "Stack offset too small." |
| 132 .endif |
| 133 .endm |
| 134 |
| 135 /* |
| 136 * Saves set of registers on stack. Maximum number of registers that |
| 137 * can be saved on stack is limited to 14 (a0-a3, v0-v1 and s0-s7). |
| 138 * Stack offset is number of bytes that are subtracted from stack pointer (sp) |
| 139 * before registers are pushed in order to provide enough space on stack |
| 140 * (offset must be a non-negative multiple of 4, and must be big enough, as |
| 141 * described by CHECK_STACK_OFFSET macro). This macro is intended to be used in |
| 142 * combination with RESTORE_REGS_FROM_STACK macro. Example: |
| 143 * SAVE_REGS_ON_STACK 4, v0, v1, s0, s1 |
| 144 * RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1 |
| 145 */ |
| 146 .macro SAVE_REGS_ON_STACK stack_offset = 0, r1, \ |
| 147 r2 = 0, r3 = 0, r4 = 0, \ |
| 148 r5 = 0, r6 = 0, r7 = 0, \ |
| 149 r8 = 0, r9 = 0, r10 = 0, \ |
| 150 r11 = 0, r12 = 0, r13 = 0, \ |
| 151 r14 = 0 |
| 152 .if ((\stack_offset) < 0) || ((\stack_offset) - ((\stack_offset) / 4) * 4) |
| 153 .error "Stack offset must be non-negative and a multiple of 4." |
| 154 .endif |
| 155 .if (\stack_offset) != 0 |
| 156 addiu sp, sp, -(\stack_offset) /* negate the whole argument, even when it is an expression */ |
| 157 .endif |
| 158 sw \r1, 0(sp) /* r1 has no default and is always stored */ |
| 159 .if \r2 != 0 /* r2-r14 default to 0; an omitted register is skipped */ |
| 160 sw \r2, 4(sp) |
| 161 .endif |
| 162 .if \r3 != 0 |
| 163 sw \r3, 8(sp) |
| 164 .endif |
| 165 .if \r4 != 0 |
| 166 sw \r4, 12(sp) |
| 167 .endif |
| 168 .if \r5 != 0 /* from the 5th register on, stack_offset is checked before the store */ |
| 169 CHECK_STACK_OFFSET 5, \stack_offset |
| 170 sw \r5, 16(sp) |
| 171 .endif |
| 172 .if \r6 != 0 |
| 173 CHECK_STACK_OFFSET 6, \stack_offset |
| 174 sw \r6, 20(sp) |
| 175 .endif |
| 176 .if \r7 != 0 |
| 177 CHECK_STACK_OFFSET 7, \stack_offset |
| 178 sw \r7, 24(sp) |
| 179 .endif |
| 180 .if \r8 != 0 |
| 181 CHECK_STACK_OFFSET 8, \stack_offset |
| 182 sw \r8, 28(sp) |
| 183 .endif |
| 184 .if \r9 != 0 |
| 185 CHECK_STACK_OFFSET 9, \stack_offset |
| 186 sw \r9, 32(sp) |
| 187 .endif |
| 188 .if \r10 != 0 |
| 189 CHECK_STACK_OFFSET 10, \stack_offset |
| 190 sw \r10, 36(sp) |
| 191 .endif |
| 192 .if \r11 != 0 |
| 193 CHECK_STACK_OFFSET 11, \stack_offset |
| 194 sw \r11, 40(sp) |
| 195 .endif |
| 196 .if \r12 != 0 |
| 197 CHECK_STACK_OFFSET 12, \stack_offset |
| 198 sw \r12, 44(sp) |
| 199 .endif |
| 200 .if \r13 != 0 |
| 201 CHECK_STACK_OFFSET 13, \stack_offset |
| 202 sw \r13, 48(sp) |
| 203 .endif |
| 204 .if \r14 != 0 |
| 205 CHECK_STACK_OFFSET 14, \stack_offset |
| 206 sw \r14, 52(sp) |
| 207 .endif |
| 208 .endm |
| 209 |
| 210 /* |
| 211 * Restores set of registers from stack. Maximum number of registers that |
| 212 * can be restored from stack is limited to 14 (a0-a3, v0-v1 and s0-s7). |
| 213 * Stack offset is number of bytes that are added to stack pointer (sp) |
| 214 * after registers are restored (offset must be a non-negative multiple of 4, |
| 215 * and must be big enough, as described by CHECK_STACK_OFFSET macro). This macro |
| 216 * is intended to be used in combination with SAVE_REGS_ON_STACK macro. |
| 217 * Example: |
| 218 * SAVE_REGS_ON_STACK 4, v0, v1, s0, s1 |
| 219 * RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1 |
| 220 */ |
| 221 .macro RESTORE_REGS_FROM_STACK stack_offset = 0, r1, \ |
| 222 r2 = 0, r3 = 0, r4 = 0, \ |
| 223 r5 = 0, r6 = 0, r7 = 0, \ |
| 224 r8 = 0, r9 = 0, r10 = 0, \ |
| 225 r11 = 0, r12 = 0, r13 = 0, \ |
| 226 r14 = 0 |
| 227 .if ((\stack_offset) < 0) || ((\stack_offset) - ((\stack_offset) / 4) * 4) |
| 228 .error "Stack offset must be non-negative and a multiple of 4." |
| 229 .endif |
| 230 lw \r1, 0(sp) /* r1 has no default and is always reloaded */ |
| 231 .if \r2 != 0 /* r2-r14 default to 0; an omitted register is skipped */ |
| 232 lw \r2, 4(sp) |
| 233 .endif |
| 234 .if \r3 != 0 |
| 235 lw \r3, 8(sp) |
| 236 .endif |
| 237 .if \r4 != 0 |
| 238 lw \r4, 12(sp) |
| 239 .endif |
| 240 .if \r5 != 0 /* from the 5th register on, stack_offset is checked before the load */ |
| 241 CHECK_STACK_OFFSET 5, \stack_offset |
| 242 lw \r5, 16(sp) |
| 243 .endif |
| 244 .if \r6 != 0 |
| 245 CHECK_STACK_OFFSET 6, \stack_offset |
| 246 lw \r6, 20(sp) |
| 247 .endif |
| 248 .if \r7 != 0 |
| 249 CHECK_STACK_OFFSET 7, \stack_offset |
| 250 lw \r7, 24(sp) |
| 251 .endif |
| 252 .if \r8 != 0 |
| 253 CHECK_STACK_OFFSET 8, \stack_offset |
| 254 lw \r8, 28(sp) |
| 255 .endif |
| 256 .if \r9 != 0 |
| 257 CHECK_STACK_OFFSET 9, \stack_offset |
| 258 lw \r9, 32(sp) |
| 259 .endif |
| 260 .if \r10 != 0 |
| 261 CHECK_STACK_OFFSET 10, \stack_offset |
| 262 lw \r10, 36(sp) |
| 263 .endif |
| 264 .if \r11 != 0 |
| 265 CHECK_STACK_OFFSET 11, \stack_offset |
| 266 lw \r11, 40(sp) |
| 267 .endif |
| 268 .if \r12 != 0 |
| 269 CHECK_STACK_OFFSET 12, \stack_offset |
| 270 lw \r12, 44(sp) |
| 271 .endif |
| 272 .if \r13 != 0 |
| 273 CHECK_STACK_OFFSET 13, \stack_offset |
| 274 lw \r13, 48(sp) |
| 275 .endif |
| 276 .if \r14 != 0 |
| 277 CHECK_STACK_OFFSET 14, \stack_offset |
| 278 lw \r14, 52(sp) |
| 279 .endif |
| 280 .if (\stack_offset) != 0 |
| 281 addiu sp, sp, \stack_offset /* release the space only after every register has been reloaded */ |
| 282 .endif |
| 283 .endm |
| 284 |
| 285 |
OLD | NEW |