| OLD | NEW | 
| (Empty) |  | 
 |    1 /* | 
 |    2  * MIPS DSPr2 optimizations for libjpeg-turbo | 
 |    3  * | 
 |    4  * Copyright (C) 2013, MIPS Technologies, Inc., California. | 
 |    5  * All rights reserved. | 
 |    6  * Authors:  Teodora Novkovic (teodora.novkovic@imgtec.com) | 
 |    7  *           Darko Laus       (darko.laus@imgtec.com) | 
 |    8  * This software is provided 'as-is', without any express or implied | 
 |    9  * warranty.  In no event will the authors be held liable for any damages | 
 |   10  * arising from the use of this software. | 
 |   11  * | 
 |   12  * Permission is granted to anyone to use this software for any purpose, | 
 |   13  * including commercial applications, and to alter it and redistribute it | 
 |   14  * freely, subject to the following restrictions: | 
 |   15  * | 
 |   16  * 1. The origin of this software must not be misrepresented; you must not | 
 |   17  *    claim that you wrote the original software. If you use this software | 
 |   18  *    in a product, an acknowledgment in the product documentation would be | 
 |   19  *    appreciated but is not required. | 
 |   20  * 2. Altered source versions must be plainly marked as such, and must not be | 
 |   21  *    misrepresented as being the original software. | 
 |   22  * 3. This notice may not be removed or altered from any source distribution. | 
 |   23  */ | 
 |   24  | 
 /*
  * Symbolic names for the general-purpose registers $0-$31.
  * fp and s8 are two names for the same register ($30).
  */

 /* $0-$1 */
 #define zero    $0
 #define AT      $1

 /* $2-$3: v0-v1 */
 #define v0      $2
 #define v1      $3

 /* $4-$7: a0-a3 */
 #define a0      $4
 #define a1      $5
 #define a2      $6
 #define a3      $7

 /* $8-$15: t0-t7 */
 #define t0      $8
 #define t1      $9
 #define t2      $10
 #define t3      $11
 #define t4      $12
 #define t5      $13
 #define t6      $14
 #define t7      $15

 /* $16-$23: s0-s7 */
 #define s0      $16
 #define s1      $17
 #define s2      $18
 #define s3      $19
 #define s4      $20
 #define s5      $21
 #define s6      $22
 #define s7      $23

 /* $24-$25: t8-t9 */
 #define t8      $24
 #define t9      $25

 /* $26-$27: k0-k1 */
 #define k0      $26
 #define k1      $27

 /* $28-$31 */
 #define gp      $28
 #define sp      $29
 #define fp      $30
 #define s8      $30
 #define ra      $31
 |   58  | 
 /*
  * Symbolic names for the floating-point registers: fN expands to $fN
  * for N = 0..31.
  */

 /* f0-f7 */
 #define f0      $f0
 #define f1      $f1
 #define f2      $f2
 #define f3      $f3
 #define f4      $f4
 #define f5      $f5
 #define f6      $f6
 #define f7      $f7

 /* f8-f15 */
 #define f8      $f8
 #define f9      $f9
 #define f10     $f10
 #define f11     $f11
 #define f12     $f12
 #define f13     $f13
 #define f14     $f14
 #define f15     $f15

 /* f16-f23 */
 #define f16     $f16
 #define f17     $f17
 #define f18     $f18
 #define f19     $f19
 #define f20     $f20
 #define f21     $f21
 #define f22     $f22
 #define f23     $f23

 /* f24-f31 */
 #define f24     $f24
 #define f25     $f25
 #define f26     $f26
 #define f27     $f27
 #define f28     $f28
 #define f29     $f29
 #define f30     $f30
 #define f31     $f31
 |   91  | 
 /*
  * LEAF_MIPS32R2(symbol) - open a leaf routine assembled for MIPS32r2.
  *
  * Exports symbol (.globl), applies .align 2, types it as a function,
  * opens it with .ent and places the label, then declares an empty frame
  * (.frame sp, 0, ra).  The assembler option state is pushed before
  * arch=mips32r2, noreorder and noat are selected, so the ".set pop"
  * issued by END can restore it.
  */
 #define LEAF_MIPS32R2(symbol)          \
     .globl  symbol;                    \
     .align  2;                         \
     .type   symbol, @function;         \
     .ent    symbol, 0;                 \
 symbol:                                \
     .frame  sp, 0, ra;                 \
     .set    push;                      \
     .set    arch=mips32r2;             \
     .set    noreorder;                 \
     .set    noat;
 |  105  | 
 /*
  * LEAF_MIPS_DSPR2(symbol) - open a leaf routine for MIPS DSPr2.
  *
  * Expands to LEAF_MIPS32R2(symbol) followed by ".set dspr2".
  */
 #define LEAF_MIPS_DSPR2(symbol)        \
     LEAF_MIPS32R2(symbol)              \
     .set    dspr2;
 |  112  | 
 /*
  * END(function) - close a routine.
  *
  * Pops the assembler option state (.set pop), closes the routine with
  * .end, and records its size as the distance from the function label to
  * the current location.
  */
 #define END(function)                  \
     .set    pop;                       \
     .end    function;                  \
     .size   function, .-function
 |  120  | 
 /*
  * CHECK_STACK_OFFSET regs_num, stack_offset
  *
  * Assembly-time check that a stack adjustment of stack_offset bytes leaves
  * room for storing/restoring regs_num registers (4 bytes each).
  * Since the MIPS ABI allows usage of the first 16 bytes of the stack frame
  * (the area reserved for the input arguments already held in a0-a3), that
  * space is counted as available, so the requirement is
  *
  *     stack_offset >= regs_num * 4 - 16
  *
  * and assembly stops with an .error when it is not met.
  *
  * Both arguments and the right-hand side are parenthesized so that an
  * expression passed as an argument (for example 2+3) is evaluated as a
  * unit, and so that the result does not depend on how the assembler ranks
  * the comparison against the subtraction.
  */
 .macro CHECK_STACK_OFFSET regs_num, stack_offset
     .if (\stack_offset) < ((\regs_num) * 4 - 16)
     .error "Stack offset too small."
     .endif
 .endm
 |  134  | 
 /*
  * Helper for SAVE_REGS_ON_STACK (slots 5-14).  When the slot is in use
  * (reg != 0) it validates stack_offset for slot registers and then emits
  * the store of reg at offset(sp).  An unused slot emits nothing.
  */
 .macro SAVE_REG_CHECKED reg, slot, offset, stack_offset
     .if \reg != 0
     CHECK_STACK_OFFSET \slot, \stack_offset
     sw              \reg, \offset(sp)
     .endif
 .endm

 /*
  * SAVE_REGS_ON_STACK stack_offset, r1 [, r2, ..., r14]
  *
  * Pushes a set of registers on the stack.  At most 14 registers can be
  * saved (a0-a3, v0-v1 and s0-s7); r1 is mandatory, the remaining slots
  * default to 0 and emit no store when left unused.
  * stack_offset is the number of bytes subtracted from the stack pointer
  * (sp) before the registers are stored (no adjustment is emitted for 0).
  * It must not be negative, must be a multiple of 4, and must be big enough
  * as described by the CHECK_STACK_OFFSET macro, which is applied from the
  * fifth register on.
  * Registers are stored in consecutive words starting at 0(sp).
  * Intended to be paired with RESTORE_REGS_FROM_STACK, given the same
  * arguments.  Example:
  *  SAVE_REGS_ON_STACK      4, v0, v1, s0, s1
  *  RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1
  */
 .macro SAVE_REGS_ON_STACK stack_offset = 0, r1, \
                           r2  = 0, r3  = 0, r4  = 0, \
                           r5  = 0, r6  = 0, r7  = 0, \
                           r8  = 0, r9  = 0, r10 = 0, \
                           r11 = 0, r12 = 0, r13 = 0, \
                           r14 = 0
     /* Reject a negative offset or one with a remainder modulo 4. */
     .if (\stack_offset < 0) || (\stack_offset - (\stack_offset / 4) * 4)
     .error "Stack offset must be pozitive and multiple of 4."
     .endif

     /* Make room first; the stores below are relative to the new sp. */
     .if \stack_offset != 0
     addiu           sp, sp, -\stack_offset
     .endif

     /* Slots 1-4 need no size check. */
     sw              \r1, 0(sp)
     .if \r2 != 0
     sw              \r2, 4(sp)
     .endif
     .if \r3 != 0
     sw              \r3, 8(sp)
     .endif
     .if \r4 != 0
     sw              \r4, 12(sp)
     .endif

     /* Slots 5-14 are size-checked before each store. */
     SAVE_REG_CHECKED \r5,  5,  16, \stack_offset
     SAVE_REG_CHECKED \r6,  6,  20, \stack_offset
     SAVE_REG_CHECKED \r7,  7,  24, \stack_offset
     SAVE_REG_CHECKED \r8,  8,  28, \stack_offset
     SAVE_REG_CHECKED \r9,  9,  32, \stack_offset
     SAVE_REG_CHECKED \r10, 10, 36, \stack_offset
     SAVE_REG_CHECKED \r11, 11, 40, \stack_offset
     SAVE_REG_CHECKED \r12, 12, 44, \stack_offset
     SAVE_REG_CHECKED \r13, 13, 48, \stack_offset
     SAVE_REG_CHECKED \r14, 14, 52, \stack_offset
 .endm
 |  209  | 
 /*
  * Helper for RESTORE_REGS_FROM_STACK (slots 5-14).  When the slot is in
  * use (reg != 0) it validates stack_offset for slot registers and then
  * emits the load of reg from offset(sp).  An unused slot emits nothing.
  */
 .macro RESTORE_REG_CHECKED reg, slot, offset, stack_offset
     .if \reg != 0
     CHECK_STACK_OFFSET \slot, \stack_offset
     lw              \reg, \offset(sp)
     .endif
 .endm

 /*
  * RESTORE_REGS_FROM_STACK stack_offset, r1 [, r2, ..., r14]
  *
  * Pops a set of registers from the stack.  At most 14 registers can be
  * restored (a0-a3, v0-v1 and s0-s7); r1 is mandatory, the remaining slots
  * default to 0 and emit no load when left unused.
  * Registers are loaded from consecutive words starting at 0(sp).
  * stack_offset is the number of bytes added to the stack pointer (sp)
  * after the registers are restored (no adjustment is emitted for 0).
  * It must not be negative, must be a multiple of 4, and must be big enough
  * as described by the CHECK_STACK_OFFSET macro, which is applied from the
  * fifth register on.
  * Intended to be paired with SAVE_REGS_ON_STACK, given the same
  * arguments.  Example:
  *  SAVE_REGS_ON_STACK      4, v0, v1, s0, s1
  *  RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1
  */
 .macro RESTORE_REGS_FROM_STACK stack_offset = 0, r1, \
                                r2  = 0, r3  = 0, r4  = 0, \
                                r5  = 0, r6  = 0, r7  = 0, \
                                r8  = 0, r9  = 0, r10 = 0, \
                                r11 = 0, r12 = 0, r13 = 0, \
                                r14 = 0
     /* Reject a negative offset or one with a remainder modulo 4. */
     .if (\stack_offset < 0) || (\stack_offset - (\stack_offset/4)*4)
     .error "Stack offset must be pozitive and multiple of 4."
     .endif

     /* Slots 1-4 need no size check. */
     lw              \r1, 0(sp)
     .if \r2 != 0
     lw              \r2, 4(sp)
     .endif
     .if \r3 != 0
     lw              \r3, 8(sp)
     .endif
     .if \r4 != 0
     lw              \r4, 12(sp)
     .endif

     /* Slots 5-14 are size-checked before each load. */
     RESTORE_REG_CHECKED \r5,  5,  16, \stack_offset
     RESTORE_REG_CHECKED \r6,  6,  20, \stack_offset
     RESTORE_REG_CHECKED \r7,  7,  24, \stack_offset
     RESTORE_REG_CHECKED \r8,  8,  28, \stack_offset
     RESTORE_REG_CHECKED \r9,  9,  32, \stack_offset
     RESTORE_REG_CHECKED \r10, 10, 36, \stack_offset
     RESTORE_REG_CHECKED \r11, 11, 40, \stack_offset
     RESTORE_REG_CHECKED \r12, 12, 44, \stack_offset
     RESTORE_REG_CHECKED \r13, 13, 48, \stack_offset
     RESTORE_REG_CHECKED \r14, 14, 52, \stack_offset

     /* Release the frame only after every load has been issued. */
     .if \stack_offset != 0
     addiu           sp, sp, \stack_offset
     .endif
 .endm
 |  284  | 
 |  285  | 
| OLD | NEW |