Index: simd/jsimd_mips_dspr2_asm.h |
diff --git a/simd/jsimd_mips_dspr2_asm.h b/simd/jsimd_mips_dspr2_asm.h |
new file mode 100644 |
index 0000000000000000000000000000000000000000..50ec31bfbb65927f2b6916fd63cee7b2179015be |
--- /dev/null |
+++ b/simd/jsimd_mips_dspr2_asm.h |
@@ -0,0 +1,285 @@ |
+/* |
+ * MIPS DSPr2 optimizations for libjpeg-turbo |
+ * |
+ * Copyright (C) 2013, MIPS Technologies, Inc., California. |
+ * All rights reserved. |
+ * Authors: Teodora Novkovic (teodora.novkovic@imgtec.com) |
+ * Darko Laus (darko.laus@imgtec.com) |
+ * This software is provided 'as-is', without any express or implied |
+ * warranty. In no event will the authors be held liable for any damages |
+ * arising from the use of this software. |
+ * |
+ * Permission is granted to anyone to use this software for any purpose, |
+ * including commercial applications, and to alter it and redistribute it |
+ * freely, subject to the following restrictions: |
+ * |
+ * 1. The origin of this software must not be misrepresented; you must not |
+ * claim that you wrote the original software. If you use this software |
+ * in a product, an acknowledgment in the product documentation would be |
+ * appreciated but is not required. |
+ * 2. Altered source versions must be plainly marked as such, and must not be |
+ * misrepresented as being the original software. |
+ * 3. This notice may not be removed or altered from any source distribution. |
+ */ |
+ |
+#define zero $0  /* Symbolic names for the general-purpose registers $0-$31. */ |
+#define AT $1  /* NOTE(review): names look like the usual MIPS o32 register names - confirm against the ABI. */ |
+#define v0 $2 |
+#define v1 $3 |
+#define a0 $4  /* a0-a3: registers in which the function's input arguments arrive. */ |
+#define a1 $5 |
+#define a2 $6 |
+#define a3 $7 |
+#define t0 $8 |
+#define t1 $9 |
+#define t2 $10 |
+#define t3 $11 |
+#define t4 $12 |
+#define t5 $13 |
+#define t6 $14 |
+#define t7 $15 |
+#define s0 $16  /* s0-s7, v0-v1 and a0-a3 are the registers the stack save/restore macros in this file are documented for. */ |
+#define s1 $17 |
+#define s2 $18 |
+#define s3 $19 |
+#define s4 $20 |
+#define s5 $21 |
+#define s6 $22 |
+#define s7 $23 |
+#define t8 $24 |
+#define t9 $25 |
+#define k0 $26 |
+#define k1 $27 |
+#define gp $28 |
+#define sp $29  /* sp is the base register used by the stack save/restore macros in this file. */ |
+#define fp $30  /* fp and s8 are two names for the same register, $30. */ |
+#define s8 $30 |
+#define ra $31 |
+ |
+#define f0 $f0  /* Names for the floating-point registers $f0-$f31, usable without the $ prefix. */ |
+#define f1 $f1  /* A macro name is not expanded again inside its own replacement, so these definitions do not recurse. */ |
+#define f2 $f2 |
+#define f3 $f3 |
+#define f4 $f4 |
+#define f5 $f5 |
+#define f6 $f6 |
+#define f7 $f7 |
+#define f8 $f8 |
+#define f9 $f9 |
+#define f10 $f10 |
+#define f11 $f11 |
+#define f12 $f12 |
+#define f13 $f13 |
+#define f14 $f14 |
+#define f15 $f15 |
+#define f16 $f16 |
+#define f17 $f17 |
+#define f18 $f18 |
+#define f19 $f19 |
+#define f20 $f20 |
+#define f21 $f21 |
+#define f22 $f22 |
+#define f23 $f23 |
+#define f24 $f24 |
+#define f25 $f25 |
+#define f26 $f26 |
+#define f27 $f27 |
+#define f28 $f28 |
+#define f29 $f29 |
+#define f30 $f30 |
+#define f31 $f31 |
+ |
+/* |
+ * LEAF_MIPS32R2(symbol) - open the global leaf routine `symbol`, assembled for MIPS32r2; close it with END(symbol) |
+ */ |
+#define LEAF_MIPS32R2(symbol) \ |
+ .globl symbol; /* make the entry point visible to the linker */ \ |
+ .align 2; \ |
+ .type symbol, @function; \ |
+ .ent symbol, 0; \ |
+symbol: .frame sp, 0, ra; /* entry label; declared frame size is 0, return address in ra */ \ |
+ .set push; /* save the current .set options; END restores them with .set pop */ \ |
+ .set arch=mips32r2; \ |
+ .set noreorder; /* the assembler must not reorder instructions in the routine body */ \ |
+ .set noat; /* the trailing semicolon lets LEAF_MIPS_DSPR2 append another directive */ |
+ |
+/* |
+ * LEAF_MIPS_DSPR2(symbol) - same as LEAF_MIPS32R2(symbol), plus `.set dspr2` for DSPr2 code; close it with END(symbol) |
+ */ |
+#define LEAF_MIPS_DSPR2(symbol) \ |
+LEAF_MIPS32R2(symbol) /* emits the symbol declaration, .set push and the base .set options */ \ |
+ .set dspr2; /* issued after .set push, so the .set pop in END undoes it as well */ |
+ |
+/* |
+ * END(function) - close a routine opened with LEAF_MIPS32R2 or LEAF_MIPS_DSPR2 and record its size |
+ */ |
+#define END(function) \ |
+ .set pop; /* restore the .set options saved by .set push in the LEAF_* macro */ \ |
+ .end function; \ |
+ .size function,.-function /* size = current location minus the entry label */ |
+ |
+/* |
+ * CHECK_STACK_OFFSET regs_num, stack_offset - assemble-time check that |
+ * stack_offset bytes are enough for storing/restoring regs_num registers |
+ * (4 bytes each) to/from the stack. Since the MIPS ABI allows usage of the |
+ * first 16 bytes of the stack frame (reserved for the input arguments of |
+ * the function, which are already held in a0-a3), that space is reused and |
+ * the requirement is stack_offset >= regs_num * 4 - 16; otherwise assembly fails. |
+ */ |
+.macro CHECK_STACK_OFFSET regs_num, stack_offset |
+.if \stack_offset < \regs_num * 4 - 16 |
+.error "Stack offset too small." |
+.endif |
+.endm |
+ |
+/* |
+ * Saves a set of registers on the stack. At most 14 registers can be |
+ * saved (a0-a3, v0-v1 and s0-s7). stack_offset is the number of bytes |
+ * subtracted from the stack pointer (sp) before the registers are stored at |
+ * 0(sp), 4(sp), ... in argument order; it must be a non-negative multiple |
+ * of 4 and big enough, as described by the CHECK_STACK_OFFSET macro. |
+ * r1 is always stored; r2-r14 are optional (default 0 = unused). |
+ * Intended to be paired with RESTORE_REGS_FROM_STACK, same arguments: |
+ * SAVE_REGS_ON_STACK 4, v0, v1, s0, s1 |
+ * RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1 |
+ */ |
+.macro SAVE_REGS_ON_STACK stack_offset = 0, r1, \ |
+ r2 = 0, r3 = 0, r4 = 0, \ |
+ r5 = 0, r6 = 0, r7 = 0, \ |
+ r8 = 0, r9 = 0, r10 = 0, \ |
+ r11 = 0, r12 = 0, r13 = 0, \ |
+ r14 = 0 |
+ .if (\stack_offset < 0) || (\stack_offset - (\stack_offset / 4) * 4)  /* second term is stack_offset mod 4 */ |
+ .error "Stack offset must be pozitive and multiple of 4."  /* NOTE(review): typo (pozitive) in the message; an offset of 0 is accepted as well */ |
+ .endif |
+ .if \stack_offset != 0 |
+ addiu sp, sp, -\stack_offset  /* make room: sp -= stack_offset */ |
+ .endif |
+ sw \r1, 0(sp)  /* r1 is stored unconditionally */ |
+ .if \r2 != 0  /* 0 is the default value, i.e. the argument was not supplied */ |
+ sw \r2, 4(sp) |
+ .endif |
+ .if \r3 != 0 |
+ sw \r3, 8(sp) |
+ .endif |
+ .if \r4 != 0 |
+ sw \r4, 12(sp) |
+ .endif |
+ .if \r5 != 0 |
+ CHECK_STACK_OFFSET 5, \stack_offset  /* from the fifth slot on, stack_offset is verified before the store */ |
+ sw \r5, 16(sp) |
+ .endif |
+ .if \r6 != 0 |
+ CHECK_STACK_OFFSET 6, \stack_offset |
+ sw \r6, 20(sp) |
+ .endif |
+ .if \r7 != 0 |
+ CHECK_STACK_OFFSET 7, \stack_offset |
+ sw \r7, 24(sp) |
+ .endif |
+ .if \r8 != 0 |
+ CHECK_STACK_OFFSET 8, \stack_offset |
+ sw \r8, 28(sp) |
+ .endif |
+ .if \r9 != 0 |
+ CHECK_STACK_OFFSET 9, \stack_offset |
+ sw \r9, 32(sp) |
+ .endif |
+ .if \r10 != 0 |
+ CHECK_STACK_OFFSET 10, \stack_offset |
+ sw \r10, 36(sp) |
+ .endif |
+ .if \r11 != 0 |
+ CHECK_STACK_OFFSET 11, \stack_offset |
+ sw \r11, 40(sp) |
+ .endif |
+ .if \r12 != 0 |
+ CHECK_STACK_OFFSET 12, \stack_offset |
+ sw \r12, 44(sp) |
+ .endif |
+ .if \r13 != 0 |
+ CHECK_STACK_OFFSET 13, \stack_offset |
+ sw \r13, 48(sp) |
+ .endif |
+ .if \r14 != 0 |
+ CHECK_STACK_OFFSET 14, \stack_offset |
+ sw \r14, 52(sp) |
+ .endif |
+.endm |
+ |
+/* |
+ * Restores a set of registers from the stack. At most 14 registers can |
+ * be restored (a0-a3, v0-v1 and s0-s7). Registers are loaded from 0(sp), |
+ * 4(sp), ... in argument order; r1 is always loaded, r2-r14 are optional |
+ * (default 0 = unused). stack_offset is the number of bytes added back to |
+ * the stack pointer (sp) after the loads; it must be a non-negative multiple |
+ * of 4 and big enough, as described by the CHECK_STACK_OFFSET macro. |
+ * Intended to be paired with SAVE_REGS_ON_STACK, with the same arguments: |
+ * SAVE_REGS_ON_STACK 4, v0, v1, s0, s1 |
+ * RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1 |
+ */ |
+.macro RESTORE_REGS_FROM_STACK stack_offset = 0, r1, \ |
+ r2 = 0, r3 = 0, r4 = 0, \ |
+ r5 = 0, r6 = 0, r7 = 0, \ |
+ r8 = 0, r9 = 0, r10 = 0, \ |
+ r11 = 0, r12 = 0, r13 = 0, \ |
+ r14 = 0 |
+ .if (\stack_offset < 0) || (\stack_offset - (\stack_offset/4)*4)  /* second term is stack_offset mod 4 */ |
+ .error "Stack offset must be pozitive and multiple of 4."  /* NOTE(review): typo (pozitive) in the message; an offset of 0 is accepted as well */ |
+ .endif |
+ lw \r1, 0(sp)  /* r1 is loaded unconditionally */ |
+ .if \r2 != 0  /* 0 is the default value, i.e. the argument was not supplied */ |
+ lw \r2, 4(sp) |
+ .endif |
+ .if \r3 != 0 |
+ lw \r3, 8(sp) |
+ .endif |
+ .if \r4 != 0 |
+ lw \r4, 12(sp) |
+ .endif |
+ .if \r5 != 0 |
+ CHECK_STACK_OFFSET 5, \stack_offset  /* from the fifth slot on, stack_offset is verified before the load */ |
+ lw \r5, 16(sp) |
+ .endif |
+ .if \r6 != 0 |
+ CHECK_STACK_OFFSET 6, \stack_offset |
+ lw \r6, 20(sp) |
+ .endif |
+ .if \r7 != 0 |
+ CHECK_STACK_OFFSET 7, \stack_offset |
+ lw \r7, 24(sp) |
+ .endif |
+ .if \r8 != 0 |
+ CHECK_STACK_OFFSET 8, \stack_offset |
+ lw \r8, 28(sp) |
+ .endif |
+ .if \r9 != 0 |
+ CHECK_STACK_OFFSET 9, \stack_offset |
+ lw \r9, 32(sp) |
+ .endif |
+ .if \r10 != 0 |
+ CHECK_STACK_OFFSET 10, \stack_offset |
+ lw \r10, 36(sp) |
+ .endif |
+ .if \r11 != 0 |
+ CHECK_STACK_OFFSET 11, \stack_offset |
+ lw \r11, 40(sp) |
+ .endif |
+ .if \r12 != 0 |
+ CHECK_STACK_OFFSET 12, \stack_offset |
+ lw \r12, 44(sp) |
+ .endif |
+ .if \r13 != 0 |
+ CHECK_STACK_OFFSET 13, \stack_offset |
+ lw \r13, 48(sp) |
+ .endif |
+ .if \r14 != 0 |
+ CHECK_STACK_OFFSET 14, \stack_offset |
+ lw \r14, 52(sp) |
+ .endif |
+ .if \stack_offset != 0 |
+ addiu sp, sp, \stack_offset  /* release the space: sp += stack_offset */ |
+ .endif |
+.endm |
+ |
+ |