Index: third_party/libjpeg_turbo/simd/jsimdext.inc |
=================================================================== |
--- third_party/libjpeg_turbo/simd/jsimdext.inc (revision 0) |
+++ third_party/libjpeg_turbo/simd/jsimdext.inc (revision 0) |
@@ -0,0 +1,372 @@ |
+; |
+; jsimdext.inc - common declarations |
+; |
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
+; Copyright 2010 D. R. Commander |
+; |
+; Based on |
+; x86 SIMD extension for IJG JPEG library - version 1.02 |
+; |
+; Copyright (C) 1999-2006, MIYASAKA Masaru. |
+; |
+; This software is provided 'as-is', without any express or implied |
+; warranty. In no event will the authors be held liable for any damages |
+; arising from the use of this software. |
+; |
+; Permission is granted to anyone to use this software for any purpose, |
+; including commercial applications, and to alter it and redistribute it |
+; freely, subject to the following restrictions: |
+; |
+; 1. The origin of this software must not be misrepresented; you must not |
+; claim that you wrote the original software. If you use this software |
+; in a product, an acknowledgment in the product documentation would be |
+; appreciated but is not required. |
+; 2. Altered source versions must be plainly marked as such, and must not be |
+; misrepresented as being the original software. |
+; 3. This notice may not be removed or altered from any source distribution. |
+; |
+; [TAB8] |
+ |
+; ========================================================================== |
+; System-dependent configurations |
+ |
+%ifdef WIN32 ; ----(nasm -fwin32 -DWIN32 ...)-------- |
+; * Microsoft Visual C++ |
+; * MinGW (Minimalist GNU for Windows) |
+; * CygWin |
+; * LCC-Win32 |
+ |
+; -- segment definition -- |
+; |
+%define SEG_TEXT .text align=16 public use32 class=CODE |
+%define SEG_CONST .rdata align=16 public use32 class=CONST |
+ |
+%elifdef WIN64 ; ----(nasm -fwin64 -DWIN64 ...)-------- |
+; * Microsoft Visual C++ |
+ |
+; -- segment definition -- |
+; |
+%define SEG_TEXT .text align=16 public use64 class=CODE |
+%define SEG_CONST .rdata align=16 public use64 class=CONST |
+%ifdef MSVC |
+%define EXTN(name) name ; foo() -> foo |
+%endif |
+ |
+%elifdef OBJ32 ; ----(nasm -fobj -DOBJ32 ...)---------- |
+; * Borland C++ (Win32) |
+ |
+; -- segment definition -- |
+; |
+%define SEG_TEXT .text align=16 public use32 class=CODE |
+%define SEG_CONST .data align=16 public use32 class=DATA |
+ |
+%elifdef ELF ; ----(nasm -felf[64] -DELF ...)------------ |
+; * Linux |
+; * *BSD family Unix using elf format |
+; * Unix System V, including Solaris x86, UnixWare and SCO Unix |
+ |
+; mark stack as non-executable |
+section .note.GNU-stack noalloc noexec nowrite progbits |
+ |
+; -- segment definition -- |
+; |
+%ifdef __x86_64__ |
+%define SEG_TEXT .text progbits align=16 |
+%define SEG_CONST .rodata progbits align=16 |
+%else |
+%define SEG_TEXT .text progbits alloc exec nowrite align=16 |
+%define SEG_CONST .rodata progbits alloc noexec nowrite align=16 |
+%endif |
+ |
+; To make the code position-independent, append -DPIC to the commandline |
+; |
+%define GOT_SYMBOL _GLOBAL_OFFSET_TABLE_ ; ELF supports PIC |
+%define EXTN(name) name ; foo() -> foo |
+ |
+%elifdef AOUT ; ----(nasm -faoutb/aout -DAOUT ...)---- |
+; * Older Linux using a.out format (nasm -f aout -DAOUT ...) |
+; * *BSD family Unix using a.out format (nasm -f aoutb -DAOUT ...) |
+ |
+; -- segment definition -- |
+; |
+%define SEG_TEXT .text |
+%define SEG_CONST .data |
+ |
+; To make the code position-independent, append -DPIC to the commandline |
+; |
+%define GOT_SYMBOL __GLOBAL_OFFSET_TABLE_ ; BSD-style a.out supports PIC |
+ |
+%elifdef MACHO ; ----(nasm -fmacho -DMACHO ...)-------- |
+; * NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X (Mach-O format) |
+ |
+; -- segment definition -- |
+; |
+%define SEG_TEXT .text ;align=16 ; nasm doesn't accept align=16. why? |
+%define SEG_CONST .rodata align=16 |
+ |
+; The generation of position-independent code (PIC) is the default on Darwin. |
+; |
+%define PIC |
+%define GOT_SYMBOL _MACHO_PIC_ ; Mach-O style code-relative addressing |
+ |
+%else ; ----(Other case)---------------------- |
+ |
+; -- segment definition -- |
+; |
+%define SEG_TEXT .text |
+%define SEG_CONST .data |
+ |
+%endif ; ---------------------------------------------- |
+ |
+; ========================================================================== |
+ |
+; -------------------------------------------------------------------------- |
+; Common types |
+; |
+%ifdef __x86_64__ |
+%define POINTER qword ; general pointer type |
+%define SIZEOF_POINTER SIZEOF_QWORD ; sizeof(POINTER) |
+%define POINTER_BIT QWORD_BIT ; sizeof(POINTER)*BYTE_BIT |
+%else |
+%define POINTER dword ; general pointer type |
+%define SIZEOF_POINTER SIZEOF_DWORD ; sizeof(POINTER) |
+%define POINTER_BIT DWORD_BIT ; sizeof(POINTER)*BYTE_BIT |
+%endif |
+ |
+%define INT dword ; signed integer type |
+%define SIZEOF_INT SIZEOF_DWORD ; sizeof(INT) |
+%define INT_BIT DWORD_BIT ; sizeof(INT)*BYTE_BIT |
+ |
+%define FP32 dword ; IEEE754 single |
+%define SIZEOF_FP32 SIZEOF_DWORD ; sizeof(FP32) |
+%define FP32_BIT DWORD_BIT ; sizeof(FP32)*BYTE_BIT |
+ |
+%define MMWORD qword ; int64 (MMX register) |
+%define SIZEOF_MMWORD SIZEOF_QWORD ; sizeof(MMWORD) |
+%define MMWORD_BIT QWORD_BIT ; sizeof(MMWORD)*BYTE_BIT |
+ |
+; NASM is buggy and doesn't properly handle operand sizes for SSE |
+; instructions, so for now we have to define XMMWORD as blank. |
+%define XMMWORD ; int128 (SSE register) |
+%define SIZEOF_XMMWORD SIZEOF_OWORD ; sizeof(XMMWORD) |
+%define XMMWORD_BIT OWORD_BIT ; sizeof(XMMWORD)*BYTE_BIT |
+ |
+; Similar hacks for when we load a dword or MMWORD into an xmm# register |
+%define XMM_DWORD |
+%define XMM_MMWORD |
+ |
+%define SIZEOF_BYTE 1 ; sizeof(BYTE) |
+%define SIZEOF_WORD 2 ; sizeof(WORD) |
+%define SIZEOF_DWORD 4 ; sizeof(DWORD) |
+%define SIZEOF_QWORD 8 ; sizeof(QWORD) |
+%define SIZEOF_OWORD 16 ; sizeof(OWORD) |
+ |
+%define BYTE_BIT 8 ; CHAR_BIT in C |
+%define WORD_BIT 16 ; sizeof(WORD)*BYTE_BIT |
+%define DWORD_BIT 32 ; sizeof(DWORD)*BYTE_BIT |
+%define QWORD_BIT 64 ; sizeof(QWORD)*BYTE_BIT |
+%define OWORD_BIT 128 ; sizeof(OWORD)*BYTE_BIT |
+ |
+; -------------------------------------------------------------------------- |
+; External Symbol Name |
+; |
+%ifndef EXTN |
+%define EXTN(name) _ %+ name ; foo() -> _foo |
+%endif |
+ |
+; -------------------------------------------------------------------------- |
+; Macros for position-independent code (PIC) support |
+; |
+%ifndef GOT_SYMBOL |
+%undef PIC |
+%endif |
+ |
+%ifdef PIC ; ------------------------------------------- |
+ |
+%ifidn GOT_SYMBOL,_MACHO_PIC_ ; -------------------- |
+ |
+; At present, nasm doesn't seem to support PIC generation for Mach-O. |
+; The PIC support code below is a little tricky. |
+ |
+ SECTION SEG_CONST |
+const_base: |
+ |
+%define GOTOFF(got,sym) (got) + (sym) - const_base |
+ |
+%imacro get_GOT 1 |
+ ; NOTE: this macro destroys ecx resister. |
+ call %%geteip |
+ add ecx, byte (%%ref - $) |
+ jmp short %%adjust |
+%%geteip: |
+ mov ecx, POINTER [esp] |
+ ret |
+%%adjust: |
+ push ebp |
+ xor ebp,ebp ; ebp = 0 |
+%ifidni %1,ebx ; (%1 == ebx) |
+ ; db 0x8D,0x9C + jmp near const_base = |
+ ; lea ebx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,9C,E9,(offset32) |
+ db 0x8D,0x9C ; 8D,9C |
+ jmp near const_base ; E9,(const_base-%%ref) |
+%%ref: |
+%else ; (%1 != ebx) |
+ ; db 0x8D,0x8C + jmp near const_base = |
+ ; lea ecx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,8C,E9,(offset32) |
+ db 0x8D,0x8C ; 8D,8C |
+ jmp near const_base ; E9,(const_base-%%ref) |
+%%ref: mov %1, ecx |
+%endif ; (%1 == ebx) |
+ pop ebp |
+%endmacro |
+ |
+%else ; GOT_SYMBOL != _MACHO_PIC_ ---------------- |
+ |
+%define GOTOFF(got,sym) (got) + (sym) wrt ..gotoff |
+ |
+%imacro get_GOT 1 |
+ extern GOT_SYMBOL |
+ call %%geteip |
+ add %1, GOT_SYMBOL + $$ - $ wrt ..gotpc |
+ jmp short %%done |
+%%geteip: |
+ mov %1, POINTER [esp] |
+ ret |
+%%done: |
+%endmacro |
+ |
+%endif ; GOT_SYMBOL == _MACHO_PIC_ ---------------- |
+ |
+%imacro pushpic 1.nolist |
+ push %1 |
+%endmacro |
+%imacro poppic 1.nolist |
+ pop %1 |
+%endmacro |
+%imacro movpic 2.nolist |
+ mov %1,%2 |
+%endmacro |
+ |
+%else ; !PIC ----------------------------------------- |
+ |
+%define GOTOFF(got,sym) (sym) |
+ |
+%imacro get_GOT 1.nolist |
+%endmacro |
+%imacro pushpic 1.nolist |
+%endmacro |
+%imacro poppic 1.nolist |
+%endmacro |
+%imacro movpic 2.nolist |
+%endmacro |
+ |
+%endif ; PIC ----------------------------------------- |
+ |
+; -------------------------------------------------------------------------- |
+; Align the next instruction on {2,4,8,16,..}-byte boundary. |
+; ".balign n,,m" in GNU as |
+; |
+%define MSKLE(x,y) (~(((y) & 0xFFFF) - ((x) & 0xFFFF)) >> 16) |
+%define FILLB(b,n) (($$-(b)) & ((n)-1)) |
+ |
+%imacro alignx 1-2.nolist 0xFFFF |
+%%bs: times MSKLE(FILLB(%%bs,%1),%2) & MSKLE(16,FILLB($,%1)) & FILLB($,%1) \ |
+ db 0x90 ; nop |
+ times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/9 \ |
+ db 0x8D,0x9C,0x23,0x00,0x00,0x00,0x00 ; lea ebx,[ebx+0x00000000] |
+ times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/7 \ |
+ db 0x8D,0xAC,0x25,0x00,0x00,0x00,0x00 ; lea ebp,[ebp+0x00000000] |
+ times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/6 \ |
+ db 0x8D,0xAD,0x00,0x00,0x00,0x00 ; lea ebp,[ebp+0x00000000] |
+ times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/4 \ |
+ db 0x8D,0x6C,0x25,0x00 ; lea ebp,[ebp+0x00] |
+ times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/3 \ |
+ db 0x8D,0x6D,0x00 ; lea ebp,[ebp+0x00] |
+ times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/2 \ |
+ db 0x8B,0xED ; mov ebp,ebp |
+ times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/1 \ |
+ db 0x90 ; nop |
+%endmacro |
+ |
+; Align the next data on {2,4,8,16,..}-byte boundary. |
+; |
+%imacro alignz 1.nolist |
+ align %1, db 0 ; filling zeros |
+%endmacro |
+ |
+%ifdef __x86_64__ |
+ |
+%ifdef WIN64 |
+ |
+%imacro collect_args 0 |
+ push r10 |
+ push r11 |
+ push r12 |
+ push r13 |
+ push r14 |
+ push r15 |
+ mov r10, rcx |
+ mov r11, rdx |
+ mov r12, r8 |
+ mov r13, r9 |
+ mov r14, [rax+48] |
+ mov r15, [rax+56] |
+ push rsi |
+ push rdi |
+ sub rsp, SIZEOF_XMMWORD |
+ movlpd XMMWORD [rsp], xmm6 |
+ sub rsp, SIZEOF_XMMWORD |
+ movlpd XMMWORD [rsp], xmm7 |
+%endmacro |
+ |
+%imacro uncollect_args 0 |
+ movlpd xmm7, XMMWORD [rsp] |
+ add rsp, SIZEOF_XMMWORD |
+ movlpd xmm6, XMMWORD [rsp] |
+ add rsp, SIZEOF_XMMWORD |
+ pop rdi |
+ pop rsi |
+ pop r15 |
+ pop r14 |
+ pop r13 |
+ pop r12 |
+ pop r11 |
+ pop r10 |
+%endmacro |
+ |
+%else |
+ |
+%imacro collect_args 0 |
+ push r10 |
+ push r11 |
+ push r12 |
+ push r13 |
+ push r14 |
+ push r15 |
+ mov r10, rdi |
+ mov r11, rsi |
+ mov r12, rdx |
+ mov r13, rcx |
+ mov r14, r8 |
+ mov r15, r9 |
+%endmacro |
+ |
+%imacro uncollect_args 0 |
+ pop r15 |
+ pop r14 |
+ pop r13 |
+ pop r12 |
+ pop r11 |
+ pop r10 |
+%endmacro |
+ |
+%endif |
+ |
+%endif |
+ |
+; -------------------------------------------------------------------------- |
+; Defines picked up from the C headers |
+; |
+%include "jsimdcfg.inc" |
+ |
+; -------------------------------------------------------------------------- |