| OLD | NEW |
| 1 ; | 1 ; |
| 2 ; jsimdcpu.asm - SIMD instruction support check | 2 ; jsimdcpu.asm - SIMD instruction support check |
| 3 ; | 3 ; |
| 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB | 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
| 5 ; | 5 ; |
| 6 ; Based on | 6 ; Based on |
| 7 ; x86 SIMD extension for IJG JPEG library | 7 ; x86 SIMD extension for IJG JPEG library |
| 8 ; Copyright (C) 1999-2006, MIYASAKA Masaru. | 8 ; Copyright (C) 1999-2006, MIYASAKA Masaru. |
| 9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc | 9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc |
| 10 ; | 10 ; |
| 11 ; This file should be assembled with NASM (Netwide Assembler), | 11 ; This file should be assembled with NASM (Netwide Assembler), |
| 12 ; can *not* be assembled with Microsoft's MASM or any compatible | 12 ; can *not* be assembled with Microsoft's MASM or any compatible |
| 13 ; assembler (including Borland's Turbo Assembler). | 13 ; assembler (including Borland's Turbo Assembler). |
| 14 ; NASM is available from http://nasm.sourceforge.net/ or | 14 ; NASM is available from http://nasm.sourceforge.net/ or |
| 15 ; http://sourceforge.net/project/showfiles.php?group_id=6208 | 15 ; http://sourceforge.net/project/showfiles.php?group_id=6208 |
| 16 ; | 16 ; |
| 17 ; [TAB8] | 17 ; [TAB8] |
| 18 | 18 |
| 19 %include "jsimdext.inc" | 19 %include "jsimdext.inc" |
| 20 | 20 |
| 21 ; -------------------------------------------------------------------------- | 21 ; -------------------------------------------------------------------------- |
| 22 » SECTION»SEG_TEXT | 22 SECTION SEG_TEXT |
| 23 » BITS» 32 | 23 BITS 32 |
| 24 ; | 24 ; |
| 25 ; Check if the CPU supports SIMD instructions | 25 ; Check if the CPU supports SIMD instructions |
| 26 ; | 26 ; |
| 27 ; GLOBAL(unsigned int) | 27 ; GLOBAL(unsigned int) |
| 28 ; jpeg_simd_cpu_support (void) | 28 ; jpeg_simd_cpu_support (void) |
| 29 ; | 29 ; |
| 30 | 30 |
| 31 » align» 16 | 31 align 16 |
| 32 » global» EXTN(jpeg_simd_cpu_support) PRIVATE | 32 global EXTN(jpeg_simd_cpu_support) |
| 33 | 33 |
| 34 EXTN(jpeg_simd_cpu_support): | 34 EXTN(jpeg_simd_cpu_support): |
| 35 » push» ebx | 35 push ebx |
| 36 ;» push» ecx» » ; need not be preserved | 36 ; push ecx ; need not be preserved |
| 37 ;» push» edx» » ; need not be preserved | 37 ; push edx ; need not be preserved |
| 38 ;» push» esi» » ; unused | 38 ; push esi ; unused |
| 39 » push» edi | 39 push edi |
| 40 | 40 |
| 41 » xor» edi,edi»» » ; simd support flag | 41 xor edi,edi ; simd support flag |
| 42 | 42 |
| 43 » pushfd | 43 pushfd |
| 44 » pop» eax | 44 pop eax |
| 45 » mov» edx,eax | 45 mov edx,eax |
| 46 » xor» eax, 1<<21» » ; flip ID bit in EFLAGS | 46 xor eax, 1<<21 ; flip ID bit in EFLAGS |
| 47 » push» eax | 47 push eax |
| 48 » popfd | 48 popfd |
| 49 » pushfd | 49 pushfd |
| 50 » pop» eax | 50 pop eax |
| 51 » xor» eax,edx | 51 xor eax,edx |
| 52 » jz» short .return» » ; CPUID is not supported | 52 jz short .return ; CPUID is not supported |
| 53 | 53 |
| 54 » ; Check for MMX instruction support | 54 ; Check for MMX instruction support |
| 55 » xor» eax,eax | 55 xor eax,eax |
| 56 » cpuid | 56 cpuid |
| 57 » test» eax,eax | 57 test eax,eax |
| 58 » jz» short .return | 58 jz short .return |
| 59 | 59 |
| 60 » xor» eax,eax | 60 xor eax,eax |
| 61 » inc» eax | 61 inc eax |
| 62 » cpuid | 62 cpuid |
| 63 » mov» eax,edx»» » ; eax = Standard feature flags | 63 mov eax,edx ; eax = Standard feature flags |
| 64 | 64 |
| 65 » test» eax, 1<<23» » ; bit23:MMX | 65 test eax, 1<<23 ; bit23:MMX |
| 66 » jz» short .no_mmx | 66 jz short .no_mmx |
| 67 » or» edi, byte JSIMD_MMX | 67 or edi, byte JSIMD_MMX |
| 68 .no_mmx: | 68 .no_mmx: |
| 69 » test» eax, 1<<25» » ; bit25:SSE | 69 test eax, 1<<25 ; bit25:SSE |
| 70 » jz» short .no_sse | 70 jz short .no_sse |
| 71 » or» edi, byte JSIMD_SSE | 71 or edi, byte JSIMD_SSE |
| 72 .no_sse: | 72 .no_sse: |
| 73 » test» eax, 1<<26» » ; bit26:SSE2 | 73 test eax, 1<<26 ; bit26:SSE2 |
| 74 » jz» short .no_sse2 | 74 jz short .no_sse2 |
| 75 » or» edi, byte JSIMD_SSE2 | 75 or edi, byte JSIMD_SSE2 |
| 76 .no_sse2: | 76 .no_sse2: |
| 77 | 77 |
| 78 » ; Check for 3DNow! instruction support | 78 ; Check for 3DNow! instruction support |
| 79 » mov» eax, 0x80000000 | 79 mov eax, 0x80000000 |
| 80 » cpuid | 80 cpuid |
| 81 » cmp» eax, 0x80000000 | 81 cmp eax, 0x80000000 |
| 82 » jbe» short .return | 82 jbe short .return |
| 83 | 83 |
| 84 » mov» eax, 0x80000001 | 84 mov eax, 0x80000001 |
| 85 » cpuid | 85 cpuid |
| 86 » mov» eax,edx»» » ; eax = Extended feature flags | 86 mov eax,edx ; eax = Extended feature flags |
| 87 | 87 |
| 88 » test» eax, 1<<31» » ; bit31:3DNow!(vendor independent) | 88 test eax, 1<<31 ; bit31:3DNow!(vendor independent) |
| 89 » jz» short .no_3dnow | 89 jz short .no_3dnow |
| 90 » or» edi, byte JSIMD_3DNOW | 90 or edi, byte JSIMD_3DNOW |
| 91 .no_3dnow: | 91 .no_3dnow: |
| 92 | 92 |
| 93 .return: | 93 .return: |
| 94 » mov» eax,edi | 94 mov eax,edi |
| 95 | 95 |
| 96 » pop» edi | 96 pop edi |
| 97 ;» pop» esi» » ; unused | 97 ; pop esi ; unused |
| 98 ;» pop» edx» » ; need not be preserved | 98 ; pop edx ; need not be preserved |
| 99 ;» pop» ecx» » ; need not be preserved | 99 ; pop ecx ; need not be preserved |
| 100 » pop» ebx | 100 pop ebx |
| 101 » ret | 101 ret |
| 102 | 102 |
| 103 ; For some reason, the OS X linker does not honor the request to align the | 103 ; For some reason, the OS X linker does not honor the request to align the |
| 104 ; segment unless we do this. | 104 ; segment unless we do this. |
| 105 » align» 16 | 105 align 16 |
| OLD | NEW |