;
; jsimdcpu.asm - SIMD instruction support check
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
;
; Based on
; x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
; This file should be assembled with NASM (Netwide Assembler),
; can *not* be assembled with Microsoft's MASM or any compatible
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]

%include "jsimdext.inc"

; --------------------------------------------------------------------------
	SECTION	SEG_TEXT
	BITS	32
;
; Check if the CPU supports SIMD instructions
;
; GLOBAL(unsigned int)
; jpeg_simd_cpu_support (void)
;
; Out:   eax = OR of the JSIMD_MMX / JSIMD_SSE / JSIMD_SSE2 / JSIMD_3DNOW
;        flags whose CPUID feature bits are set; 0 when CPUID is not
;        available or reports no standard function beyond leaf 0.
; Saved: ebx, edi (pushed on entry, popped before ret).
; Clobb: eax, ecx, edx, EFLAGS (cpuid overwrites eax/ebx/ecx/edx).
; Note:  the ID bit toggled in EFLAGS during the CPUID probe is not
;        restored before returning.
; EXTN, SEG_TEXT and the JSIMD_* constants are not defined in this file;
; presumably they come from jsimdext.inc.
;

	align	16
	global	EXTN(jpeg_simd_cpu_support)

EXTN(jpeg_simd_cpu_support):
	push	ebx			; cpuid overwrites ebx
;	push	ecx			; need not be preserved
;	push	edx			; need not be preserved
;	push	esi			; unused
	push	edi

	xor	edi,edi			; edi = accumulated simd support flags

	; Probe for CPUID: the instruction is available only if the ID bit
	; (bit 21) of EFLAGS can be changed by software.
	pushfd
	pop	eax
	mov	edx,eax			; edx = EFLAGS as read on entry
	xor	eax, 1<<21		; flip ID bit in EFLAGS
	push	eax
	popfd
	pushfd
	pop	eax
	xor	eax,edx			; eax = bits that actually changed
	jz	short .return		; CPUID is not supported

	; Check for MMX instruction support
	xor	eax,eax
	cpuid				; leaf 0: eax = highest standard leaf
	test	eax,eax
	jz	short .return		; leaf 1 (feature flags) unavailable

	xor	eax,eax
	inc	eax
	cpuid				; leaf 1
	mov	eax,edx			; eax = Standard feature flags

	test	eax, 1<<23		; bit23:MMX
	jz	short .no_mmx
	or	edi, byte JSIMD_MMX
.no_mmx:
	test	eax, 1<<25		; bit25:SSE
	jz	short .no_sse
	or	edi, byte JSIMD_SSE
.no_sse:
	test	eax, 1<<26		; bit26:SSE2
	jz	short .no_sse2
	or	edi, byte JSIMD_SSE2
.no_sse2:

	; Check for 3DNow! instruction support
	mov	eax, 0x80000000
	cpuid				; eax = highest extended leaf
	cmp	eax, 0x80000000
	jbe	short .return		; no extended leaf above 0x80000000

	mov	eax, 0x80000001
	cpuid
	mov	eax,edx			; eax = Extended feature flags

	test	eax, 1<<31		; bit31:3DNow!(vendor independent)
	jz	short .no_3dnow
	or	edi, byte JSIMD_3DNOW
.no_3dnow:

.return:
	mov	eax,edi			; return value = collected flags

	pop	edi
;	pop	esi			; unused
;	pop	edx			; need not be preserved
;	pop	ecx			; need not be preserved
	pop	ebx
	ret

; For some reason, the OS X linker does not honor the request to align the
; segment unless we do this.
	align	16