OLD | NEW |
(Empty) | |
| 1 Enabling SSE support |
| 2 |
| 3 Copyright (c) 2016 Google, Inc. |
| 4 Written by Mike Klein, Matt Sarett |
| 5 |
| 6 This INSTALL file written by Glenn Randers-Pehrson, 2016. |
| 7 |
| 8 If you have moved intel_init.c and filter_sse2_intrinsics.c to a different |
| 9 directory, be sure to update the '#include "../../pngpriv.h"' line in both |
| 10 files if necessary to point to the correct relative location of pngpriv.h |
| 11 with respect to the new location of those files. |
| 12 |
| 13 To enable SSE support in libpng, follow the instructions in I, II, or III, |
| 14 below: |
| 15 |
| 16 I. Using patched "configure" scripts: |
| 17 |
| 18 First, apply intel_sse.patch in your build directory. |
| 19 |
| 20 patch -i contrib/intel/intel_sse.patch -p1 |
| 21 |
| 22 Then, if you are not building in a new GIT clone, e.g., in a tar |
| 23 distribution, remove any existing pre-built configure scripts: |
| 24 |
| 25 ./configure --enable-maintainer-mode |
| 26 make maintainer-clean |
| 27 ./autogen.sh --maintainer --clean |
| 28 |
| 29 Finally, configure libpng with -DPNG_INTEL_SSE in CPPFLAGS: |
| 30 |
| 31 ./autogen.sh --maintainer |
| 32 CPPFLAGS="-DPNG_INTEL_SSE" ./configure [options] |
| 33 make CPPFLAGS="-DPNG_INTEL_SSE" [options] |
| 34 make |
| 35 |
| 36 II. Using a custom makefile: |
| 37 |
| 38 If you are using a custom makefile, you will have to update it |
| 39 manually to include contrib/intel/*.o in the dependencies, and to define |
| 40 PNG_INTEL_SSE. |
| 41 |
| 42 III. Using manually updated "configure" scripts: |
| 43 |
| 44 If you prefer, manually edit pngpriv.h, configure.ac, and Makefile.am, |
| 45 following the instructions below, then follow the instructions in |
| 46 section II of INSTALL in the main libpng directory, then configure libpng |
| 47 with -DPNG_INTEL_SSE in CPPFLAGS. |
| 48 |
| 49 1. Insert the following lines above the copyright line near the top of |
| 50 configure.ac: |
| 51 |
| 52 -----------------cut---------------- |
| 53 # Copyright (c) 2016 Google, Inc. |
| 54 # Written by Mike Klein and Matt Sarett |
| 55 # Derived from the ARM supporting code in libpng/configure.ac, which was |
| 56 -----------------cut---------------- |
| 57 |
| 58 2. Add the following code to configure.ac under HOST SPECIFIC OPTIONS |
| 59 directly beneath the section for ARM: |
| 60 |
| 61 -----------------cut---------------- |
| 62 # INTEL |
| 63 # ===== |
| 64 # |
| 65 # INTEL SSE (SIMD) support. |
| 66 |
| 67 AC_ARG_ENABLE([intel-sse], |
| 68 AS_HELP_STRING([[[--enable-intel-sse]]], |
| 69 [Enable Intel SSE optimizations: =no/off, yes/on:] |
| 70 [no/off: disable the optimizations;] |
| 71 [yes/on: enable the optimizations.] |
| 72 [If not specified: determined by the compiler.]), |
| 73 [case "$enableval" in |
| 74 no|off) |
| 75 # disable the default enabling: |
| 76 AC_DEFINE([PNG_INTEL_SSE_OPT], [0], |
| 77 [Disable Intel SSE optimizations]) |
| 78 # Prevent inclusion of the Intel-specific source files below: |
| 79 enable_intel_sse=no;; |
| 80 yes|on) |
| 81 AC_DEFINE([PNG_INTEL_SSE_OPT], [1], |
| 82 [Enable Intel SSE optimizations]);; |
| 83 *) |
| 84 AC_MSG_ERROR([--enable-intel-sse=${enable_intel_sse}: invalid value]) |
| 85 esac]) |
| 86 |
| 87 # Add Intel specific files to all builds where the host_cpu is Intel ('x86*') |
| 88 # or where Intel optimizations were explicitly requested (this allows a |
| 89 # fallback if a future host CPU does not match 'x86*') |
| 90 AM_CONDITIONAL([PNG_INTEL_SSE], |
| 91 [test "$enable_intel_sse" != 'no' && |
| 92 case "$host_cpu" in |
| 93 i?86|x86_64) :;; |
| 94 *) test "$enable_intel_sse" != '';; |
| 95 esac]) |
| 96 -----------------cut---------------- |
| 97 |
| 98 3. Insert the following lines above the copyright line near the top of |
| 99 Makefile.am: |
| 100 |
| 101 -----------------cut---------------- |
| 102 # Copyright (c) 2016 Google, Inc. |
| 103 # Written by Mike Klein and Matt Sarett |
| 104 # Derived from the ARM supporting code in libpng/Makefile.am, which was |
| 105 -----------------cut---------------- |
| 106 |
| 107 4. Add the following code to Makefile.am under HOST SPECIFIC OPTIONS |
| 108 directly beneath the "if PNG_ARM_NEON ... endif" statement: |
| 109 |
| 110 -----------------cut---------------- |
| 111 if PNG_INTEL_SSE |
| 112 libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += contrib/intel/intel_init.c\ |
| 113 contrib/intel/filter_sse2_intrinsics.c |
| 114 endif |
| 115 -----------------cut---------------- |
| 116 |
| 117 5. Add the following lines to pngpriv.h, following the PNG_ARM_NEON_OPT |
| 118 code: |
| 119 |
| 120 -----------------cut---------------- |
| 121 #ifndef PNG_INTEL_SSE_OPT |
| 122 # ifdef PNG_INTEL_SSE |
| 123 /* Only check for SSE if the build configuration has been modified to |
| 124 * enable SSE optimizations. This means that these optimizations will |
| 125 * be off by default. See contrib/intel for more details. |
| 126 */ |
| 127 # if defined(__SSE4_1__) || defined(__AVX__) || defined(__SSSE3__) || \ |
| 128 defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \ |
| 129 (defined(_M_IX86_FP) && _M_IX86_FP >= 2) |
| 130 # define PNG_INTEL_SSE_OPT 1 |
| 131 # endif |
| 132 # endif |
| 133 #endif |
| 134 |
| 135 #if PNG_INTEL_SSE_OPT > 0 |
| 136 # ifndef PNG_INTEL_SSE_IMPLEMENTATION |
| 137 # if defined(__SSE4_1__) || defined(__AVX__) |
| 138 /* We are not actually using AVX, but checking for AVX is the best |
| 139 way we can detect SSE4.1 and SSSE3 on MSVC. |
| 140 */ |
| 141 # define PNG_INTEL_SSE_IMPLEMENTATION 3 |
| 142 # elif defined(__SSSE3__) |
| 143 # define PNG_INTEL_SSE_IMPLEMENTATION 2 |
| 144 # elif defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \ |
| 145 (defined(_M_IX86_FP) && _M_IX86_FP >= 2) |
| 146 # define PNG_INTEL_SSE_IMPLEMENTATION 1 |
| 147 # else |
| 148 # define PNG_INTEL_SSE_IMPLEMENTATION 0 |
| 149 # endif |
| 150 # endif |
| 151 |
| 152 # if PNG_INTEL_SSE_IMPLEMENTATION > 0 |
| 153 # define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_sse2 |
| 154 # endif |
| 155 #endif |
| 156 |
| 157 -----------------cut---------------- |
| 158 |
| 159 6. Add the following lines to pngpriv.h, following the prototype for |
| 160 png_read_filter_row_paeth4_neon: |
| 161 |
| 162 -----------------cut---------------- |
| 163 PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_sse2,(png_row_infop |
| 164 row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); |
| 165 PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_sse2,(png_row_infop |
| 166 row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); |
| 167 PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_sse2,(png_row_infop |
| 168 row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); |
| 169 PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_sse2,(png_row_infop |
| 170 row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); |
| 171 PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_sse2,(png_row_infop |
| 172 row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); |
| 173 PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_sse2,(png_row_infop |
| 174 row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); |
| 175 |
| 176 -----------------cut---------------- |
OLD | NEW |