OLD | NEW |
(Empty) | |
| 1 Enabling SSE support |
| 2 |
| 3 Copyright (c) 2016 Google, Inc. |
| 4 Written by Mike Klein, Matt Sarett |
| 5 |
| 6 This INSTALL file written by Glenn Randers-Pehrson, 2016. |
| 7 |
| 8 If you have moved intel_init.c and filter_sse2_intrinsics.c to a different |
| 9 directory, be sure to update the '#include "../../pngpriv.h"' line in both |
| 10 files if necessary to point to the correct relative location of pngpriv.h |
| 11 with respect to the new location of those files. |
| 12 |
| 13 To enable SSE support in libpng, follow the instructions in I, II, or III, |
| 14 below: |
| 15 |
| 16 I. Using patched "configure" scripts: |
| 17 |
| 18 First, apply intel_sse.patch in your build directory. |
| 19 |
| 20 patch -i contrib/intel/intel_sse.patch -p1 |
| 21 |
| 22 Then, if you are not building in a new Git clone (e.g., you are building |
| 23 from a tar distribution), remove any existing pre-built configure scripts: |
| 24 |
| 25 ./configure --enable-maintainer-mode |
| 26 make maintainer-clean |
| 27 ./autogen.sh --maintainer --clean |
| 28 |
| 29 Finally, configure and build libpng with -DPNG_INTEL_SSE in CPPFLAGS: |
| 30 |
| 31 ./autogen.sh --maintainer |
| 32 CPPFLAGS="-DPNG_INTEL_SSE" ./configure [options] |
| 33 make CPPFLAGS="-DPNG_INTEL_SSE" [options] |
| 34 make |
| 35 |
| 36 II. Using a custom makefile: |
| 37 |
| 38 If you are using a custom makefile, you will have to update it |
| 39 manually to include contrib/intel/*.o in the dependencies, and to define |
| 40 PNG_INTEL_SSE. |
| 41 |
| 42 III. Using manually updated "configure" scripts: |
| 43 |
| 44 If you prefer, manually edit pngpriv.h, configure.ac, and Makefile.am, |
| 45 following the instructions below, then follow the instructions in |
| 46 section II of INSTALL in the main libpng directory, then configure libpng |
| 47 with -DPNG_INTEL_SSE in CPPFLAGS. |
| 48 |
| 49 1. Add the following code to configure.ac under HOST SPECIFIC OPTIONS |
| 50 directly beneath the section for ARM: |
| 51 |
| 52 -----------------cut---------------- |
| 53 # INTEL |
| 54 # ===== |
| 55 # |
| 56 # INTEL SSE (SIMD) support. |
| 57 |
| 58 AC_ARG_ENABLE([intel-sse], |
| 59 AS_HELP_STRING([[[--enable-intel-sse]]], |
| 60 [Enable Intel SSE optimizations: =no/off, yes/on:] |
| 61 [no/off: disable the optimizations;] |
| 62 [yes/on: enable the optimizations.] |
| 63 [If not specified: determined by the compiler.]), |
| 64 [case "$enableval" in |
| 65 no|off) |
| 66 # disable the default enabling: |
| 67 AC_DEFINE([PNG_INTEL_SSE_OPT], [0], |
| 68 [Disable Intel SSE optimizations]) |
| 69 # Prevent inclusion of the Intel SSE source files below: |
| 70 enable_intel_sse=no;; |
| 71 yes|on) |
| 72 AC_DEFINE([PNG_INTEL_SSE_OPT], [1], |
| 73 [Enable Intel SSE optimizations]);; |
| 74 *) |
| 75 AC_MSG_ERROR([--enable-intel-sse=${enable_intel_sse}: invalid value]) |
| 76 esac]) |
| 77 |
| 78 # Add Intel specific files to all builds where the host_cpu is Intel ('x86*') |
| 79 # or where Intel optimizations were explicitly requested (this allows a |
| 80 # fallback if a future host CPU does not match 'x86*') |
| 81 AM_CONDITIONAL([PNG_INTEL_SSE], |
| 82 [test "$enable_intel_sse" != 'no' && |
| 83 case "$host_cpu" in |
| 84 i?86|x86_64) :;; |
| 85 *) test "$enable_intel_sse" != '';; |
| 86 esac]) |
| 87 -----------------cut---------------- |
| 88 |
| 89 2. Add the following code to Makefile.am under HOST SPECIFIC OPTIONS |
| 90 directly beneath the "if PNG_ARM_NEON ... endif" statement: |
| 91 |
| 92 -----------------cut---------------- |
| 93 if PNG_INTEL_SSE |
| 94 libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += contrib/intel/intel_init.c\ |
| 95 contrib/intel/filter_sse2_intrinsics.c |
| 96 endif |
| 97 -----------------cut---------------- |
| 98 |
| 99 3. Add the following lines to pngpriv.h, following the PNG_ARM_NEON_OPT |
| 100 code: |
| 101 |
| 102 -----------------cut---------------- |
| 103 #ifndef PNG_INTEL_SSE_OPT |
| 104 # ifdef PNG_INTEL_SSE |
| 105 /* Only check for SSE if the build configuration has been modified to |
| 106 * enable SSE optimizations. This means that these optimizations will |
| 107 * be off by default. See contrib/intel for more details. |
| 108 */ |
| 109 # if defined(__SSE4_1__) || defined(__AVX__) || defined(__SSSE3__) || \ |
| 110 defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \ |
| 111 (defined(_M_IX86_FP) && _M_IX86_FP >= 2) |
| 112 # define PNG_INTEL_SSE_OPT 1 |
| 113 # endif |
| 114 # endif |
| 115 #endif |
| 116 |
| 117 #if PNG_INTEL_SSE_OPT > 0 |
| 118 # ifndef PNG_INTEL_SSE_IMPLEMENTATION |
| 119 # if defined(__SSE4_1__) || defined(__AVX__) |
| 120 /* We are not actually using AVX, but checking for AVX is the best |
| 121 way we can detect SSE4.1 and SSSE3 on MSVC. |
| 122 */ |
| 123 # define PNG_INTEL_SSE_IMPLEMENTATION 3 |
| 124 # elif defined(__SSSE3__) |
| 125 # define PNG_INTEL_SSE_IMPLEMENTATION 2 |
| 126 # elif defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \ |
| 127 (defined(_M_IX86_FP) && _M_IX86_FP >= 2) |
| 128 # define PNG_INTEL_SSE_IMPLEMENTATION 1 |
| 129 # else |
| 130 # define PNG_INTEL_SSE_IMPLEMENTATION 0 |
| 131 # endif |
| 132 # endif |
| 133 |
| 134 # if PNG_INTEL_SSE_IMPLEMENTATION > 0 |
| 135 # define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_sse2 |
| 136 # endif |
| 137 #endif |
| 138 |
| 139 -----------------cut---------------- |
| 140 |
| 141 4. Add the following lines to pngpriv.h, following the prototype for |
| 142 png_read_filter_row_paeth4_neon: |
| 143 |
| 144 -----------------cut---------------- |
| 145 PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_sse2,(png_row_infop |
| 146 row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); |
| 147 PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_sse2,(png_row_infop |
| 148 row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); |
| 149 PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_sse2,(png_row_infop |
| 150 row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); |
| 151 PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_sse2,(png_row_infop |
| 152 row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); |
| 153 PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_sse2,(png_row_infop |
| 154 row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); |
| 155 PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_sse2,(png_row_infop |
| 156 row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); |
| 157 |
| 158 -----------------cut---------------- |
OLD | NEW |