| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 2102 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2113 // Enable optimizations. | 2113 // Enable optimizations. |
| 2114 MaskCpuFlags(benchmark_cpu_info); | 2114 MaskCpuFlags(benchmark_cpu_info); |
| 2115 double opt_time = get_time(); | 2115 double opt_time = get_time(); |
| 2116 for (j = 0; j < benchmark_iterations; j++) { | 2116 for (j = 0; j < benchmark_iterations; j++) { |
| 2117 HalfFloatPlane(reinterpret_cast<uint16*>(orig_y), benchmark_width * 2, | 2117 HalfFloatPlane(reinterpret_cast<uint16*>(orig_y), benchmark_width * 2, |
| 2118 reinterpret_cast<uint16*>(dst_opt), benchmark_width * 2, | 2118 reinterpret_cast<uint16*>(dst_opt), benchmark_width * 2, |
| 2119 scale, benchmark_width, benchmark_height); | 2119 scale, benchmark_width, benchmark_height); |
| 2120 } | 2120 } |
| 2121 opt_time = (get_time() - opt_time) / benchmark_iterations; | 2121 opt_time = (get_time() - opt_time) / benchmark_iterations; |
| 2122 | 2122 |
| 2123 int diff = 0; | 2123 int max_diff = 0; |
| 2124 for (i = 0; i < y_plane_size; ++i) { | 2124 for (i = 0; i < y_plane_size / 2; ++i) { |
| 2125 diff = dst_c[i] - dst_opt[i]; | 2125 int abs_diff = |
| 2126 if (diff) break; | 2126 abs(static_cast<int>(reinterpret_cast<uint16*>(dst_c)[i]) - |
| 2127 static_cast<int>(reinterpret_cast<uint16*>(dst_opt)[i])); |
| 2128 if (abs_diff > max_diff) { |
| 2129 max_diff = abs_diff; |
| 2130 } |
| 2127 } | 2131 } |
| 2128 | 2132 |
| 2129 free_aligned_buffer_page_end(orig_y); | 2133 free_aligned_buffer_page_end(orig_y); |
| 2130 return diff; | 2134 return max_diff; |
| 2131 } | 2135 } |
| 2132 | 2136 |
| 2137 #if defined(__arm__) |
| 2138 static void EnableFlushDenormalToZero(void) { |
| 2139 uint32_t cw; |
| 2140 __asm__ __volatile__ ( |
| 2141 "vmrs %0, fpscr \n" |
| 2142 "orr %0, %0, #0x1000000 \n" |
| 2143 "vmsr fpscr, %0 \n" |
| 2144 : "=r"(cw) :: "memory"); |
| 2145 } |
| 2146 #endif |
| 2147 |
| 2133 // 5 bit exponent with bias of 15 will underflow to a denormal if scale causes | 2148 // 5 bit exponent with bias of 15 will underflow to a denormal if scale causes |
| 2134 // exponent to be less than 0. 15 - log2(65536) = -1. This shouldn't normally | 2149 // exponent to be less than 0. 15 - log2(65536) = -1. This shouldn't normally |
| 2135 // happen since scale is 1/(1<<bits) where bits is 9, 10 or 12. | 2150 // happen since scale is 1/(1<<bits) where bits is 9, 10 or 12. |
| 2136 #define MAXHALFDIFF 0 | 2151 |
| 2137 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_denormal) { | 2152 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_denormal) { |
| 2153 // 32-bit ARM rounding in the denormal case is off by 1 compared to C. |
| 2154 #if defined(__arm__) |
| 2155 EnableFlushDenormalToZero(); |
| 2156 #endif |
| 2138 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, | 2157 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, |
| 2139 benchmark_iterations_, | 2158 benchmark_iterations_, |
| 2140 disable_cpu_flags_, benchmark_cpu_info_, | 2159 disable_cpu_flags_, benchmark_cpu_info_, |
| 2141 1.0f / 65536.0f, 65535); | 2160 1.0f / 65536.0f, 65535); |
| 2142 EXPECT_LE(diff, MAXHALFDIFF); | 2161 EXPECT_EQ(0, diff); |
| 2162 } |
| 2163 |
| 2164 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_One) { |
| 2165 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, |
| 2166 benchmark_iterations_, |
| 2167 disable_cpu_flags_, benchmark_cpu_info_, |
| 2168 1.0f, 65535); |
| 2169 EXPECT_LE(diff, 1); |
| 2170 } |
| 2171 |
| 2172 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_Opt) { |
| 2173 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, |
| 2174 benchmark_iterations_, |
| 2175 disable_cpu_flags_, benchmark_cpu_info_, |
| 2176 1.0f / 4096.0f, 65535); |
| 2177 EXPECT_EQ(0, diff); |
| 2143 } | 2178 } |
| 2144 | 2179 |
| 2145 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_10bit_Opt) { | 2180 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_10bit_Opt) { |
| 2146 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, | 2181 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, |
| 2147 benchmark_iterations_, | 2182 benchmark_iterations_, |
| 2148 disable_cpu_flags_, benchmark_cpu_info_, | 2183 disable_cpu_flags_, benchmark_cpu_info_, |
| 2149 1.0f / 1024.0f, 1023); | 2184 1.0f / 1024.0f, 1023); |
| 2150 EXPECT_LE(diff, MAXHALFDIFF); | 2185 EXPECT_EQ(0, diff); |
| 2151 } | 2186 } |
| 2152 | 2187 |
| 2153 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_9bit_Opt) { | 2188 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_9bit_Opt) { |
| 2154 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, | 2189 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, |
| 2155 benchmark_iterations_, | 2190 benchmark_iterations_, |
| 2156 disable_cpu_flags_, benchmark_cpu_info_, | 2191 disable_cpu_flags_, benchmark_cpu_info_, |
| 2157 1.0f / 512.0f, 511); | 2192 1.0f / 512.0f, 511); |
| 2158 EXPECT_LE(diff, MAXHALFDIFF); | 2193 EXPECT_EQ(0, diff); |
| 2159 } | 2194 } |
| 2160 | 2195 |
| 2161 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Opt) { | 2196 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Opt) { |
| 2162 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, | 2197 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, |
| 2163 benchmark_iterations_, | 2198 benchmark_iterations_, |
| 2164 disable_cpu_flags_, benchmark_cpu_info_, | 2199 disable_cpu_flags_, benchmark_cpu_info_, |
| 2165 1.0f / 4096.0f, 4095); | 2200 1.0f / 4096.0f, 4095); |
| 2166 EXPECT_LE(diff, MAXHALFDIFF); | 2201 EXPECT_EQ(0, diff); |
| 2167 } | |
| 2168 | |
| 2169 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_One) { | |
| 2170 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, | |
| 2171 benchmark_iterations_, | |
| 2172 disable_cpu_flags_, benchmark_cpu_info_, | |
| 2173 1.0f, 4095); | |
| 2174 EXPECT_LE(diff, MAXHALFDIFF); | |
| 2175 } | 2202 } |
| 2176 | 2203 |
| 2177 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Offby1) { | 2204 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Offby1) { |
| 2178 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, | 2205 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, |
| 2179 benchmark_iterations_, | 2206 benchmark_iterations_, |
| 2180 disable_cpu_flags_, benchmark_cpu_info_, | 2207 disable_cpu_flags_, benchmark_cpu_info_, |
| 2181 1.0f / 4095.0f, 4095); | 2208 1.0f / 4095.0f, 4095); |
| 2182 EXPECT_LE(diff, MAXHALFDIFF); | 2209 EXPECT_EQ(0, diff); |
| 2183 } | 2210 } |
| 2184 | 2211 |
| 2212 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_One) { |
| 2213 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, |
| 2214 benchmark_iterations_, |
| 2215 disable_cpu_flags_, benchmark_cpu_info_, |
| 2216 1.0f, 2047); |
| 2217 EXPECT_EQ(0, diff); |
| 2218 } |
| 2219 |
| 2220 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_12bit_One) { |
| 2221 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, |
| 2222 benchmark_iterations_, |
| 2223 disable_cpu_flags_, benchmark_cpu_info_, |
| 2224 1.0f, 4095); |
| 2225 EXPECT_LE(diff, 1); |
| 2226 } |
| 2185 | 2227 |
| 2186 TEST_F(LibYUVPlanarTest, TestARGBLumaColorTable) { | 2228 TEST_F(LibYUVPlanarTest, TestARGBLumaColorTable) { |
| 2187 SIMD_ALIGNED(uint8 orig_pixels[1280][4]); | 2229 SIMD_ALIGNED(uint8 orig_pixels[1280][4]); |
| 2188 SIMD_ALIGNED(uint8 dst_pixels_opt[1280][4]); | 2230 SIMD_ALIGNED(uint8 dst_pixels_opt[1280][4]); |
| 2189 SIMD_ALIGNED(uint8 dst_pixels_c[1280][4]); | 2231 SIMD_ALIGNED(uint8 dst_pixels_c[1280][4]); |
| 2190 memset(orig_pixels, 0, sizeof(orig_pixels)); | 2232 memset(orig_pixels, 0, sizeof(orig_pixels)); |
| 2191 | 2233 |
| 2192 align_buffer_page_end(lumacolortable, 32768); | 2234 align_buffer_page_end(lumacolortable, 32768); |
| 2193 int v = 0; | 2235 int v = 0; |
| 2194 for (int i = 0; i < 32768; ++i) { | 2236 for (int i = 0; i < 32768; ++i) { |
| (...skipping 354 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2549 } | 2591 } |
| 2550 | 2592 |
| 2551 free_aligned_buffer_page_end(src_pixels); | 2593 free_aligned_buffer_page_end(src_pixels); |
| 2552 free_aligned_buffer_page_end(tmp_pixels_u); | 2594 free_aligned_buffer_page_end(tmp_pixels_u); |
| 2553 free_aligned_buffer_page_end(tmp_pixels_v); | 2595 free_aligned_buffer_page_end(tmp_pixels_v); |
| 2554 free_aligned_buffer_page_end(dst_pixels_opt); | 2596 free_aligned_buffer_page_end(dst_pixels_opt); |
| 2555 free_aligned_buffer_page_end(dst_pixels_c); | 2597 free_aligned_buffer_page_end(dst_pixels_c); |
| 2556 } | 2598 } |
| 2557 | 2599 |
| 2558 } // namespace libyuv | 2600 } // namespace libyuv |
| OLD | NEW |