OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 2102 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2113 // Enable optimizations. | 2113 // Enable optimizations. |
2114 MaskCpuFlags(benchmark_cpu_info); | 2114 MaskCpuFlags(benchmark_cpu_info); |
2115 double opt_time = get_time(); | 2115 double opt_time = get_time(); |
2116 for (j = 0; j < benchmark_iterations; j++) { | 2116 for (j = 0; j < benchmark_iterations; j++) { |
2117 HalfFloatPlane(reinterpret_cast<uint16*>(orig_y), benchmark_width * 2, | 2117 HalfFloatPlane(reinterpret_cast<uint16*>(orig_y), benchmark_width * 2, |
2118 reinterpret_cast<uint16*>(dst_opt), benchmark_width * 2, | 2118 reinterpret_cast<uint16*>(dst_opt), benchmark_width * 2, |
2119 scale, benchmark_width, benchmark_height); | 2119 scale, benchmark_width, benchmark_height); |
2120 } | 2120 } |
2121 opt_time = (get_time() - opt_time) / benchmark_iterations; | 2121 opt_time = (get_time() - opt_time) / benchmark_iterations; |
2122 | 2122 |
2123 int diff = 0; | 2123 int max_diff = 0; |
2124 for (i = 0; i < y_plane_size; ++i) { | 2124 for (i = 0; i < y_plane_size / 2; ++i) { |
2125 diff = dst_c[i] - dst_opt[i]; | 2125 int abs_diff = |
2126 if (diff) break; | 2126 abs(static_cast<int>(reinterpret_cast<uint16*>(dst_c)[i]) - |
| 2127 static_cast<int>(reinterpret_cast<uint16*>(dst_opt)[i])); |
| 2128 if (abs_diff > max_diff) { |
| 2129 max_diff = abs_diff; |
| 2130 } |
2127 } | 2131 } |
2128 | 2132 |
2129 free_aligned_buffer_page_end(orig_y); | 2133 free_aligned_buffer_page_end(orig_y); |
2130 return diff; | 2134 return max_diff; |
2131 } | 2135 } |
2132 | 2136 |
| 2137 #if defined(__arm__) |
| 2138 static void EnableFlushDenormalToZero(void) { |
| 2139 uint32_t cw; |
| 2140 __asm__ __volatile__ ( |
| 2141 "vmrs %0, fpscr \n" |
| 2142 "orr %0, %0, #0x1000000 \n" |
| 2143 "vmsr fpscr, %0 \n" |
| 2144 : "=r"(cw) :: "memory"); |
| 2145 } |
| 2146 #endif |
| 2147 |
2133 // 5 bit exponent with bias of 15 will underflow to a denormal if scale causes | 2148 // 5 bit exponent with bias of 15 will underflow to a denormal if scale causes |
2134 // exponent to be less than 0. 15 - log2(65536) = -1. This shouldn't normally | 2149 // exponent to be less than 0. 15 - log2(65536) = -1. This shouldn't normally |
2135 // happen since scale is 1/(1<<bits) where bits is 9, 10 or 12. | 2150 // happen since scale is 1/(1<<bits) where bits is 9, 10 or 12. |
2136 #define MAXHALFDIFF 0 | 2151 |
2137 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_denormal) { | 2152 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_denormal) { |
| 2153 // 32 bit arm rounding on denormal case is off by 1 compared to C. |
| 2154 #if defined(__arm__) |
| 2155 EnableFlushDenormalToZero(); |
| 2156 #endif |
2138 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, | 2157 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, |
2139 benchmark_iterations_, | 2158 benchmark_iterations_, |
2140 disable_cpu_flags_, benchmark_cpu_info_, | 2159 disable_cpu_flags_, benchmark_cpu_info_, |
2141 1.0f / 65536.0f, 65535); | 2160 1.0f / 65536.0f, 65535); |
2142 EXPECT_LE(diff, MAXHALFDIFF); | 2161 EXPECT_EQ(0, diff); |
| 2162 } |
| 2163 |
| 2164 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_One) { |
| 2165 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, |
| 2166 benchmark_iterations_, |
| 2167 disable_cpu_flags_, benchmark_cpu_info_, |
| 2168 1.0f, 65535); |
| 2169 EXPECT_LE(diff, 1); |
| 2170 } |
| 2171 |
| 2172 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_Opt) { |
| 2173 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, |
| 2174 benchmark_iterations_, |
| 2175 disable_cpu_flags_, benchmark_cpu_info_, |
| 2176 1.0f / 4096.0f, 65535); |
| 2177 EXPECT_EQ(0, diff); |
2143 } | 2178 } |
2144 | 2179 |
2145 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_10bit_Opt) { | 2180 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_10bit_Opt) { |
2146 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, | 2181 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, |
2147 benchmark_iterations_, | 2182 benchmark_iterations_, |
2148 disable_cpu_flags_, benchmark_cpu_info_, | 2183 disable_cpu_flags_, benchmark_cpu_info_, |
2149 1.0f / 1024.0f, 1023); | 2184 1.0f / 1024.0f, 1023); |
2150 EXPECT_LE(diff, MAXHALFDIFF); | 2185 EXPECT_EQ(0, diff); |
2151 } | 2186 } |
2152 | 2187 |
2153 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_9bit_Opt) { | 2188 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_9bit_Opt) { |
2154 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, | 2189 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, |
2155 benchmark_iterations_, | 2190 benchmark_iterations_, |
2156 disable_cpu_flags_, benchmark_cpu_info_, | 2191 disable_cpu_flags_, benchmark_cpu_info_, |
2157 1.0f / 512.0f, 511); | 2192 1.0f / 512.0f, 511); |
2158 EXPECT_LE(diff, MAXHALFDIFF); | 2193 EXPECT_EQ(0, diff); |
2159 } | 2194 } |
2160 | 2195 |
2161 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Opt) { | 2196 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Opt) { |
2162 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, | 2197 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, |
2163 benchmark_iterations_, | 2198 benchmark_iterations_, |
2164 disable_cpu_flags_, benchmark_cpu_info_, | 2199 disable_cpu_flags_, benchmark_cpu_info_, |
2165 1.0f / 4096.0f, 4095); | 2200 1.0f / 4096.0f, 4095); |
2166 EXPECT_LE(diff, MAXHALFDIFF); | 2201 EXPECT_EQ(0, diff); |
2167 } | |
2168 | |
2169 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_One) { | |
2170 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, | |
2171 benchmark_iterations_, | |
2172 disable_cpu_flags_, benchmark_cpu_info_, | |
2173 1.0f, 4095); | |
2174 EXPECT_LE(diff, MAXHALFDIFF); | |
2175 } | 2202 } |
2176 | 2203 |
2177 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Offby1) { | 2204 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Offby1) { |
2178 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, | 2205 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, |
2179 benchmark_iterations_, | 2206 benchmark_iterations_, |
2180 disable_cpu_flags_, benchmark_cpu_info_, | 2207 disable_cpu_flags_, benchmark_cpu_info_, |
2181 1.0f / 4095.0f, 4095); | 2208 1.0f / 4095.0f, 4095); |
2182 EXPECT_LE(diff, MAXHALFDIFF); | 2209 EXPECT_EQ(0, diff); |
2183 } | 2210 } |
2184 | 2211 |
| 2212 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_One) { |
| 2213 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, |
| 2214 benchmark_iterations_, |
| 2215 disable_cpu_flags_, benchmark_cpu_info_, |
| 2216 1.0f, 2047); |
| 2217 EXPECT_EQ(0, diff); |
| 2218 } |
| 2219 |
| 2220 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_12bit_One) { |
| 2221 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, |
| 2222 benchmark_iterations_, |
| 2223 disable_cpu_flags_, benchmark_cpu_info_, |
| 2224 1.0f, 4095); |
| 2225 EXPECT_LE(diff, 1); |
| 2226 } |
2185 | 2227 |
2186 TEST_F(LibYUVPlanarTest, TestARGBLumaColorTable) { | 2228 TEST_F(LibYUVPlanarTest, TestARGBLumaColorTable) { |
2187 SIMD_ALIGNED(uint8 orig_pixels[1280][4]); | 2229 SIMD_ALIGNED(uint8 orig_pixels[1280][4]); |
2188 SIMD_ALIGNED(uint8 dst_pixels_opt[1280][4]); | 2230 SIMD_ALIGNED(uint8 dst_pixels_opt[1280][4]); |
2189 SIMD_ALIGNED(uint8 dst_pixels_c[1280][4]); | 2231 SIMD_ALIGNED(uint8 dst_pixels_c[1280][4]); |
2190 memset(orig_pixels, 0, sizeof(orig_pixels)); | 2232 memset(orig_pixels, 0, sizeof(orig_pixels)); |
2191 | 2233 |
2192 align_buffer_page_end(lumacolortable, 32768); | 2234 align_buffer_page_end(lumacolortable, 32768); |
2193 int v = 0; | 2235 int v = 0; |
2194 for (int i = 0; i < 32768; ++i) { | 2236 for (int i = 0; i < 32768; ++i) { |
(...skipping 354 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2549 } | 2591 } |
2550 | 2592 |
2551 free_aligned_buffer_page_end(src_pixels); | 2593 free_aligned_buffer_page_end(src_pixels); |
2552 free_aligned_buffer_page_end(tmp_pixels_u); | 2594 free_aligned_buffer_page_end(tmp_pixels_u); |
2553 free_aligned_buffer_page_end(tmp_pixels_v); | 2595 free_aligned_buffer_page_end(tmp_pixels_v); |
2554 free_aligned_buffer_page_end(dst_pixels_opt); | 2596 free_aligned_buffer_page_end(dst_pixels_opt); |
2555 free_aligned_buffer_page_end(dst_pixels_c); | 2597 free_aligned_buffer_page_end(dst_pixels_c); |
2556 } | 2598 } |
2557 | 2599 |
2558 } // namespace libyuv | 2600 } // namespace libyuv |
OLD | NEW |