| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 2066 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2077 EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]); | 2077 EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]); |
| 2078 EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]); | 2078 EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]); |
| 2079 EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]); | 2079 EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]); |
| 2080 EXPECT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]); | 2080 EXPECT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]); |
| 2081 } | 2081 } |
| 2082 } | 2082 } |
| 2083 | 2083 |
| 2084 int TestHalfFloatPlane(int benchmark_width, int benchmark_height, | 2084 int TestHalfFloatPlane(int benchmark_width, int benchmark_height, |
| 2085 int benchmark_iterations, | 2085 int benchmark_iterations, |
| 2086 int disable_cpu_flags, int benchmark_cpu_info, | 2086 int disable_cpu_flags, int benchmark_cpu_info, |
| 2087 float scale) { | 2087 float scale, int mask) { |
| 2088 int i, j; | 2088 int i, j; |
| 2089 const int y_plane_size = benchmark_width * benchmark_height * 2; | 2089 const int y_plane_size = benchmark_width * benchmark_height * 2; |
| 2090 | 2090 |
| 2091 align_buffer_page_end(orig_y, y_plane_size); | 2091 align_buffer_page_end(orig_y, y_plane_size * 3); |
| 2092 align_buffer_page_end(dst_c, y_plane_size); | 2092 uint8* dst_opt = orig_y + y_plane_size; |
| 2093 align_buffer_page_end(dst_opt, y_plane_size); | 2093 uint8* dst_c = orig_y + y_plane_size * 2; |
| 2094 |
| 2094 MemRandomize(orig_y, y_plane_size); | 2095 MemRandomize(orig_y, y_plane_size); |
| 2095 memset(dst_c, 0, y_plane_size); | 2096 memset(dst_c, 0, y_plane_size); |
| 2096 memset(dst_opt, 1, y_plane_size); | 2097 memset(dst_opt, 1, y_plane_size); |
| 2097 | 2098 |
| 2099 for (i = 0; i < y_plane_size / 2; ++i) { |
| 2100 reinterpret_cast<uint16*>(orig_y)[i] = static_cast<uint16>(i & mask); |
| 2101 } |
| 2102 |
| 2098 // Disable all optimizations. | 2103 // Disable all optimizations. |
| 2099 MaskCpuFlags(disable_cpu_flags); | 2104 MaskCpuFlags(disable_cpu_flags); |
| 2100 double c_time = get_time(); | 2105 double c_time = get_time(); |
| 2101 for (j = 0; j < benchmark_iterations; j++) { | 2106 for (j = 0; j < benchmark_iterations; j++) { |
| 2102 HalfFloatPlane(reinterpret_cast<uint16*>(orig_y), benchmark_width * 2, | 2107 HalfFloatPlane(reinterpret_cast<uint16*>(orig_y), benchmark_width * 2, |
| 2103 reinterpret_cast<uint16*>(dst_c), benchmark_width * 2, | 2108 reinterpret_cast<uint16*>(dst_c), benchmark_width * 2, |
| 2104 scale, benchmark_width, benchmark_height); | 2109 scale, benchmark_width, benchmark_height); |
| 2105 } | 2110 } |
| 2106 c_time = (get_time() - c_time) / benchmark_iterations; | 2111 c_time = (get_time() - c_time) / benchmark_iterations; |
| 2107 | 2112 |
| 2108 // Enable optimizations. | 2113 // Enable optimizations. |
| 2109 MaskCpuFlags(benchmark_cpu_info); | 2114 MaskCpuFlags(benchmark_cpu_info); |
| 2110 double opt_time = get_time(); | 2115 double opt_time = get_time(); |
| 2111 for (j = 0; j < benchmark_iterations; j++) { | 2116 for (j = 0; j < benchmark_iterations; j++) { |
| 2112 HalfFloatPlane(reinterpret_cast<uint16*>(orig_y), benchmark_width * 2, | 2117 HalfFloatPlane(reinterpret_cast<uint16*>(orig_y), benchmark_width * 2, |
| 2113 reinterpret_cast<uint16*>(dst_opt), benchmark_width * 2, | 2118 reinterpret_cast<uint16*>(dst_opt), benchmark_width * 2, |
| 2114 scale, benchmark_width, benchmark_height); | 2119 scale, benchmark_width, benchmark_height); |
| 2115 } | 2120 } |
| 2116 opt_time = (get_time() - opt_time) / benchmark_iterations; | 2121 opt_time = (get_time() - opt_time) / benchmark_iterations; |
| 2117 | 2122 |
| 2118 int diff = 0; | 2123 int diff = 0; |
| 2119 for (i = 0; i < y_plane_size; ++i) { | 2124 for (i = 0; i < y_plane_size; ++i) { |
| 2120 diff = dst_c[i] - dst_opt[i]; | 2125 diff = dst_c[i] - dst_opt[i]; |
| 2121 if (diff) break; | 2126 if (diff) break; |
| 2122 } | 2127 } |
| 2123 | 2128 |
| 2124 free_aligned_buffer_page_end(orig_y); | 2129 free_aligned_buffer_page_end(orig_y); |
| 2125 free_aligned_buffer_page_end(dst_c); | |
| 2126 free_aligned_buffer_page_end(dst_opt); | |
| 2127 return diff; | 2130 return diff; |
| 2128 } | 2131 } |
| 2129 | 2132 |
| 2130 // 5 bit exponent with bias of 15 will underflow to a denormal if scale causes | 2133 // 5 bit exponent with bias of 15 will underflow to a denormal if scale causes |
| 2131 // exponent to be less than 0. 15 - log2(65536) = -1. This shouldn't normally | 2134 // exponent to be less than 0. 15 - log2(65536) = -1. This shouldn't normally |
| 2132 // happen since scale is 1/(1<<bits) where bits is 9, 10 or 12. | 2135 // happen since scale is 1/(1<<bits) where bits is 9, 10 or 12. |
| 2133 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_denormal) { | 2136 #define MAXHALFDIFF 0 |
| 2137 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_denormal) { |
| 2134 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, | 2138 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, |
| 2135 benchmark_iterations_, | 2139 benchmark_iterations_, |
| 2136 disable_cpu_flags_, benchmark_cpu_info_, | 2140 disable_cpu_flags_, benchmark_cpu_info_, |
| 2137 1.0f / 65536.0f); | 2141 1.0f / 65536.0f, 65535); |
| 2138 EXPECT_EQ(diff, 0); | 2142 EXPECT_LE(diff, MAXHALFDIFF); |
| 2143 } |
| 2144 |
| 2145 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_10bit_Opt) { |
| 2146 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, |
| 2147 benchmark_iterations_, |
| 2148 disable_cpu_flags_, benchmark_cpu_info_, |
| 2149 1.0f / 1024.0f, 1023); |
| 2150 EXPECT_LE(diff, MAXHALFDIFF); |
| 2151 } |
| 2152 |
| 2153 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_9bit_Opt) { |
| 2154 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, |
| 2155 benchmark_iterations_, |
| 2156 disable_cpu_flags_, benchmark_cpu_info_, |
| 2157 1.0f / 512.0f, 511); |
| 2158 EXPECT_LE(diff, MAXHALFDIFF); |
| 2139 } | 2159 } |
| 2140 | 2160 |
| 2141 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Opt) { | 2161 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Opt) { |
| 2142 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, | 2162 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, |
| 2143 benchmark_iterations_, | 2163 benchmark_iterations_, |
| 2144 disable_cpu_flags_, benchmark_cpu_info_, | 2164 disable_cpu_flags_, benchmark_cpu_info_, |
| 2145 1.0f / 4096.0f); | 2165 1.0f / 4096.0f, 4095); |
| 2146 EXPECT_EQ(diff, 0); | 2166 EXPECT_LE(diff, MAXHALFDIFF); |
| 2167 } |
| 2168 |
| 2169 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_One) { |
| 2170 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, |
| 2171 benchmark_iterations_, |
| 2172 disable_cpu_flags_, benchmark_cpu_info_, |
| 2173 1.0f, 4095); |
| 2174 EXPECT_LE(diff, MAXHALFDIFF); |
| 2147 } | 2175 } |
| 2148 | 2176 |
| 2149 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Offby1) { | 2177 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Offby1) { |
| 2150 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, | 2178 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, |
| 2151 benchmark_iterations_, | 2179 benchmark_iterations_, |
| 2152 disable_cpu_flags_, benchmark_cpu_info_, | 2180 disable_cpu_flags_, benchmark_cpu_info_, |
| 2153 1.0f / 1023.0f); | 2181 1.0f / 4095.0f, 4095); |
| 2154 EXPECT_EQ(diff, 0); | 2182 EXPECT_LE(diff, MAXHALFDIFF); |
| 2155 } | 2183 } |
| 2156 | 2184 |
| 2185 |
| 2157 TEST_F(LibYUVPlanarTest, TestARGBLumaColorTable) { | 2186 TEST_F(LibYUVPlanarTest, TestARGBLumaColorTable) { |
| 2158 SIMD_ALIGNED(uint8 orig_pixels[1280][4]); | 2187 SIMD_ALIGNED(uint8 orig_pixels[1280][4]); |
| 2159 SIMD_ALIGNED(uint8 dst_pixels_opt[1280][4]); | 2188 SIMD_ALIGNED(uint8 dst_pixels_opt[1280][4]); |
| 2160 SIMD_ALIGNED(uint8 dst_pixels_c[1280][4]); | 2189 SIMD_ALIGNED(uint8 dst_pixels_c[1280][4]); |
| 2161 memset(orig_pixels, 0, sizeof(orig_pixels)); | 2190 memset(orig_pixels, 0, sizeof(orig_pixels)); |
| 2162 | 2191 |
| 2163 align_buffer_page_end(lumacolortable, 32768); | 2192 align_buffer_page_end(lumacolortable, 32768); |
| 2164 int v = 0; | 2193 int v = 0; |
| 2165 for (int i = 0; i < 32768; ++i) { | 2194 for (int i = 0; i < 32768; ++i) { |
| 2166 lumacolortable[i] = v; | 2195 lumacolortable[i] = v; |
| (...skipping 353 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2520 } | 2549 } |
| 2521 | 2550 |
| 2522 free_aligned_buffer_page_end(src_pixels); | 2551 free_aligned_buffer_page_end(src_pixels); |
| 2523 free_aligned_buffer_page_end(tmp_pixels_u); | 2552 free_aligned_buffer_page_end(tmp_pixels_u); |
| 2524 free_aligned_buffer_page_end(tmp_pixels_v); | 2553 free_aligned_buffer_page_end(tmp_pixels_v); |
| 2525 free_aligned_buffer_page_end(dst_pixels_opt); | 2554 free_aligned_buffer_page_end(dst_pixels_opt); |
| 2526 free_aligned_buffer_page_end(dst_pixels_c); | 2555 free_aligned_buffer_page_end(dst_pixels_c); |
| 2527 } | 2556 } |
| 2528 | 2557 |
| 2529 } // namespace libyuv | 2558 } // namespace libyuv |
| OLD | NEW |