OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 2066 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2077 EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]); | 2077 EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]); |
2078 EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]); | 2078 EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]); |
2079 EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]); | 2079 EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]); |
2080 EXPECT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]); | 2080 EXPECT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]); |
2081 } | 2081 } |
2082 } | 2082 } |
2083 | 2083 |
2084 int TestHalfFloatPlane(int benchmark_width, int benchmark_height, | 2084 int TestHalfFloatPlane(int benchmark_width, int benchmark_height, |
2085 int benchmark_iterations, | 2085 int benchmark_iterations, |
2086 int disable_cpu_flags, int benchmark_cpu_info, | 2086 int disable_cpu_flags, int benchmark_cpu_info, |
2087 float scale) { | 2087 float scale, int mask) { |
2088 int i, j; | 2088 int i, j; |
2089 const int y_plane_size = benchmark_width * benchmark_height * 2; | 2089 const int y_plane_size = benchmark_width * benchmark_height * 2; |
2090 | 2090 |
2091 align_buffer_page_end(orig_y, y_plane_size); | 2091 align_buffer_page_end(orig_y, y_plane_size * 3); |
2092 align_buffer_page_end(dst_c, y_plane_size); | 2092 uint8* dst_opt = orig_y + y_plane_size; |
2093 align_buffer_page_end(dst_opt, y_plane_size); | 2093 uint8* dst_c = orig_y + y_plane_size * 2; |
| 2094 |
2094 MemRandomize(orig_y, y_plane_size); | 2095 MemRandomize(orig_y, y_plane_size); |
2095 memset(dst_c, 0, y_plane_size); | 2096 memset(dst_c, 0, y_plane_size); |
2096 memset(dst_opt, 1, y_plane_size); | 2097 memset(dst_opt, 1, y_plane_size); |
2097 | 2098 |
| 2099 for (i = 0; i < y_plane_size / 2; ++i) { |
| 2100 reinterpret_cast<uint16*>(orig_y)[i] = static_cast<uint16>(i & mask); |
| 2101 } |
| 2102 |
2098 // Disable all optimizations. | 2103 // Disable all optimizations. |
2099 MaskCpuFlags(disable_cpu_flags); | 2104 MaskCpuFlags(disable_cpu_flags); |
2100 double c_time = get_time(); | 2105 double c_time = get_time(); |
2101 for (j = 0; j < benchmark_iterations; j++) { | 2106 for (j = 0; j < benchmark_iterations; j++) { |
2102 HalfFloatPlane(reinterpret_cast<uint16*>(orig_y), benchmark_width * 2, | 2107 HalfFloatPlane(reinterpret_cast<uint16*>(orig_y), benchmark_width * 2, |
2103 reinterpret_cast<uint16*>(dst_c), benchmark_width * 2, | 2108 reinterpret_cast<uint16*>(dst_c), benchmark_width * 2, |
2104 scale, benchmark_width, benchmark_height); | 2109 scale, benchmark_width, benchmark_height); |
2105 } | 2110 } |
2106 c_time = (get_time() - c_time) / benchmark_iterations; | 2111 c_time = (get_time() - c_time) / benchmark_iterations; |
2107 | 2112 |
2108 // Enable optimizations. | 2113 // Enable optimizations. |
2109 MaskCpuFlags(benchmark_cpu_info); | 2114 MaskCpuFlags(benchmark_cpu_info); |
2110 double opt_time = get_time(); | 2115 double opt_time = get_time(); |
2111 for (j = 0; j < benchmark_iterations; j++) { | 2116 for (j = 0; j < benchmark_iterations; j++) { |
2112 HalfFloatPlane(reinterpret_cast<uint16*>(orig_y), benchmark_width * 2, | 2117 HalfFloatPlane(reinterpret_cast<uint16*>(orig_y), benchmark_width * 2, |
2113 reinterpret_cast<uint16*>(dst_opt), benchmark_width * 2, | 2118 reinterpret_cast<uint16*>(dst_opt), benchmark_width * 2, |
2114 scale, benchmark_width, benchmark_height); | 2119 scale, benchmark_width, benchmark_height); |
2115 } | 2120 } |
2116 opt_time = (get_time() - opt_time) / benchmark_iterations; | 2121 opt_time = (get_time() - opt_time) / benchmark_iterations; |
2117 | 2122 |
2118 int diff = 0; | 2123 int diff = 0; |
2119 for (i = 0; i < y_plane_size; ++i) { | 2124 for (i = 0; i < y_plane_size; ++i) { |
2120 diff = dst_c[i] - dst_opt[i]; | 2125 diff = dst_c[i] - dst_opt[i]; |
2121 if (diff) break; | 2126 if (diff) break; |
2122 } | 2127 } |
2123 | 2128 |
2124 free_aligned_buffer_page_end(orig_y); | 2129 free_aligned_buffer_page_end(orig_y); |
2125 free_aligned_buffer_page_end(dst_c); | |
2126 free_aligned_buffer_page_end(dst_opt); | |
2127 return diff; | 2130 return diff; |
2128 } | 2131 } |
2129 | 2132 |
2130 // 5 bit exponent with bias of 15 will underflow to a denormal if scale causes | 2133 // 5 bit exponent with bias of 15 will underflow to a denormal if scale causes |
2131 // exponent to be less than 0. 15 - log2(65536) = -1. This shouldn't normally | 2134 // exponent to be less than 0. 15 - log2(65536) = -1. This shouldn't normally |
2132 // happen since scale is 1/(1<<bits) where bits is 9, 10 or 12. | 2135 // happen since scale is 1/(1<<bits) where bits is 9, 10 or 12. |
2133 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_denormal) { | 2136 #define MAXHALFDIFF 0 |
| 2137 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_denormal) { |
2134 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, | 2138 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, |
2135 benchmark_iterations_, | 2139 benchmark_iterations_, |
2136 disable_cpu_flags_, benchmark_cpu_info_, | 2140 disable_cpu_flags_, benchmark_cpu_info_, |
2137 1.0f / 65536.0f); | 2141 1.0f / 65536.0f, 65535); |
2138 EXPECT_EQ(diff, 0); | 2142 EXPECT_LE(diff, MAXHALFDIFF); |
| 2143 } |
| 2144 |
| 2145 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_10bit_Opt) { |
| 2146 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, |
| 2147 benchmark_iterations_, |
| 2148 disable_cpu_flags_, benchmark_cpu_info_, |
| 2149 1.0f / 1024.0f, 1023); |
| 2150 EXPECT_LE(diff, MAXHALFDIFF); |
| 2151 } |
| 2152 |
| 2153 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_9bit_Opt) { |
| 2154 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, |
| 2155 benchmark_iterations_, |
| 2156 disable_cpu_flags_, benchmark_cpu_info_, |
| 2157 1.0f / 512.0f, 511); |
| 2158 EXPECT_LE(diff, MAXHALFDIFF); |
2139 } | 2159 } |
2140 | 2160 |
2141 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Opt) { | 2161 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Opt) { |
2142 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, | 2162 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, |
2143 benchmark_iterations_, | 2163 benchmark_iterations_, |
2144 disable_cpu_flags_, benchmark_cpu_info_, | 2164 disable_cpu_flags_, benchmark_cpu_info_, |
2145 1.0f / 4096.0f); | 2165 1.0f / 4096.0f, 4095); |
2146 EXPECT_EQ(diff, 0); | 2166 EXPECT_LE(diff, MAXHALFDIFF); |
| 2167 } |
| 2168 |
| 2169 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_One) { |
| 2170 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, |
| 2171 benchmark_iterations_, |
| 2172 disable_cpu_flags_, benchmark_cpu_info_, |
| 2173 1.0f, 4095); |
| 2174 EXPECT_LE(diff, MAXHALFDIFF); |
2147 } | 2175 } |
2148 | 2176 |
2149 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Offby1) { | 2177 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Offby1) { |
2150 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, | 2178 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, |
2151 benchmark_iterations_, | 2179 benchmark_iterations_, |
2152 disable_cpu_flags_, benchmark_cpu_info_, | 2180 disable_cpu_flags_, benchmark_cpu_info_, |
2153 1.0f / 1023.0f); | 2181 1.0f / 4095.0f, 4095); |
2154 EXPECT_EQ(diff, 0); | 2182 EXPECT_LE(diff, MAXHALFDIFF); |
2155 } | 2183 } |
2156 | 2184 |
| 2185 |
2157 TEST_F(LibYUVPlanarTest, TestARGBLumaColorTable) { | 2186 TEST_F(LibYUVPlanarTest, TestARGBLumaColorTable) { |
2158 SIMD_ALIGNED(uint8 orig_pixels[1280][4]); | 2187 SIMD_ALIGNED(uint8 orig_pixels[1280][4]); |
2159 SIMD_ALIGNED(uint8 dst_pixels_opt[1280][4]); | 2188 SIMD_ALIGNED(uint8 dst_pixels_opt[1280][4]); |
2160 SIMD_ALIGNED(uint8 dst_pixels_c[1280][4]); | 2189 SIMD_ALIGNED(uint8 dst_pixels_c[1280][4]); |
2161 memset(orig_pixels, 0, sizeof(orig_pixels)); | 2190 memset(orig_pixels, 0, sizeof(orig_pixels)); |
2162 | 2191 |
2163 align_buffer_page_end(lumacolortable, 32768); | 2192 align_buffer_page_end(lumacolortable, 32768); |
2164 int v = 0; | 2193 int v = 0; |
2165 for (int i = 0; i < 32768; ++i) { | 2194 for (int i = 0; i < 32768; ++i) { |
2166 lumacolortable[i] = v; | 2195 lumacolortable[i] = v; |
(...skipping 353 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2520 } | 2549 } |
2521 | 2550 |
2522 free_aligned_buffer_page_end(src_pixels); | 2551 free_aligned_buffer_page_end(src_pixels); |
2523 free_aligned_buffer_page_end(tmp_pixels_u); | 2552 free_aligned_buffer_page_end(tmp_pixels_u); |
2524 free_aligned_buffer_page_end(tmp_pixels_v); | 2553 free_aligned_buffer_page_end(tmp_pixels_v); |
2525 free_aligned_buffer_page_end(dst_pixels_opt); | 2554 free_aligned_buffer_page_end(dst_pixels_opt); |
2526 free_aligned_buffer_page_end(dst_pixels_c); | 2555 free_aligned_buffer_page_end(dst_pixels_c); |
2527 } | 2556 } |
2528 | 2557 |
2529 } // namespace libyuv | 2558 } // namespace libyuv |
OLD | NEW |