Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(651)

Side by Side Diff: unit_test/planar_test.cc

Issue 2478313004: HalfFloat neon armv7 fix for destination pointer. (Closed)
Patch Set: added caveat on rounding difference Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/row_neon64.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 /* 1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 2102 matching lines...) Expand 10 before | Expand all | Expand 10 after
2113 // Enable optimizations. 2113 // Enable optimizations.
2114 MaskCpuFlags(benchmark_cpu_info); 2114 MaskCpuFlags(benchmark_cpu_info);
2115 double opt_time = get_time(); 2115 double opt_time = get_time();
2116 for (j = 0; j < benchmark_iterations; j++) { 2116 for (j = 0; j < benchmark_iterations; j++) {
2117 HalfFloatPlane(reinterpret_cast<uint16*>(orig_y), benchmark_width * 2, 2117 HalfFloatPlane(reinterpret_cast<uint16*>(orig_y), benchmark_width * 2,
2118 reinterpret_cast<uint16*>(dst_opt), benchmark_width * 2, 2118 reinterpret_cast<uint16*>(dst_opt), benchmark_width * 2,
2119 scale, benchmark_width, benchmark_height); 2119 scale, benchmark_width, benchmark_height);
2120 } 2120 }
2121 opt_time = (get_time() - opt_time) / benchmark_iterations; 2121 opt_time = (get_time() - opt_time) / benchmark_iterations;
2122 2122
2123 int diff = 0; 2123 int max_diff = 0;
2124 for (i = 0; i < y_plane_size; ++i) { 2124 for (i = 0; i < y_plane_size / 2; ++i) {
2125 diff = dst_c[i] - dst_opt[i]; 2125 int abs_diff =
2126 if (diff) break; 2126 abs(static_cast<int>(reinterpret_cast<uint16*>(dst_c)[i]) -
2127 static_cast<int>(reinterpret_cast<uint16*>(dst_opt)[i]));
2128 if (abs_diff > max_diff) {
2129 max_diff = abs_diff;
2130 }
2127 } 2131 }
2128 2132
2129 free_aligned_buffer_page_end(orig_y); 2133 free_aligned_buffer_page_end(orig_y);
2130 return diff; 2134 return max_diff;
2131 } 2135 }
2132 2136
2137 #if defined(__arm__)
2138 static void EnableFlushDenormalToZero(void) {
2139 uint32_t cw;
2140 __asm__ __volatile__ (
2141 "vmrs %0, fpscr \n"
2142 "orr %0, %0, #0x1000000 \n"
2143 "vmsr fpscr, %0 \n"
2144 : "=r"(cw) :: "memory");
2145 }
2146 #endif
2147
2133 // 5 bit exponent with bias of 15 will underflow to a denormal if scale causes 2148 // 5 bit exponent with bias of 15 will underflow to a denormal if scale causes
2134 // exponent to be less than 0. 15 - log2(65536) = -1. This shouldn't normally 2149 // exponent to be less than 0. 15 - log2(65536) = -1. This shouldn't normally
2135 // happen since scale is 1/(1<<bits) where bits is 9, 10 or 12. 2150 // happen since scale is 1/(1<<bits) where bits is 9, 10 or 12.
2136 #define MAXHALFDIFF 0 2151
2137 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_denormal) { 2152 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_denormal) {
2153 // 32 bit arm rounding on denormal case is off by 1 compared to C.
2154 #if defined(__arm__)
2155 EnableFlushDenormalToZero();
2156 #endif
2138 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, 2157 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
2139 benchmark_iterations_, 2158 benchmark_iterations_,
2140 disable_cpu_flags_, benchmark_cpu_info_, 2159 disable_cpu_flags_, benchmark_cpu_info_,
2141 1.0f / 65536.0f, 65535); 2160 1.0f / 65536.0f, 65535);
2142 EXPECT_LE(diff, MAXHALFDIFF); 2161 EXPECT_EQ(0, diff);
2162 }
2163
2164 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_One) {
2165 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
2166 benchmark_iterations_,
2167 disable_cpu_flags_, benchmark_cpu_info_,
2168 1.0f, 65535);
2169 EXPECT_LE(diff, 1);
2170 }
2171
2172 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_Opt) {
2173 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
2174 benchmark_iterations_,
2175 disable_cpu_flags_, benchmark_cpu_info_,
2176 1.0f / 4096.0f, 65535);
2177 EXPECT_EQ(0, diff);
2143 } 2178 }
2144 2179
2145 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_10bit_Opt) { 2180 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_10bit_Opt) {
2146 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, 2181 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
2147 benchmark_iterations_, 2182 benchmark_iterations_,
2148 disable_cpu_flags_, benchmark_cpu_info_, 2183 disable_cpu_flags_, benchmark_cpu_info_,
2149 1.0f / 1024.0f, 1023); 2184 1.0f / 1024.0f, 1023);
2150 EXPECT_LE(diff, MAXHALFDIFF); 2185 EXPECT_EQ(0, diff);
2151 } 2186 }
2152 2187
2153 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_9bit_Opt) { 2188 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_9bit_Opt) {
2154 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, 2189 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
2155 benchmark_iterations_, 2190 benchmark_iterations_,
2156 disable_cpu_flags_, benchmark_cpu_info_, 2191 disable_cpu_flags_, benchmark_cpu_info_,
2157 1.0f / 512.0f, 511); 2192 1.0f / 512.0f, 511);
2158 EXPECT_LE(diff, MAXHALFDIFF); 2193 EXPECT_EQ(0, diff);
2159 } 2194 }
2160 2195
2161 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Opt) { 2196 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Opt) {
2162 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, 2197 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
2163 benchmark_iterations_, 2198 benchmark_iterations_,
2164 disable_cpu_flags_, benchmark_cpu_info_, 2199 disable_cpu_flags_, benchmark_cpu_info_,
2165 1.0f / 4096.0f, 4095); 2200 1.0f / 4096.0f, 4095);
2166 EXPECT_LE(diff, MAXHALFDIFF); 2201 EXPECT_EQ(0, diff);
2167 }
2168
2169 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_One) {
2170 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
2171 benchmark_iterations_,
2172 disable_cpu_flags_, benchmark_cpu_info_,
2173 1.0f, 4095);
2174 EXPECT_LE(diff, MAXHALFDIFF);
2175 } 2202 }
2176 2203
2177 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Offby1) { 2204 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Offby1) {
2178 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, 2205 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
2179 benchmark_iterations_, 2206 benchmark_iterations_,
2180 disable_cpu_flags_, benchmark_cpu_info_, 2207 disable_cpu_flags_, benchmark_cpu_info_,
2181 1.0f / 4095.0f, 4095); 2208 1.0f / 4095.0f, 4095);
2182 EXPECT_LE(diff, MAXHALFDIFF); 2209 EXPECT_EQ(0, diff);
2183 } 2210 }
2184 2211
2212 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_One) {
2213 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
2214 benchmark_iterations_,
2215 disable_cpu_flags_, benchmark_cpu_info_,
2216 1.0f, 2047);
2217 EXPECT_EQ(0, diff);
2218 }
2219
2220 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_12bit_One) {
2221 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
2222 benchmark_iterations_,
2223 disable_cpu_flags_, benchmark_cpu_info_,
2224 1.0f, 4095);
2225 EXPECT_LE(diff, 1);
2226 }
2185 2227
2186 TEST_F(LibYUVPlanarTest, TestARGBLumaColorTable) { 2228 TEST_F(LibYUVPlanarTest, TestARGBLumaColorTable) {
2187 SIMD_ALIGNED(uint8 orig_pixels[1280][4]); 2229 SIMD_ALIGNED(uint8 orig_pixels[1280][4]);
2188 SIMD_ALIGNED(uint8 dst_pixels_opt[1280][4]); 2230 SIMD_ALIGNED(uint8 dst_pixels_opt[1280][4]);
2189 SIMD_ALIGNED(uint8 dst_pixels_c[1280][4]); 2231 SIMD_ALIGNED(uint8 dst_pixels_c[1280][4]);
2190 memset(orig_pixels, 0, sizeof(orig_pixels)); 2232 memset(orig_pixels, 0, sizeof(orig_pixels));
2191 2233
2192 align_buffer_page_end(lumacolortable, 32768); 2234 align_buffer_page_end(lumacolortable, 32768);
2193 int v = 0; 2235 int v = 0;
2194 for (int i = 0; i < 32768; ++i) { 2236 for (int i = 0; i < 32768; ++i) {
(...skipping 354 matching lines...) Expand 10 before | Expand all | Expand 10 after
2549 } 2591 }
2550 2592
2551 free_aligned_buffer_page_end(src_pixels); 2593 free_aligned_buffer_page_end(src_pixels);
2552 free_aligned_buffer_page_end(tmp_pixels_u); 2594 free_aligned_buffer_page_end(tmp_pixels_u);
2553 free_aligned_buffer_page_end(tmp_pixels_v); 2595 free_aligned_buffer_page_end(tmp_pixels_v);
2554 free_aligned_buffer_page_end(dst_pixels_opt); 2596 free_aligned_buffer_page_end(dst_pixels_opt);
2555 free_aligned_buffer_page_end(dst_pixels_c); 2597 free_aligned_buffer_page_end(dst_pixels_c);
2556 } 2598 }
2557 2599
2558 } // namespace libyuv 2600 } // namespace libyuv
OLD | NEW
« no previous file with comments | « source/row_neon64.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698