unit_test/planar_test.cc - Issue 2478313004: HalfFloat neon armv7 fix for destination pointer.

Side by Side Diff: unit_test/planar_test.cc

Issue 2478313004: HalfFloat neon armv7 fix for destination pointer. (Closed)

Patch Set: added caveat on rounding difference Created 4 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 /*	1 /*

2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.	2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 2102 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
2113 // Enable optimizations.	2113 // Enable optimizations.

2114 MaskCpuFlags(benchmark_cpu_info);	2114 MaskCpuFlags(benchmark_cpu_info);

2115 double opt_time = get_time();	2115 double opt_time = get_time();

2116 for (j = 0; j < benchmark_iterations; j++) {	2116 for (j = 0; j < benchmark_iterations; j++) {

2117 HalfFloatPlane(reinterpret_cast<uint16*>(orig_y), benchmark_width * 2,	2117 HalfFloatPlane(reinterpret_cast<uint16*>(orig_y), benchmark_width * 2,

2118 reinterpret_cast<uint16*>(dst_opt), benchmark_width * 2,	2118 reinterpret_cast<uint16*>(dst_opt), benchmark_width * 2,

2119 scale, benchmark_width, benchmark_height);	2119 scale, benchmark_width, benchmark_height);

2120 }	2120 }

2121 opt_time = (get_time() - opt_time) / benchmark_iterations;	2121 opt_time = (get_time() - opt_time) / benchmark_iterations;

2122	2122

2123 int diff = 0;	2123 int max_diff = 0;

2124 for (i = 0; i < y_plane_size; ++i) {	2124 for (i = 0; i < y_plane_size / 2; ++i) {

2125 diff = dst_c[i] - dst_opt[i];	2125 int abs_diff =

2126 if (diff) break;	2126 abs(static_cast<int>(reinterpret_cast<uint16*>(dst_c)[i]) -

	2127 static_cast<int>(reinterpret_cast<uint16*>(dst_opt)[i]));

	2128 if (abs_diff > max_diff) {

	2129 max_diff = abs_diff;

	2130 }

2127 }	2131 }

2128	2132

2129 free_aligned_buffer_page_end(orig_y);	2133 free_aligned_buffer_page_end(orig_y);

2130 return diff;	2134 return max_diff;

2131 }	2135 }

2132	2136

	2137 #if defined(__arm__)

	2138 static void EnableFlushDenormalToZero(void) {

	2139 uint32_t cw;

	2140 __asm__ __volatile__ (

	2141 "vmrs %0, fpscr \n"

	2142 "orr %0, %0, #0x1000000 \n"

	2143 "vmsr fpscr, %0 \n"

	2144 : "=r"(cw) :: "memory");

	2145 }

	2146 #endif

	2147

2133 // 5 bit exponent with bias of 15 will underflow to a denormal if scale causes	2148 // 5 bit exponent with bias of 15 will underflow to a denormal if scale causes

2134 // exponent to be less than 0. 15 - log2(65536) = -1. This shouldn't normally	2149 // exponent to be less than 0. 15 - log2(65536) = -1. This shouldn't normally

2135 // happen since scale is 1/(1<<bits) where bits is 9, 10 or 12.	2150 // happen since scale is 1/(1<<bits) where bits is 9, 10 or 12.

2136 #define MAXHALFDIFF 0	2151

2137 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_denormal) {	2152 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_denormal) {

	2153 // 32 bit arm rounding on denormal case is off by 1 compared to C.

	2154 #if defined(__arm__)

	2155 EnableFlushDenormalToZero();

	2156 #endif

2138 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,	2157 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,

2139 benchmark_iterations_,	2158 benchmark_iterations_,

2140 disable_cpu_flags_, benchmark_cpu_info_,	2159 disable_cpu_flags_, benchmark_cpu_info_,

2141 1.0f / 65536.0f, 65535);	2160 1.0f / 65536.0f, 65535);

2142 EXPECT_LE(diff, MAXHALFDIFF);	2161 EXPECT_EQ(0, diff);

	2162 }

	2163

	2164 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_One) {

	2165 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,

	2166 benchmark_iterations_,

	2167 disable_cpu_flags_, benchmark_cpu_info_,

	2168 1.0f, 65535);

	2169 EXPECT_LE(diff, 1);

	2170 }

	2171

	2172 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_Opt) {

	2173 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,

	2174 benchmark_iterations_,

	2175 disable_cpu_flags_, benchmark_cpu_info_,

	2176 1.0f / 4096.0f, 65535);

	2177 EXPECT_EQ(0, diff);

2143 }	2178 }

2144	2179

2145 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_10bit_Opt) {	2180 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_10bit_Opt) {

2146 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,	2181 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,

2147 benchmark_iterations_,	2182 benchmark_iterations_,

2148 disable_cpu_flags_, benchmark_cpu_info_,	2183 disable_cpu_flags_, benchmark_cpu_info_,

2149 1.0f / 1024.0f, 1023);	2184 1.0f / 1024.0f, 1023);

2150 EXPECT_LE(diff, MAXHALFDIFF);	2185 EXPECT_EQ(0, diff);

2151 }	2186 }

2152	2187

2153 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_9bit_Opt) {	2188 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_9bit_Opt) {

2154 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,	2189 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,

2155 benchmark_iterations_,	2190 benchmark_iterations_,

2156 disable_cpu_flags_, benchmark_cpu_info_,	2191 disable_cpu_flags_, benchmark_cpu_info_,

2157 1.0f / 512.0f, 511);	2192 1.0f / 512.0f, 511);

2158 EXPECT_LE(diff, MAXHALFDIFF);	2193 EXPECT_EQ(0, diff);

2159 }	2194 }

2160	2195

2161 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Opt) {	2196 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Opt) {

2162 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,	2197 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,

2163 benchmark_iterations_,	2198 benchmark_iterations_,

2164 disable_cpu_flags_, benchmark_cpu_info_,	2199 disable_cpu_flags_, benchmark_cpu_info_,

2165 1.0f / 4096.0f, 4095);	2200 1.0f / 4096.0f, 4095);

2166 EXPECT_LE(diff, MAXHALFDIFF);	2201 EXPECT_EQ(0, diff);

2167 }

2168

2169 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_One) {

2170 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,

2171 benchmark_iterations_,

2172 disable_cpu_flags_, benchmark_cpu_info_,

2173 1.0f, 4095);

2174 EXPECT_LE(diff, MAXHALFDIFF);

2175 }	2202 }

2176	2203

2177 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Offby1) {	2204 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Offby1) {

2178 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,	2205 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,

2179 benchmark_iterations_,	2206 benchmark_iterations_,

2180 disable_cpu_flags_, benchmark_cpu_info_,	2207 disable_cpu_flags_, benchmark_cpu_info_,

2181 1.0f / 4095.0f, 4095);	2208 1.0f / 4095.0f, 4095);

2182 EXPECT_LE(diff, MAXHALFDIFF);	2209 EXPECT_EQ(0, diff);

2183 }	2210 }

2184	2211

	2212 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_One) {

	2213 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,

	2214 benchmark_iterations_,

	2215 disable_cpu_flags_, benchmark_cpu_info_,

	2216 1.0f, 2047);

	2217 EXPECT_EQ(0, diff);

	2218 }

	2219

	2220 TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_12bit_One) {

	2221 int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,

	2222 benchmark_iterations_,

	2223 disable_cpu_flags_, benchmark_cpu_info_,

	2224 1.0f, 4095);

	2225 EXPECT_LE(diff, 1);

	2226 }

2185	2227

2186 TEST_F(LibYUVPlanarTest, TestARGBLumaColorTable) {	2228 TEST_F(LibYUVPlanarTest, TestARGBLumaColorTable) {

2187 SIMD_ALIGNED(uint8 orig_pixels[1280][4]);	2229 SIMD_ALIGNED(uint8 orig_pixels[1280][4]);

2188 SIMD_ALIGNED(uint8 dst_pixels_opt[1280][4]);	2230 SIMD_ALIGNED(uint8 dst_pixels_opt[1280][4]);

2189 SIMD_ALIGNED(uint8 dst_pixels_c[1280][4]);	2231 SIMD_ALIGNED(uint8 dst_pixels_c[1280][4]);

2190 memset(orig_pixels, 0, sizeof(orig_pixels));	2232 memset(orig_pixels, 0, sizeof(orig_pixels));

2191	2233

2192 align_buffer_page_end(lumacolortable, 32768);	2234 align_buffer_page_end(lumacolortable, 32768);

2193 int v = 0;	2235 int v = 0;

2194 for (int i = 0; i < 32768; ++i) {	2236 for (int i = 0; i < 32768; ++i) {

(...skipping 354 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
2549 }	2591 }

2550	2592

2551 free_aligned_buffer_page_end(src_pixels);	2593 free_aligned_buffer_page_end(src_pixels);

2552 free_aligned_buffer_page_end(tmp_pixels_u);	2594 free_aligned_buffer_page_end(tmp_pixels_u);

2553 free_aligned_buffer_page_end(tmp_pixels_v);	2595 free_aligned_buffer_page_end(tmp_pixels_v);

2554 free_aligned_buffer_page_end(dst_pixels_opt);	2596 free_aligned_buffer_page_end(dst_pixels_opt);

2555 free_aligned_buffer_page_end(dst_pixels_c);	2597 free_aligned_buffer_page_end(dst_pixels_c);

2556 }	2598 }

2557	2599

2558 } // namespace libyuv	2600 } // namespace libyuv

OLD	NEW

« no previous file with comments | « source/row_neon64.cc ('k') | no next file » | no next file with comments »