skia/ext/convolver_unittest.cc - Issue 178013006: Split Convolver.SIMDVerification into two tests

Side by Side Diff: skia/ext/convolver_unittest.cc

Issue 178013006: Split Convolver.SIMDVerification into two tests (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Created 6 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include <string.h>	5 #include <string.h>

6 #include <time.h>	6 #include <time.h>

7 #include <algorithm>	7 #include <algorithm>

8 #include <numeric>	8 #include <numeric>

9 #include <vector>	9 #include <vector>

10	10

(...skipping 194 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
205 filter.AddFilter(66, factors6, arraysize(factors6));	205 filter.AddFilter(66, factors6, arraysize(factors6));

206 ASSERT_EQ(6, filter.max_filter());	206 ASSERT_EQ(6, filter.max_filter());

207 ASSERT_EQ(6, filter.num_values());	207 ASSERT_EQ(6, filter.num_values());

208	208

209 values = filter.FilterForValue(5, &filter_offset, &filter_length);	209 values = filter.FilterForValue(5, &filter_offset, &filter_length);

210 ASSERT_TRUE(values == NULL); // filter_length == 0 => values is NULL	210 ASSERT_TRUE(values == NULL); // filter_length == 0 => values is NULL

211 ASSERT_EQ(66, filter_offset); // value passed in	211 ASSERT_EQ(66, filter_offset); // value passed in

212 ASSERT_EQ(0, filter_length);	212 ASSERT_EQ(0, filter_length);

213 }	213 }

214	214

215 #if defined(THREAD_SANITIZER)	215 void VerifySIMD(unsigned int source_width,

216 // Times out under ThreadSanitizer, http://crbug.com/134400.	216 unsigned int source_height,

217 #define MAYBE_SIMDVerification DISABLED_SIMDVerification	217 unsigned int dest_width,

218 #else	218 unsigned int dest_height) {

219 #define MAYBE_SIMDVerification SIMDVerification

220 #endif

221 TEST(Convolver, MAYBE_SIMDVerification) {

222 int source_sizes[][2] = {

223 {1,1}, {1,2}, {1,3}, {1,4}, {1,5},

224 {2,1}, {2,2}, {2,3}, {2,4}, {2,5},

225 {3,1}, {3,2}, {3,3}, {3,4}, {3,5},

226 {4,1}, {4,2}, {4,3}, {4,4}, {4,5},

227 #ifdef NDEBUG

228 {1920, 1080},

229 {720, 480},

230 {1377, 523},

231 #endif

232 {325, 241}

233 };

234 #ifdef NDEBUG

235 int dest_sizes[][2] = { {1280, 1024}, {480, 270}, {177, 123} };

236 #else

237 int dest_sizes[][2] = { {128, 102}, {48, 27}, {17, 13} };

238 #endif

239 float filter[] = { 0.05f, -0.15f, 0.6f, 0.6f, -0.15f, 0.05f };	219 float filter[] = { 0.05f, -0.15f, 0.6f, 0.6f, -0.15f, 0.05f };

	220 // Preparing convolve coefficients.

	221 ConvolutionFilter1D x_filter, y_filter;

	222 for (unsigned int p = 0; p < dest_width; ++p) {

	223 unsigned int offset = source_width * p / dest_width;

	224 EXPECT_LT(offset, source_width);

	225 x_filter.AddFilter(offset, filter,

	226 std::min<int>(arraysize(filter),

	227 source_width - offset));

	228 }

	229 x_filter.PaddingForSIMD();

	230 for (unsigned int p = 0; p < dest_height; ++p) {

	231 unsigned int offset = source_height * p / dest_height;

	232 y_filter.AddFilter(offset, filter,

	233 std::min<int>(arraysize(filter),

	234 source_height - offset));

	235 }

	236 y_filter.PaddingForSIMD();

	237

	238 // Allocate input and output skia bitmap.

	239 SkBitmap source, result_c, result_sse;

	240 source.setConfig(SkBitmap::kARGB_8888_Config,

	241 source_width, source_height);

	242 source.allocPixels();

	243 result_c.setConfig(SkBitmap::kARGB_8888_Config,

	244 dest_width, dest_height);

	245 result_c.allocPixels();

	246 result_sse.setConfig(SkBitmap::kARGB_8888_Config,

	247 dest_width, dest_height);

	248 result_sse.allocPixels();

	249

	250 // Randomize source bitmap for testing.

	251 unsigned char* src_ptr = static_cast<unsigned char*>(source.getPixels());

	252 for (int y = 0; y < source.height(); y++) {

	253 for (unsigned int x = 0; x < source.rowBytes(); x++)

	254 src_ptr[x] = rand() % 255;

	255 src_ptr += source.rowBytes();

	256 }

	257

	258 // Test both cases with different has_alpha.

	259 for (int alpha = 0; alpha < 2; alpha++) {

	260 // Convolve using C code.

	261 base::TimeTicks resize_start;

	262 base::TimeDelta delta_c, delta_sse;

	263 unsigned char* r1 = static_cast<unsigned char*>(result_c.getPixels());

	264 unsigned char* r2 = static_cast<unsigned char*>(result_sse.getPixels());

	265

	266 resize_start = base::TimeTicks::Now();

	267 BGRAConvolve2D(static_cast<const uint8*>(source.getPixels()),

	268 static_cast<int>(source.rowBytes()),

	269 (alpha != 0), x_filter, y_filter,

	270 static_cast<int>(result_c.rowBytes()), r1, false);

	271 delta_c = base::TimeTicks::Now() - resize_start;

	272

	273 resize_start = base::TimeTicks::Now();

	274 // Convolve using SSE2 code

	275 BGRAConvolve2D(static_cast<const uint8*>(source.getPixels()),

	276 static_cast<int>(source.rowBytes()),

	277 (alpha != 0), x_filter, y_filter,

	278 static_cast<int>(result_sse.rowBytes()), r2, true);

	279 delta_sse = base::TimeTicks::Now() - resize_start;

	280

	281 // Unfortunately I could not enable the performance check now.

	282 // Most bots use debug version, and there are great difference between

	283 // the code generation for intrinsic, etc. In release version speed

	284 // difference was 150%-200% depend on alpha channel presence;

	285 // while in debug version speed difference was 96%-120%.

	286 // TODO(jiesun): optimize further until we could enable this for

	287 // debug version too.

	288 // EXPECT_LE(delta_sse, delta_c);

	289

	290 int64 c_us = delta_c.InMicroseconds();

	291 int64 sse_us = delta_sse.InMicroseconds();

	292 VLOG(1) << "from:" << source_width << "x" << source_height

	293 << " to:" << dest_width << "x" << dest_height

	294 << (alpha ? " with alpha" : " w/o alpha");

	295 VLOG(1) << "c:" << c_us << " sse:" << sse_us;

	296 VLOG(1) << "ratio:" << static_cast<float>(c_us) / sse_us;

	297

	298 // Comparing result.

	299 for (unsigned int i = 0; i < dest_height; i++) {

	300 EXPECT_FALSE(memcmp(r1, r2, dest_width * 4)); // RGBA always

	301 r1 += result_c.rowBytes();

	302 r2 += result_sse.rowBytes();

	303 }

	304 }

	305 }

	306

	307 TEST(Convolver, VerifySIMDEdgeCases) {

	308 srand(static_cast<unsigned int>(time(0)));

	309 // Loop over all possible (small) image sizes

	310 for (unsigned int width = 1; width < 20; width++) {

	311 for (unsigned int height = 1; height < 20; height++) {

	312 VerifySIMD(width, height, 8, 8);

	313 VerifySIMD(8, 8, width, height);

	314 }

	315 }

	316 }

	317

	318 // Verify that large upscales/downscales produce the same result

	319 // with and without SIMD.

	320 TEST(Convolver, VerifySIMDPrecision) {

	321 int source_sizes[][2] = { {1920, 1080}, {1377, 523}, {325, 241} };

	322 int dest_sizes[][2] = { {1280, 1024}, {177, 123} };
	Stephen White 2014/02/25 21:13:59 I'm surprised these run in reasonable time in DEBUG. The big image combinations were taking approximately 45 seconds before, and intermittently falling over the 45 second timeout, leading to flakiness. But perhaps that was only due to the small (edge cases) being resized to the large destination sizes, or because you've removed the 640x480 destination size. At any rate, please keep an eye on the runtimes on the bots when this lands, since they may have weaker hardware than what you're running locally. hubbe 2014/02/25 21:59:18 On 2014/02/25 21:13:59, Stephen White wrote: > I'm surprised these run in reasonable time in DEBUG. The big image combinations > were taking approximately 45 seconds before, and intermittently falling over the > 45 second timeout, leading to flakiness. But perhaps that was only due to the > small (edge cases) being resized to the large destination sizes, or because > you've removed the 640x480 destination size. > > At any rate, please keep an eye on the runtimes on the bots when this lands, > since they may have weaker hardware than what you're running locally. A good chunk of time was spent comparing the images, I think. With ASAN, on my machine, replacing the comparison with memcmp saved 50% of the time...
240	323

241 srand(static_cast<unsigned int>(time(0)));	324 srand(static_cast<unsigned int>(time(0)));

242	325

243 // Loop over some specific source and destination dimensions.	326 // Loop over some specific source and destination dimensions.

244 for (unsigned int i = 0; i < arraysize(source_sizes); ++i) {	327 for (unsigned int i = 0; i < arraysize(source_sizes); ++i) {

245 unsigned int source_width = source_sizes[i][0];	328 unsigned int source_width = source_sizes[i][0];

246 unsigned int source_height = source_sizes[i][1];	329 unsigned int source_height = source_sizes[i][1];

247 for (unsigned int j = 0; j < arraysize(dest_sizes); ++j) {	330 for (unsigned int j = 0; j < arraysize(dest_sizes); ++j) {

248 unsigned int dest_width = dest_sizes[j][0];	331 unsigned int dest_width = dest_sizes[j][0];

249 unsigned int dest_height = dest_sizes[j][1];	332 unsigned int dest_height = dest_sizes[j][1];

250	333 VerifySIMD(source_width, source_height, dest_width, dest_height);

251 // Preparing convolve coefficients.

252 ConvolutionFilter1D x_filter, y_filter;

253 for (unsigned int p = 0; p < dest_width; ++p) {

254 unsigned int offset = source_width * p / dest_width;

255 EXPECT_LT(offset, source_width);

256 x_filter.AddFilter(offset, filter,

257 std::min<int>(arraysize(filter),

258 source_width - offset));

259 }

260 x_filter.PaddingForSIMD();

261 for (unsigned int p = 0; p < dest_height; ++p) {

262 unsigned int offset = source_height * p / dest_height;

263 y_filter.AddFilter(offset, filter,

264 std::min<int>(arraysize(filter),

265 source_height - offset));

266 }

267 y_filter.PaddingForSIMD();

268

269 // Allocate input and output skia bitmap.

270 SkBitmap source, result_c, result_sse;

271 source.setConfig(SkBitmap::kARGB_8888_Config,

272 source_width, source_height);

273 source.allocPixels();

274 result_c.setConfig(SkBitmap::kARGB_8888_Config,

275 dest_width, dest_height);

276 result_c.allocPixels();

277 result_sse.setConfig(SkBitmap::kARGB_8888_Config,

278 dest_width, dest_height);

279 result_sse.allocPixels();

280

281 // Randomize source bitmap for testing.

282 unsigned char* src_ptr = static_cast<unsigned char*>(source.getPixels());

283 for (int y = 0; y < source.height(); y++) {

284 for (unsigned int x = 0; x < source.rowBytes(); x++)

285 src_ptr[x] = rand() % 255;

286 src_ptr += source.rowBytes();

287 }

288

289 // Test both cases with different has_alpha.

290 for (int alpha = 0; alpha < 2; alpha++) {

291 // Convolve using C code.

292 base::TimeTicks resize_start;

293 base::TimeDelta delta_c, delta_sse;

294 unsigned char* r1 = static_cast<unsigned char*>(result_c.getPixels());

295 unsigned char* r2 = static_cast<unsigned char*>(result_sse.getPixels());

296

297 resize_start = base::TimeTicks::Now();

298 BGRAConvolve2D(static_cast<const uint8*>(source.getPixels()),

299 static_cast<int>(source.rowBytes()),

300 (alpha != 0), x_filter, y_filter,

301 static_cast<int>(result_c.rowBytes()), r1, false);

302 delta_c = base::TimeTicks::Now() - resize_start;

303

304 resize_start = base::TimeTicks::Now();

305 // Convolve using SSE2 code

306 BGRAConvolve2D(static_cast<const uint8*>(source.getPixels()),

307 static_cast<int>(source.rowBytes()),

308 (alpha != 0), x_filter, y_filter,

309 static_cast<int>(result_sse.rowBytes()), r2, true);

310 delta_sse = base::TimeTicks::Now() - resize_start;

311

312 // Unfortunately I could not enable the performance check now.

313 // Most bots use debug version, and there are great difference between

314 // the code generation for intrinsic, etc. In release version speed

315 // difference was 150%-200% depend on alpha channel presence;

316 // while in debug version speed difference was 96%-120%.

317 // TODO(jiesun): optimize further until we could enable this for

318 // debug version too.

319 // EXPECT_LE(delta_sse, delta_c);

320

321 int64 c_us = delta_c.InMicroseconds();

322 int64 sse_us = delta_sse.InMicroseconds();

323 VLOG(1) << "from:" << source_width << "x" << source_height

324 << " to:" << dest_width << "x" << dest_height

325 << (alpha ? " with alpha" : " w/o alpha");

326 VLOG(1) << "c:" << c_us << " sse:" << sse_us;

327 VLOG(1) << "ratio:" << static_cast<float>(c_us) / sse_us;

328

329 // Comparing result.

330 for (unsigned int i = 0; i < dest_height; i++) {

331 for (unsigned int x = 0; x < dest_width * 4; x++) { // RGBA always.

332 EXPECT_EQ(r1[x], r2[x]);

333 }

334 r1 += result_c.rowBytes();

335 r2 += result_sse.rowBytes();

336 }

337 }

338 }	334 }

339 }	335 }

340 }	336 }

341	337

342 TEST(Convolver, SeparableSingleConvolution) {	338 TEST(Convolver, SeparableSingleConvolution) {

343 static const int kImgWidth = 1024;	339 static const int kImgWidth = 1024;

344 static const int kImgHeight = 1024;	340 static const int kImgHeight = 1024;

345 static const int kChannelCount = 3;	341 static const int kChannelCount = 3;

346 static const int kStrideSlack = 22;	342 static const int kStrideSlack = 22;

347 ConvolutionFilter1D filter;	343 ConvolutionFilter1D filter;

(...skipping 185 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
533 fp_gradient_kernel.end()), -1.5f);	529 fp_gradient_kernel.end()), -1.5f);

534 EXPECT_LT(*std::min_element(fp_gradient_kernel.begin(),	530 EXPECT_LT(*std::min_element(fp_gradient_kernel.begin(),

535 fp_gradient_kernel.end()), 0.0f);	531 fp_gradient_kernel.end()), 0.0f);

536 EXPECT_LT(*std::max_element(fp_gradient_kernel.begin(),	532 EXPECT_LT(*std::max_element(fp_gradient_kernel.begin(),

537 fp_gradient_kernel.end()), 1.5f);	533 fp_gradient_kernel.end()), 1.5f);

538 EXPECT_GT(*std::max_element(fp_gradient_kernel.begin(),	534 EXPECT_GT(*std::max_element(fp_gradient_kernel.begin(),

539 fp_gradient_kernel.end()), 0.0f);	535 fp_gradient_kernel.end()), 0.0f);

540 }	536 }

541	537

542 } // namespace skia	538 } // namespace skia

OLD	NEW

« no previous file with comments | « no previous file | no next file » | no next file with comments »