Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(174)

Side by Side Diff: unit_test/planar_test.cc

Issue 1505433002: AVX2 YUV alpha blender and improved unittests (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: off by 1 fix on win | Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/row_win.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 /* 1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 1145 matching lines...) Expand 10 before | Expand all | Expand 10 after
1156 EXPECT_LE(max_diff, 1); 1156 EXPECT_LE(max_diff, 1);
1157 } 1157 }
1158 1158
1159 TEST_F(LibYUVPlanarTest, ARGBBlend_Opt) { 1159 TEST_F(LibYUVPlanarTest, ARGBBlend_Opt) {
1160 int max_diff = TestBlend(benchmark_width_, benchmark_height_, 1160 int max_diff = TestBlend(benchmark_width_, benchmark_height_,
1161 benchmark_iterations_, 1161 benchmark_iterations_,
1162 disable_cpu_flags_, benchmark_cpu_info_, +1, 0); 1162 disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
1163 EXPECT_LE(max_diff, 1); 1163 EXPECT_LE(max_diff, 1);
1164 } 1164 }
1165 1165
1166 #ifdef HAS_BLENDPLANEROW_SSSE3 1166 #ifdef HAS_BLENDPLANEROW_AVX2
1167 // TODO(fbarchard): Switch to I420Blend. 1167 // TODO(fbarchard): Switch to I420Blend.
1168 static void TestBlendPlane(int width, int height, int benchmark_iterations, 1168 static void TestBlendPlaneRow(int width, int height, int benchmark_iterations,
1169 int invert, int off) { 1169 int invert, int off) {
1170 int has_ssse3 = TestCpuFlag(kCpuHasSSSE3); 1170 int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
1171 int has_avx2 = TestCpuFlag(kCpuHasAVX2);
1171 width = width * height; 1172 width = width * height;
1172 height = 1; 1173 height = 1;
1173 if (width < 1) {
1174 width = 1;
1175 }
1176 if (width < 256) { 1174 if (width < 256) {
1177 width = 256; 1175 width = 256;
1178 } 1176 }
1179 const int kBpp = 1; 1177 const int kBpp = 1;
1180 const int kStride = width * kBpp; 1178 const int kStride = width * kBpp;
1181 align_buffer_64(src_argb_a, kStride * height + off); 1179 align_buffer_64(src_argb_a, kStride * height + off);
1182 align_buffer_64(src_argb_b, kStride * height + off); 1180 align_buffer_64(src_argb_b, kStride * height + off);
1183 align_buffer_64(src_argb_alpha, kStride * height + off); 1181 align_buffer_64(src_argb_alpha, kStride * height + off);
1184 align_buffer_64(dst_argb_c, kStride * height); 1182 align_buffer_64(dst_argb_c, kStride * height + off);
1185 align_buffer_64(dst_argb_opt, kStride * height); 1183 align_buffer_64(dst_argb_opt, kStride * height + off);
1184 memset(dst_argb_c, 255, kStride * height + off);
1185 memset(dst_argb_opt, 255, kStride * height + off);
1186 1186
1187 if (has_ssse3) { 1187 if (has_ssse3) {
1188 for (int i = 0; i < 255; ++i) { 1188 // Test source is maintained exactly if alpha is 255.
1189 src_argb_a[i] = i; 1189 for (int i = 0; i < 256; ++i) {
1190 src_argb_b[i] = 255 - i; 1190 src_argb_a[i + off] = i;
1191 src_argb_alpha[i] = 255; 1191 src_argb_b[i + off] = 255 - i;
1192 src_argb_alpha[i + off] = 255;
1192 } 1193 }
1193 memset(dst_argb_opt, 0xfb, kStride * height);
1194 BlendPlaneRow_SSSE3(src_argb_a + off, 1194 BlendPlaneRow_SSSE3(src_argb_a + off,
1195 src_argb_b + off, 1195 src_argb_b + off,
1196 src_argb_alpha + off, 1196 src_argb_alpha + off,
1197 dst_argb_opt, 1197 dst_argb_opt + off,
1198 width * height); 1198 256);
1199 for (int i = 0; i < kStride * height; ++i) { 1199 for (int i = 0; i < 256; ++i) {
1200 EXPECT_EQ(src_argb_a[i], dst_argb_opt[i]); 1200 EXPECT_EQ(src_argb_a[i + off], dst_argb_opt[i + off]);
1201 }
1202 // Test destination is maintained exactly if alpha is 0.
1203 for (int i = 0; i < 256; ++i) {
1204 src_argb_a[i + off] = i;
1205 src_argb_b[i + off] = 255 - i;
1206 src_argb_alpha[i + off] = 0;
1207 }
1208 BlendPlaneRow_SSSE3(src_argb_a + off,
1209 src_argb_b + off,
1210 src_argb_alpha + off,
1211 dst_argb_opt + off,
1212 256);
1213 for (int i = 0; i < 256; ++i) {
1214 EXPECT_EQ(src_argb_b[i + off], dst_argb_opt[i + off]);
1201 } 1215 }
1202 } 1216 }
1203 for (int i = 0; i < kStride * height; ++i) { 1217 for (int i = 0; i < kStride * height; ++i) {
1204 src_argb_a[i + off] = (fastrand() & 0xff); 1218 src_argb_a[i + off] = (fastrand() & 0xff);
1205 src_argb_b[i + off] = (fastrand() & 0xff); 1219 src_argb_b[i + off] = (fastrand() & 0xff);
1206 src_argb_alpha[i + off] = (fastrand() & 0xff); 1220 src_argb_alpha[i + off] = (fastrand() & 0xff);
1207 } 1221 }
1208 memset(dst_argb_c, 255, kStride * height);
1209 memset(dst_argb_opt, 255, kStride * height);
1210 1222
1211 BlendPlaneRow_C(src_argb_a + off, 1223 BlendPlaneRow_C(src_argb_a + off,
1212 src_argb_b + off, 1224 src_argb_b + off,
1213 src_argb_alpha + off, 1225 src_argb_alpha + off,
1214 dst_argb_c, 1226 dst_argb_c + off,
1215 width * height); 1227 width * height);
1216 for (int i = 0; i < benchmark_iterations; ++i) { 1228 for (int i = 0; i < benchmark_iterations; ++i) {
1217 if (has_ssse3) { 1229 if (has_avx2) {
1218 BlendPlaneRow_SSSE3(src_argb_a + off, 1230 BlendPlaneRow_AVX2(src_argb_a + off,
1219 src_argb_b + off, 1231 src_argb_b + off,
1220 src_argb_alpha + off, 1232 src_argb_alpha + off,
1221 dst_argb_opt, 1233 dst_argb_opt + off,
1222 width * height); 1234 width * height);
1223 } else { 1235 } else {
1224 BlendPlaneRow_C(src_argb_a + off, 1236 if (has_ssse3) {
1225 src_argb_b + off, 1237 BlendPlaneRow_SSSE3(src_argb_a + off,
1226 src_argb_alpha + off, 1238 src_argb_b + off,
1227 dst_argb_opt, 1239 src_argb_alpha + off,
1228 width * height); 1240 dst_argb_opt + off,
1241 width * height);
1242 } else {
1243 BlendPlaneRow_C(src_argb_a + off,
1244 src_argb_b + off,
1245 src_argb_alpha + off,
1246 dst_argb_opt + off,
1247 width * height);
1248 }
1229 } 1249 }
1230 } 1250 }
1231 for (int i = 0; i < kStride * height; ++i) { 1251 for (int i = 0; i < kStride * height; ++i) {
1232 EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); 1252 EXPECT_EQ(dst_argb_c[i + off], dst_argb_opt[i + off]);
1233 } 1253 }
1234 free_aligned_buffer_64(src_argb_a); 1254 free_aligned_buffer_64(src_argb_a);
1235 free_aligned_buffer_64(src_argb_b); 1255 free_aligned_buffer_64(src_argb_b);
1256 free_aligned_buffer_64(src_argb_alpha);
1236 free_aligned_buffer_64(dst_argb_c); 1257 free_aligned_buffer_64(dst_argb_c);
1237 free_aligned_buffer_64(dst_argb_opt); 1258 free_aligned_buffer_64(dst_argb_opt);
1238 return; 1259 return;
1260 }
1261
1262 TEST_F(LibYUVPlanarTest, BlendPlaneRow_Opt) {
1263 TestBlendPlaneRow(benchmark_width_, benchmark_height_, benchmark_iterations_,
1264 +1, 0);
1265 }
1266 TEST_F(LibYUVPlanarTest, BlendPlaneRow_Unaligned) {
1267 TestBlendPlaneRow(benchmark_width_, benchmark_height_, benchmark_iterations_,
1268 +1, 1);
1269 }
1270 #endif
1271
1272 static void TestBlendPlane(int width, int height, int benchmark_iterations,
1273 int disable_cpu_flags, int benchmark_cpu_info,
1274 int invert, int off) {
1275 if (width < 1) {
1276 width = 1;
1277 }
1278 const int kBpp = 1;
1279 const int kStride = width * kBpp;
1280 align_buffer_64(src_argb_a, kStride * height + off);
1281 align_buffer_64(src_argb_b, kStride * height + off);
1282 align_buffer_64(src_argb_alpha, kStride * height + off);
1283 align_buffer_64(dst_argb_c, kStride * height + off);
1284 align_buffer_64(dst_argb_opt, kStride * height + off);
1285 memset(dst_argb_c, 255, kStride * height + off);
1286 memset(dst_argb_opt, 255, kStride * height + off);
1287
1288 // Test source is maintained exactly if alpha is 255.
1289 for (int i = 0; i < width; ++i) {
1290 src_argb_a[i + off] = i & 255;
1291 src_argb_b[i + off] = 255 - (i & 255);
1292 }
1293 memset(src_argb_alpha + off, 255, width);
1294 BlendPlane(src_argb_a + off, width,
1295 src_argb_b + off, width,
1296 src_argb_alpha + off, width,
1297 dst_argb_opt + off, width,
1298 width, 1);
1299 for (int i = 0; i < width; ++i) {
1300 EXPECT_EQ(src_argb_a[i + off], dst_argb_opt[i + off]);
1301 }
1302 // Test destination is maintained exactly if alpha is 0.
1303 memset(src_argb_alpha + off, 0, width);
1304 BlendPlane(src_argb_a + off, width,
1305 src_argb_b + off, width,
1306 src_argb_alpha + off, width,
1307 dst_argb_opt + off, width,
1308 width, 1);
1309 for (int i = 0; i < width; ++i) {
1310 EXPECT_EQ(src_argb_b[i + off], dst_argb_opt[i + off]);
1311 }
1312 for (int i = 0; i < kStride * height; ++i) {
1313 src_argb_a[i + off] = (fastrand() & 0xff);
1314 src_argb_b[i + off] = (fastrand() & 0xff);
1315 src_argb_alpha[i + off] = (fastrand() & 0xff);
1316 }
1317
1318 MaskCpuFlags(disable_cpu_flags);
1319 BlendPlane(src_argb_a + off, width,
1320 src_argb_b + off, width,
1321 src_argb_alpha + off, width,
1322 dst_argb_c + off, width,
1323 width, height);
1324 MaskCpuFlags(benchmark_cpu_info);
1325 for (int i = 0; i < benchmark_iterations; ++i) {
1326 BlendPlane(src_argb_a + off, width,
1327 src_argb_b + off, width,
1328 src_argb_alpha + off, width,
1329 dst_argb_opt + off, width,
1330 width, height);
1331 }
1332 for (int i = 0; i < kStride * height; ++i) {
1333 EXPECT_EQ(dst_argb_c[i + off], dst_argb_opt[i + off]);
1334 }
1335 free_aligned_buffer_64(src_argb_a);
1336 free_aligned_buffer_64(src_argb_b);
1337 free_aligned_buffer_64(src_argb_alpha);
1338 free_aligned_buffer_64(dst_argb_c);
1339 free_aligned_buffer_64(dst_argb_opt);
1340 return;
1239 } 1341 }
1240 1342
1241 TEST_F(LibYUVPlanarTest, BlendPlane_Opt) { 1343 TEST_F(LibYUVPlanarTest, BlendPlane_Opt) {
1242 TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_, 1344 TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
1243 +1, 0); 1345 disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
1244 } 1346 }
1245 #endif 1347 TEST_F(LibYUVPlanarTest, BlendPlane_Unaligned) {
1348 TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
1349 disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
1350 }
1351
1352 #define SUBSAMPLE(v, a) ((((v) + (a) - 1)) / (a))
1353
1354 static void TestI420Blend(int width, int height, int benchmark_iterations,
1355 int disable_cpu_flags, int benchmark_cpu_info,
1356 int invert, int off) {
1357 width = ((width) > 0) ? (width) : 1;
1358 const int kStrideUV = SUBSAMPLE(width, 2);
1359 const int kSizeUV = kStrideUV * SUBSAMPLE(height, 2);
1360 align_buffer_64(src_y0, width * height + off);
1361 align_buffer_64(src_u0, kSizeUV + off);
1362 align_buffer_64(src_v0, kSizeUV + off);
1363 align_buffer_64(src_y1, width * height + off);
1364 align_buffer_64(src_u1, kSizeUV + off);
1365 align_buffer_64(src_v1, kSizeUV + off);
1366 align_buffer_64(src_a, width * height + off);
1367 align_buffer_64(dst_y_c, width * height + off);
1368 align_buffer_64(dst_u_c, kSizeUV + off);
1369 align_buffer_64(dst_v_c, kSizeUV + off);
1370 align_buffer_64(dst_y_opt, width * height + off);
1371 align_buffer_64(dst_u_opt, kSizeUV + off);
1372 align_buffer_64(dst_v_opt, kSizeUV + off);
1373
1374 MemRandomize(src_y0, width * height + off);
1375 MemRandomize(src_u0, kSizeUV + off);
1376 MemRandomize(src_v0, kSizeUV + off);
1377 MemRandomize(src_y1, width * height + off);
1378 MemRandomize(src_u1, kSizeUV + off);
1379 MemRandomize(src_v1, kSizeUV + off);
1380 MemRandomize(src_a, width * height + off);
1381 memset(dst_y_c, 255, width * height + off);
1382 memset(dst_u_c, 255, kSizeUV + off);
1383 memset(dst_v_c, 255, kSizeUV + off);
1384 memset(dst_y_opt, 255, width * height + off);
1385 memset(dst_u_opt, 255, kSizeUV + off);
1386 memset(dst_v_opt, 255, kSizeUV + off);
1387
1388 MaskCpuFlags(disable_cpu_flags);
1389 I420Blend(src_y0 + off, width,
1390 src_u0 + off, kStrideUV,
1391 src_v0 + off, kStrideUV,
1392 src_y1 + off, width,
1393 src_u1 + off, kStrideUV,
1394 src_v1 + off, kStrideUV,
1395 src_a + off, width,
1396 dst_y_c + off, width,
1397 dst_u_c + off, kStrideUV,
1398 dst_v_c + off, kStrideUV,
1399 width, height);
1400 MaskCpuFlags(benchmark_cpu_info);
1401 for (int i = 0; i < benchmark_iterations; ++i) {
1402 I420Blend(src_y0 + off, width,
1403 src_u0 + off, kStrideUV,
1404 src_v0 + off, kStrideUV,
1405 src_y1 + off, width,
1406 src_u1 + off, kStrideUV,
1407 src_v1 + off, kStrideUV,
1408 src_a + off, width,
1409 dst_y_opt + off, width,
1410 dst_u_opt + off, kStrideUV,
1411 dst_v_opt + off, kStrideUV,
1412 width, height);
1413 }
1414 for (int i = 0; i < width * height; ++i) {
1415 EXPECT_EQ(dst_y_c[i + off], dst_y_opt[i + off]);
1416 }
1417 for (int i = 0; i < kSizeUV; ++i) {
1418 EXPECT_NEAR(dst_u_c[i + off], dst_u_opt[i + off], 1); // Subsample off by 1
1419 EXPECT_NEAR(dst_v_c[i + off], dst_v_opt[i + off], 1);
1420 }
1421 free_aligned_buffer_64(src_y0);
1422 free_aligned_buffer_64(src_u0);
1423 free_aligned_buffer_64(src_v0);
1424 free_aligned_buffer_64(src_y1);
1425 free_aligned_buffer_64(src_u1);
1426 free_aligned_buffer_64(src_v1);
1427 free_aligned_buffer_64(src_a);
1428 free_aligned_buffer_64(dst_y_c);
1429 free_aligned_buffer_64(dst_u_c);
1430 free_aligned_buffer_64(dst_v_c);
1431 free_aligned_buffer_64(dst_y_opt);
1432 free_aligned_buffer_64(dst_u_opt);
1433 free_aligned_buffer_64(dst_v_opt);
1434 return;
1435 }
1436
1437 TEST_F(LibYUVPlanarTest, I420Blend_Opt) {
1438 TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_,
1439 disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
1440 }
1441 TEST_F(LibYUVPlanarTest, I420Blend_Unaligned) {
1442 TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_,
1443 disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
1444 }
1246 1445
1247 TEST_F(LibYUVPlanarTest, TestAffine) { 1446 TEST_F(LibYUVPlanarTest, TestAffine) {
1248 SIMD_ALIGNED(uint8 orig_pixels_0[1280][4]); 1447 SIMD_ALIGNED(uint8 orig_pixels_0[1280][4]);
1249 SIMD_ALIGNED(uint8 interpolate_pixels_C[1280][4]); 1448 SIMD_ALIGNED(uint8 interpolate_pixels_C[1280][4]);
1250 1449
1251 for (int i = 0; i < 1280; ++i) { 1450 for (int i = 0; i < 1280; ++i) {
1252 for (int j = 0; j < 4; ++j) { 1451 for (int j = 0; j < 4; ++j) {
1253 orig_pixels_0[i][j] = i; 1452 orig_pixels_0[i][j] = i;
1254 } 1453 }
1255 } 1454 }
(...skipping 1170 matching lines...) Expand 10 before | Expand all | Expand 10 after
2426 2625
2427 TEST_F(LibYUVPlanarTest, SetPlane_Opt) { 2626 TEST_F(LibYUVPlanarTest, SetPlane_Opt) {
2428 int max_diff = TestARGBRect(benchmark_width_, benchmark_height_, 2627 int max_diff = TestARGBRect(benchmark_width_, benchmark_height_,
2429 benchmark_iterations_, 2628 benchmark_iterations_,
2430 disable_cpu_flags_, benchmark_cpu_info_, 2629 disable_cpu_flags_, benchmark_cpu_info_,
2431 +1, 0, 1); 2630 +1, 0, 1);
2432 EXPECT_EQ(0, max_diff); 2631 EXPECT_EQ(0, max_diff);
2433 } 2632 }
2434 2633
2435 } // namespace libyuv 2634 } // namespace libyuv
OLD | NEW
« no previous file with comments | « source/row_win.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698