OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 1145 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1156 EXPECT_LE(max_diff, 1); | 1156 EXPECT_LE(max_diff, 1); |
1157 } | 1157 } |
1158 | 1158 |
1159 TEST_F(LibYUVPlanarTest, ARGBBlend_Opt) { | 1159 TEST_F(LibYUVPlanarTest, ARGBBlend_Opt) { |
1160 int max_diff = TestBlend(benchmark_width_, benchmark_height_, | 1160 int max_diff = TestBlend(benchmark_width_, benchmark_height_, |
1161 benchmark_iterations_, | 1161 benchmark_iterations_, |
1162 disable_cpu_flags_, benchmark_cpu_info_, +1, 0); | 1162 disable_cpu_flags_, benchmark_cpu_info_, +1, 0); |
1163 EXPECT_LE(max_diff, 1); | 1163 EXPECT_LE(max_diff, 1); |
1164 } | 1164 } |
1165 | 1165 |
1166 #ifdef HAS_BLENDPLANEROW_SSSE3 | 1166 #ifdef HAS_BLENDPLANEROW_AVX2 |
1167 // TODO(fbarchard): Switch to I420Blend. | 1167 // TODO(fbarchard): Switch to I420Blend. |
1168 static void TestBlendPlane(int width, int height, int benchmark_iterations, | 1168 static void TestBlendPlaneRow(int width, int height, int benchmark_iterations, |
1169 int invert, int off) { | 1169 int invert, int off) { |
1170 int has_ssse3 = TestCpuFlag(kCpuHasSSSE3); | 1170 int has_ssse3 = TestCpuFlag(kCpuHasSSSE3); |
| 1171 int has_avx2 = TestCpuFlag(kCpuHasAVX2); |
1171 width = width * height; | 1172 width = width * height; |
1172 height = 1; | 1173 height = 1; |
1173 if (width < 1) { | |
1174 width = 1; | |
1175 } | |
1176 if (width < 256) { | 1174 if (width < 256) { |
1177 width = 256; | 1175 width = 256; |
1178 } | 1176 } |
1179 const int kBpp = 1; | 1177 const int kBpp = 1; |
1180 const int kStride = width * kBpp; | 1178 const int kStride = width * kBpp; |
1181 align_buffer_64(src_argb_a, kStride * height + off); | 1179 align_buffer_64(src_argb_a, kStride * height + off); |
1182 align_buffer_64(src_argb_b, kStride * height + off); | 1180 align_buffer_64(src_argb_b, kStride * height + off); |
1183 align_buffer_64(src_argb_alpha, kStride * height + off); | 1181 align_buffer_64(src_argb_alpha, kStride * height + off); |
1184 align_buffer_64(dst_argb_c, kStride * height); | 1182 align_buffer_64(dst_argb_c, kStride * height + off); |
1185 align_buffer_64(dst_argb_opt, kStride * height); | 1183 align_buffer_64(dst_argb_opt, kStride * height + off); |
| 1184 memset(dst_argb_c, 255, kStride * height + off); |
| 1185 memset(dst_argb_opt, 255, kStride * height + off); |
1186 | 1186 |
1187 if (has_ssse3) { | 1187 if (has_ssse3) { |
1188 for (int i = 0; i < 255; ++i) { | 1188 // Test source is maintained exactly if alpha is 255. |
1189 src_argb_a[i] = i; | 1189 for (int i = 0; i < 256; ++i) { |
1190 src_argb_b[i] = 255 - i; | 1190 src_argb_a[i + off] = i; |
1191 src_argb_alpha[i] = 255; | 1191 src_argb_b[i + off] = 255 - i; |
| 1192 src_argb_alpha[i + off] = 255; |
1192 } | 1193 } |
1193 memset(dst_argb_opt, 0xfb, kStride * height); | |
1194 BlendPlaneRow_SSSE3(src_argb_a + off, | 1194 BlendPlaneRow_SSSE3(src_argb_a + off, |
1195 src_argb_b + off, | 1195 src_argb_b + off, |
1196 src_argb_alpha + off, | 1196 src_argb_alpha + off, |
1197 dst_argb_opt, | 1197 dst_argb_opt + off, |
1198 width * height); | 1198 256); |
1199 for (int i = 0; i < kStride * height; ++i) { | 1199 for (int i = 0; i < 256; ++i) { |
1200 EXPECT_EQ(src_argb_a[i], dst_argb_opt[i]); | 1200 EXPECT_EQ(src_argb_a[i + off], dst_argb_opt[i + off]); |
| 1201 } |
| 1202 // Test destination is maintained exactly if alpha is 0. |
| 1203 for (int i = 0; i < 256; ++i) { |
| 1204 src_argb_a[i + off] = i; |
| 1205 src_argb_b[i + off] = 255 - i; |
| 1206 src_argb_alpha[i + off] = 0; |
| 1207 } |
| 1208 BlendPlaneRow_SSSE3(src_argb_a + off, |
| 1209 src_argb_b + off, |
| 1210 src_argb_alpha + off, |
| 1211 dst_argb_opt + off, |
| 1212 256); |
| 1213 for (int i = 0; i < 256; ++i) { |
| 1214 EXPECT_EQ(src_argb_b[i + off], dst_argb_opt[i + off]); |
1201 } | 1215 } |
1202 } | 1216 } |
1203 for (int i = 0; i < kStride * height; ++i) { | 1217 for (int i = 0; i < kStride * height; ++i) { |
1204 src_argb_a[i + off] = (fastrand() & 0xff); | 1218 src_argb_a[i + off] = (fastrand() & 0xff); |
1205 src_argb_b[i + off] = (fastrand() & 0xff); | 1219 src_argb_b[i + off] = (fastrand() & 0xff); |
1206 src_argb_alpha[i + off] = (fastrand() & 0xff); | 1220 src_argb_alpha[i + off] = (fastrand() & 0xff); |
1207 } | 1221 } |
1208 memset(dst_argb_c, 255, kStride * height); | |
1209 memset(dst_argb_opt, 255, kStride * height); | |
1210 | 1222 |
1211 BlendPlaneRow_C(src_argb_a + off, | 1223 BlendPlaneRow_C(src_argb_a + off, |
1212 src_argb_b + off, | 1224 src_argb_b + off, |
1213 src_argb_alpha + off, | 1225 src_argb_alpha + off, |
1214 dst_argb_c, | 1226 dst_argb_c + off, |
1215 width * height); | 1227 width * height); |
1216 for (int i = 0; i < benchmark_iterations; ++i) { | 1228 for (int i = 0; i < benchmark_iterations; ++i) { |
1217 if (has_ssse3) { | 1229 if (has_avx2) { |
1218 BlendPlaneRow_SSSE3(src_argb_a + off, | 1230 BlendPlaneRow_AVX2(src_argb_a + off, |
1219 src_argb_b + off, | 1231 src_argb_b + off, |
1220 src_argb_alpha + off, | 1232 src_argb_alpha + off, |
1221 dst_argb_opt, | 1233 dst_argb_opt + off, |
1222 width * height); | 1234 width * height); |
1223 } else { | 1235 } else { |
1224 BlendPlaneRow_C(src_argb_a + off, | 1236 if (has_ssse3) { |
1225 src_argb_b + off, | 1237 BlendPlaneRow_SSSE3(src_argb_a + off, |
1226 src_argb_alpha + off, | 1238 src_argb_b + off, |
1227 dst_argb_opt, | 1239 src_argb_alpha + off, |
1228 width * height); | 1240 dst_argb_opt + off, |
| 1241 width * height); |
| 1242 } else { |
| 1243 BlendPlaneRow_C(src_argb_a + off, |
| 1244 src_argb_b + off, |
| 1245 src_argb_alpha + off, |
| 1246 dst_argb_opt + off, |
| 1247 width * height); |
| 1248 } |
1229 } | 1249 } |
1230 } | 1250 } |
1231 for (int i = 0; i < kStride * height; ++i) { | 1251 for (int i = 0; i < kStride * height; ++i) { |
1232 EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); | 1252 EXPECT_EQ(dst_argb_c[i + off], dst_argb_opt[i + off]); |
1233 } | 1253 } |
1234 free_aligned_buffer_64(src_argb_a); | 1254 free_aligned_buffer_64(src_argb_a); |
1235 free_aligned_buffer_64(src_argb_b); | 1255 free_aligned_buffer_64(src_argb_b); |
| 1256 free_aligned_buffer_64(src_argb_alpha); |
1236 free_aligned_buffer_64(dst_argb_c); | 1257 free_aligned_buffer_64(dst_argb_c); |
1237 free_aligned_buffer_64(dst_argb_opt); | 1258 free_aligned_buffer_64(dst_argb_opt); |
1238 return; | 1259 return; |
| 1260 } |
| 1261 |
| 1262 TEST_F(LibYUVPlanarTest, BlendPlaneRow_Opt) { |
| 1263 TestBlendPlaneRow(benchmark_width_, benchmark_height_, benchmark_iterations_, |
| 1264 +1, 0); |
| 1265 } |
| 1266 TEST_F(LibYUVPlanarTest, BlendPlaneRow_Unaligned) { |
| 1267 TestBlendPlaneRow(benchmark_width_, benchmark_height_, benchmark_iterations_, |
| 1268 +1, 1); |
| 1269 } |
| 1270 #endif |
| 1271 |
| 1272 static void TestBlendPlane(int width, int height, int benchmark_iterations, |
| 1273 int disable_cpu_flags, int benchmark_cpu_info, |
| 1274 int invert, int off) { |
| 1275 if (width < 1) { |
| 1276 width = 1; |
| 1277 } |
| 1278 const int kBpp = 1; |
| 1279 const int kStride = width * kBpp; |
| 1280 align_buffer_64(src_argb_a, kStride * height + off); |
| 1281 align_buffer_64(src_argb_b, kStride * height + off); |
| 1282 align_buffer_64(src_argb_alpha, kStride * height + off); |
| 1283 align_buffer_64(dst_argb_c, kStride * height + off); |
| 1284 align_buffer_64(dst_argb_opt, kStride * height + off); |
| 1285 memset(dst_argb_c, 255, kStride * height + off); |
| 1286 memset(dst_argb_opt, 255, kStride * height + off); |
| 1287 |
| 1288 // Test source is maintained exactly if alpha is 255. |
| 1289 for (int i = 0; i < width; ++i) { |
| 1290 src_argb_a[i + off] = i & 255; |
| 1291 src_argb_b[i + off] = 255 - (i & 255); |
| 1292 } |
| 1293 memset(src_argb_alpha + off, 255, width); |
| 1294 BlendPlane(src_argb_a + off, width, |
| 1295 src_argb_b + off, width, |
| 1296 src_argb_alpha + off, width, |
| 1297 dst_argb_opt + off, width, |
| 1298 width, 1); |
| 1299 for (int i = 0; i < width; ++i) { |
| 1300 EXPECT_EQ(src_argb_a[i + off], dst_argb_opt[i + off]); |
| 1301 } |
| 1302 // Test destination is maintained exactly if alpha is 0. |
| 1303 memset(src_argb_alpha + off, 0, width); |
| 1304 BlendPlane(src_argb_a + off, width, |
| 1305 src_argb_b + off, width, |
| 1306 src_argb_alpha + off, width, |
| 1307 dst_argb_opt + off, width, |
| 1308 width, 1); |
| 1309 for (int i = 0; i < width; ++i) { |
| 1310 EXPECT_EQ(src_argb_b[i + off], dst_argb_opt[i + off]); |
| 1311 } |
| 1312 for (int i = 0; i < kStride * height; ++i) { |
| 1313 src_argb_a[i + off] = (fastrand() & 0xff); |
| 1314 src_argb_b[i + off] = (fastrand() & 0xff); |
| 1315 src_argb_alpha[i + off] = (fastrand() & 0xff); |
| 1316 } |
| 1317 |
| 1318 MaskCpuFlags(disable_cpu_flags); |
| 1319 BlendPlane(src_argb_a + off, width, |
| 1320 src_argb_b + off, width, |
| 1321 src_argb_alpha + off, width, |
| 1322 dst_argb_c + off, width, |
| 1323 width, height); |
| 1324 MaskCpuFlags(benchmark_cpu_info); |
| 1325 for (int i = 0; i < benchmark_iterations; ++i) { |
| 1326 BlendPlane(src_argb_a + off, width, |
| 1327 src_argb_b + off, width, |
| 1328 src_argb_alpha + off, width, |
| 1329 dst_argb_opt + off, width, |
| 1330 width, height); |
| 1331 } |
| 1332 for (int i = 0; i < kStride * height; ++i) { |
| 1333 EXPECT_EQ(dst_argb_c[i + off], dst_argb_opt[i + off]); |
| 1334 } |
| 1335 free_aligned_buffer_64(src_argb_a); |
| 1336 free_aligned_buffer_64(src_argb_b); |
| 1337 free_aligned_buffer_64(src_argb_alpha); |
| 1338 free_aligned_buffer_64(dst_argb_c); |
| 1339 free_aligned_buffer_64(dst_argb_opt); |
| 1340 return; |
1239 } | 1341 } |
1240 | 1342 |
1241 TEST_F(LibYUVPlanarTest, BlendPlane_Opt) { | 1343 TEST_F(LibYUVPlanarTest, BlendPlane_Opt) { |
1242 TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_, | 1344 TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_, |
1243 +1, 0); | 1345 disable_cpu_flags_, benchmark_cpu_info_, +1, 0); |
1244 } | 1346 } |
1245 #endif | 1347 TEST_F(LibYUVPlanarTest, BlendPlane_Unaligned) { |
| 1348 TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_, |
| 1349 disable_cpu_flags_, benchmark_cpu_info_, +1, 1); |
| 1350 } |
| 1351 |
| 1352 #define SUBSAMPLE(v, a) ((((v) + (a) - 1)) / (a)) |
| 1353 |
| 1354 static void TestI420Blend(int width, int height, int benchmark_iterations, |
| 1355 int disable_cpu_flags, int benchmark_cpu_info, |
| 1356 int invert, int off) { |
| 1357 width = ((width) > 0) ? (width) : 1; |
| 1358 const int kStrideUV = SUBSAMPLE(width, 2); |
| 1359 const int kSizeUV = kStrideUV * SUBSAMPLE(height, 2); |
| 1360 align_buffer_64(src_y0, width * height + off); |
| 1361 align_buffer_64(src_u0, kSizeUV + off); |
| 1362 align_buffer_64(src_v0, kSizeUV + off); |
| 1363 align_buffer_64(src_y1, width * height + off); |
| 1364 align_buffer_64(src_u1, kSizeUV + off); |
| 1365 align_buffer_64(src_v1, kSizeUV + off); |
| 1366 align_buffer_64(src_a, width * height + off); |
| 1367 align_buffer_64(dst_y_c, width * height + off); |
| 1368 align_buffer_64(dst_u_c, kSizeUV + off); |
| 1369 align_buffer_64(dst_v_c, kSizeUV + off); |
| 1370 align_buffer_64(dst_y_opt, width * height + off); |
| 1371 align_buffer_64(dst_u_opt, kSizeUV + off); |
| 1372 align_buffer_64(dst_v_opt, kSizeUV + off); |
| 1373 |
| 1374 MemRandomize(src_y0, width * height + off); |
| 1375 MemRandomize(src_u0, kSizeUV + off); |
| 1376 MemRandomize(src_v0, kSizeUV + off); |
| 1377 MemRandomize(src_y1, width * height + off); |
| 1378 MemRandomize(src_u1, kSizeUV + off); |
| 1379 MemRandomize(src_v1, kSizeUV + off); |
| 1380 MemRandomize(src_a, width * height + off); |
| 1381 memset(dst_y_c, 255, width * height + off); |
| 1382 memset(dst_u_c, 255, kSizeUV + off); |
| 1383 memset(dst_v_c, 255, kSizeUV + off); |
| 1384 memset(dst_y_opt, 255, width * height + off); |
| 1385 memset(dst_u_opt, 255, kSizeUV + off); |
| 1386 memset(dst_v_opt, 255, kSizeUV + off); |
| 1387 |
| 1388 MaskCpuFlags(disable_cpu_flags); |
| 1389 I420Blend(src_y0 + off, width, |
| 1390 src_u0 + off, kStrideUV, |
| 1391 src_v0 + off, kStrideUV, |
| 1392 src_y1 + off, width, |
| 1393 src_u1 + off, kStrideUV, |
| 1394 src_v1 + off, kStrideUV, |
| 1395 src_a + off, width, |
| 1396 dst_y_c + off, width, |
| 1397 dst_u_c + off, kStrideUV, |
| 1398 dst_v_c + off, kStrideUV, |
| 1399 width, height); |
| 1400 MaskCpuFlags(benchmark_cpu_info); |
| 1401 for (int i = 0; i < benchmark_iterations; ++i) { |
| 1402 I420Blend(src_y0 + off, width, |
| 1403 src_u0 + off, kStrideUV, |
| 1404 src_v0 + off, kStrideUV, |
| 1405 src_y1 + off, width, |
| 1406 src_u1 + off, kStrideUV, |
| 1407 src_v1 + off, kStrideUV, |
| 1408 src_a + off, width, |
| 1409 dst_y_opt + off, width, |
| 1410 dst_u_opt + off, kStrideUV, |
| 1411 dst_v_opt + off, kStrideUV, |
| 1412 width, height); |
| 1413 } |
| 1414 for (int i = 0; i < width * height; ++i) { |
| 1415 EXPECT_EQ(dst_y_c[i + off], dst_y_opt[i + off]); |
| 1416 } |
| 1417 for (int i = 0; i < kSizeUV; ++i) { |
| 1418 EXPECT_NEAR(dst_u_c[i + off], dst_u_opt[i + off], 1); // Subsample off by 1 |
| 1419 EXPECT_NEAR(dst_v_c[i + off], dst_v_opt[i + off], 1); |
| 1420 } |
| 1421 free_aligned_buffer_64(src_y0); |
| 1422 free_aligned_buffer_64(src_u0); |
| 1423 free_aligned_buffer_64(src_v0); |
| 1424 free_aligned_buffer_64(src_y1); |
| 1425 free_aligned_buffer_64(src_u1); |
| 1426 free_aligned_buffer_64(src_v1); |
| 1427 free_aligned_buffer_64(src_a); |
| 1428 free_aligned_buffer_64(dst_y_c); |
| 1429 free_aligned_buffer_64(dst_u_c); |
| 1430 free_aligned_buffer_64(dst_v_c); |
| 1431 free_aligned_buffer_64(dst_y_opt); |
| 1432 free_aligned_buffer_64(dst_u_opt); |
| 1433 free_aligned_buffer_64(dst_v_opt); |
| 1434 return; |
| 1435 } |
| 1436 |
| 1437 TEST_F(LibYUVPlanarTest, I420Blend_Opt) { |
| 1438 TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_, |
| 1439 disable_cpu_flags_, benchmark_cpu_info_, +1, 0); |
| 1440 } |
| 1441 TEST_F(LibYUVPlanarTest, I420Blend_Unaligned) { |
| 1442 TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_, |
| 1443 disable_cpu_flags_, benchmark_cpu_info_, +1, 1); |
| 1444 } |
1246 | 1445 |
1247 TEST_F(LibYUVPlanarTest, TestAffine) { | 1446 TEST_F(LibYUVPlanarTest, TestAffine) { |
1248 SIMD_ALIGNED(uint8 orig_pixels_0[1280][4]); | 1447 SIMD_ALIGNED(uint8 orig_pixels_0[1280][4]); |
1249 SIMD_ALIGNED(uint8 interpolate_pixels_C[1280][4]); | 1448 SIMD_ALIGNED(uint8 interpolate_pixels_C[1280][4]); |
1250 | 1449 |
1251 for (int i = 0; i < 1280; ++i) { | 1450 for (int i = 0; i < 1280; ++i) { |
1252 for (int j = 0; j < 4; ++j) { | 1451 for (int j = 0; j < 4; ++j) { |
1253 orig_pixels_0[i][j] = i; | 1452 orig_pixels_0[i][j] = i; |
1254 } | 1453 } |
1255 } | 1454 } |
(...skipping 1170 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2426 | 2625 |
2427 TEST_F(LibYUVPlanarTest, SetPlane_Opt) { | 2626 TEST_F(LibYUVPlanarTest, SetPlane_Opt) { |
2428 int max_diff = TestARGBRect(benchmark_width_, benchmark_height_, | 2627 int max_diff = TestARGBRect(benchmark_width_, benchmark_height_, |
2429 benchmark_iterations_, | 2628 benchmark_iterations_, |
2430 disable_cpu_flags_, benchmark_cpu_info_, | 2629 disable_cpu_flags_, benchmark_cpu_info_, |
2431 +1, 0, 1); | 2630 +1, 0, 1); |
2432 EXPECT_EQ(0, max_diff); | 2631 EXPECT_EQ(0, max_diff); |
2433 } | 2632 } |
2434 | 2633 |
2435 } // namespace libyuv | 2634 } // namespace libyuv |
OLD | NEW |