| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 98 EXPECT_NEAR(85, atten_pixels[255 * 4 + 2], 1); | 98 EXPECT_NEAR(85, atten_pixels[255 * 4 + 2], 1); |
| 99 EXPECT_EQ(255, atten_pixels[255 * 4 + 3]); | 99 EXPECT_EQ(255, atten_pixels[255 * 4 + 3]); |
| 100 | 100 |
| 101 free_aligned_buffer_64(atten2_pixels); | 101 free_aligned_buffer_64(atten2_pixels); |
| 102 free_aligned_buffer_64(unatten_pixels); | 102 free_aligned_buffer_64(unatten_pixels); |
| 103 free_aligned_buffer_64(atten_pixels); | 103 free_aligned_buffer_64(atten_pixels); |
| 104 free_aligned_buffer_64(orig_pixels); | 104 free_aligned_buffer_64(orig_pixels); |
| 105 } | 105 } |
| 106 | 106 |
| 107 static int TestAttenuateI(int width, int height, int benchmark_iterations, | 107 static int TestAttenuateI(int width, int height, int benchmark_iterations, |
| 108 int disable_cpu_flags, int invert, int off) { | 108 int disable_cpu_flags, int benchmark_cpu_info, |
| 109 int invert, int off) { |
| 109 if (width < 1) { | 110 if (width < 1) { |
| 110 width = 1; | 111 width = 1; |
| 111 } | 112 } |
| 112 const int kBpp = 4; | 113 const int kBpp = 4; |
| 113 const int kStride = width * kBpp; | 114 const int kStride = width * kBpp; |
| 114 align_buffer_64(src_argb, kStride * height + off); | 115 align_buffer_64(src_argb, kStride * height + off); |
| 115 align_buffer_64(dst_argb_c, kStride * height); | 116 align_buffer_64(dst_argb_c, kStride * height); |
| 116 align_buffer_64(dst_argb_opt, kStride * height); | 117 align_buffer_64(dst_argb_opt, kStride * height); |
| 117 for (int i = 0; i < kStride * height; ++i) { | 118 for (int i = 0; i < kStride * height; ++i) { |
| 118 src_argb[i + off] = (fastrand() & 0xff); | 119 src_argb[i + off] = (fastrand() & 0xff); |
| 119 } | 120 } |
| 120 memset(dst_argb_c, 0, kStride * height); | 121 memset(dst_argb_c, 0, kStride * height); |
| 121 memset(dst_argb_opt, 0, kStride * height); | 122 memset(dst_argb_opt, 0, kStride * height); |
| 122 | 123 |
| 123 MaskCpuFlags(disable_cpu_flags); | 124 MaskCpuFlags(disable_cpu_flags); |
| 124 ARGBAttenuate(src_argb + off, kStride, | 125 ARGBAttenuate(src_argb + off, kStride, |
| 125 dst_argb_c, kStride, | 126 dst_argb_c, kStride, |
| 126 width, invert * height); | 127 width, invert * height); |
| 127 MaskCpuFlags(-1); | 128 MaskCpuFlags(benchmark_cpu_info); |
| 128 for (int i = 0; i < benchmark_iterations; ++i) { | 129 for (int i = 0; i < benchmark_iterations; ++i) { |
| 129 ARGBAttenuate(src_argb + off, kStride, | 130 ARGBAttenuate(src_argb + off, kStride, |
| 130 dst_argb_opt, kStride, | 131 dst_argb_opt, kStride, |
| 131 width, invert * height); | 132 width, invert * height); |
| 132 } | 133 } |
| 133 int max_diff = 0; | 134 int max_diff = 0; |
| 134 for (int i = 0; i < kStride * height; ++i) { | 135 for (int i = 0; i < kStride * height; ++i) { |
| 135 int abs_diff = | 136 int abs_diff = |
| 136 abs(static_cast<int>(dst_argb_c[i]) - | 137 abs(static_cast<int>(dst_argb_c[i]) - |
| 137 static_cast<int>(dst_argb_opt[i])); | 138 static_cast<int>(dst_argb_opt[i])); |
| 138 if (abs_diff > max_diff) { | 139 if (abs_diff > max_diff) { |
| 139 max_diff = abs_diff; | 140 max_diff = abs_diff; |
| 140 } | 141 } |
| 141 } | 142 } |
| 142 free_aligned_buffer_64(src_argb); | 143 free_aligned_buffer_64(src_argb); |
| 143 free_aligned_buffer_64(dst_argb_c); | 144 free_aligned_buffer_64(dst_argb_c); |
| 144 free_aligned_buffer_64(dst_argb_opt); | 145 free_aligned_buffer_64(dst_argb_opt); |
| 145 return max_diff; | 146 return max_diff; |
| 146 } | 147 } |
| 147 | 148 |
| 148 TEST_F(LibYUVPlanarTest, ARGBAttenuate_Any) { | 149 TEST_F(LibYUVPlanarTest, ARGBAttenuate_Any) { |
| 149 int max_diff = TestAttenuateI(benchmark_width_ - 1, benchmark_height_, | 150 int max_diff = TestAttenuateI(benchmark_width_ - 1, benchmark_height_, |
| 150 benchmark_iterations_, disable_cpu_flags_, | 151 benchmark_iterations_, |
| 152 disable_cpu_flags_, benchmark_cpu_info_, |
| 151 +1, 0); | 153 +1, 0); |
| 152 EXPECT_LE(max_diff, 2); | 154 EXPECT_LE(max_diff, 2); |
| 153 } | 155 } |
| 154 | 156 |
| 155 TEST_F(LibYUVPlanarTest, ARGBAttenuate_Unaligned) { | 157 TEST_F(LibYUVPlanarTest, ARGBAttenuate_Unaligned) { |
| 156 int max_diff = TestAttenuateI(benchmark_width_, benchmark_height_, | 158 int max_diff = TestAttenuateI(benchmark_width_, benchmark_height_, |
| 157 benchmark_iterations_, disable_cpu_flags_, | 159 benchmark_iterations_, |
| 160 disable_cpu_flags_, benchmark_cpu_info_, |
| 158 +1, 1); | 161 +1, 1); |
| 159 EXPECT_LE(max_diff, 2); | 162 EXPECT_LE(max_diff, 2); |
| 160 } | 163 } |
| 161 | 164 |
| 162 TEST_F(LibYUVPlanarTest, ARGBAttenuate_Invert) { | 165 TEST_F(LibYUVPlanarTest, ARGBAttenuate_Invert) { |
| 163 int max_diff = TestAttenuateI(benchmark_width_, benchmark_height_, | 166 int max_diff = TestAttenuateI(benchmark_width_, benchmark_height_, |
| 164 benchmark_iterations_, disable_cpu_flags_, | 167 benchmark_iterations_, |
| 168 disable_cpu_flags_, benchmark_cpu_info_, |
| 165 -1, 0); | 169 -1, 0); |
| 166 EXPECT_LE(max_diff, 2); | 170 EXPECT_LE(max_diff, 2); |
| 167 } | 171 } |
| 168 | 172 |
| 169 TEST_F(LibYUVPlanarTest, ARGBAttenuate_Opt) { | 173 TEST_F(LibYUVPlanarTest, ARGBAttenuate_Opt) { |
| 170 int max_diff = TestAttenuateI(benchmark_width_, benchmark_height_, | 174 int max_diff = TestAttenuateI(benchmark_width_, benchmark_height_, |
| 171 benchmark_iterations_, disable_cpu_flags_, | 175 benchmark_iterations_, |
| 176 disable_cpu_flags_, benchmark_cpu_info_, |
| 172 +1, 0); | 177 +1, 0); |
| 173 EXPECT_LE(max_diff, 2); | 178 EXPECT_LE(max_diff, 2); |
| 174 } | 179 } |
| 175 | 180 |
| 176 static int TestUnattenuateI(int width, int height, int benchmark_iterations, | 181 static int TestUnattenuateI(int width, int height, int benchmark_iterations, |
| 177 int disable_cpu_flags, int invert, int off) { | 182 int disable_cpu_flags, int benchmark_cpu_info, |
| 183 int invert, int off) { |
| 178 if (width < 1) { | 184 if (width < 1) { |
| 179 width = 1; | 185 width = 1; |
| 180 } | 186 } |
| 181 const int kBpp = 4; | 187 const int kBpp = 4; |
| 182 const int kStride = width * kBpp; | 188 const int kStride = width * kBpp; |
| 183 align_buffer_64(src_argb, kStride * height + off); | 189 align_buffer_64(src_argb, kStride * height + off); |
| 184 align_buffer_64(dst_argb_c, kStride * height); | 190 align_buffer_64(dst_argb_c, kStride * height); |
| 185 align_buffer_64(dst_argb_opt, kStride * height); | 191 align_buffer_64(dst_argb_opt, kStride * height); |
| 186 for (int i = 0; i < kStride * height; ++i) { | 192 for (int i = 0; i < kStride * height; ++i) { |
| 187 src_argb[i + off] = (fastrand() & 0xff); | 193 src_argb[i + off] = (fastrand() & 0xff); |
| 188 } | 194 } |
| 189 ARGBAttenuate(src_argb + off, kStride, | 195 ARGBAttenuate(src_argb + off, kStride, |
| 190 src_argb + off, kStride, | 196 src_argb + off, kStride, |
| 191 width, height); | 197 width, height); |
| 192 memset(dst_argb_c, 0, kStride * height); | 198 memset(dst_argb_c, 0, kStride * height); |
| 193 memset(dst_argb_opt, 0, kStride * height); | 199 memset(dst_argb_opt, 0, kStride * height); |
| 194 | 200 |
| 195 MaskCpuFlags(disable_cpu_flags); | 201 MaskCpuFlags(disable_cpu_flags); |
| 196 ARGBUnattenuate(src_argb + off, kStride, | 202 ARGBUnattenuate(src_argb + off, kStride, |
| 197 dst_argb_c, kStride, | 203 dst_argb_c, kStride, |
| 198 width, invert * height); | 204 width, invert * height); |
| 199 MaskCpuFlags(-1); | 205 MaskCpuFlags(benchmark_cpu_info); |
| 200 for (int i = 0; i < benchmark_iterations; ++i) { | 206 for (int i = 0; i < benchmark_iterations; ++i) { |
| 201 ARGBUnattenuate(src_argb + off, kStride, | 207 ARGBUnattenuate(src_argb + off, kStride, |
| 202 dst_argb_opt, kStride, | 208 dst_argb_opt, kStride, |
| 203 width, invert * height); | 209 width, invert * height); |
| 204 } | 210 } |
| 205 int max_diff = 0; | 211 int max_diff = 0; |
| 206 for (int i = 0; i < kStride * height; ++i) { | 212 for (int i = 0; i < kStride * height; ++i) { |
| 207 int abs_diff = | 213 int abs_diff = |
| 208 abs(static_cast<int>(dst_argb_c[i]) - | 214 abs(static_cast<int>(dst_argb_c[i]) - |
| 209 static_cast<int>(dst_argb_opt[i])); | 215 static_cast<int>(dst_argb_opt[i])); |
| 210 if (abs_diff > max_diff) { | 216 if (abs_diff > max_diff) { |
| 211 max_diff = abs_diff; | 217 max_diff = abs_diff; |
| 212 } | 218 } |
| 213 } | 219 } |
| 214 free_aligned_buffer_64(src_argb); | 220 free_aligned_buffer_64(src_argb); |
| 215 free_aligned_buffer_64(dst_argb_c); | 221 free_aligned_buffer_64(dst_argb_c); |
| 216 free_aligned_buffer_64(dst_argb_opt); | 222 free_aligned_buffer_64(dst_argb_opt); |
| 217 return max_diff; | 223 return max_diff; |
| 218 } | 224 } |
| 219 | 225 |
| 220 TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Any) { | 226 TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Any) { |
| 221 int max_diff = TestUnattenuateI(benchmark_width_ - 1, benchmark_height_, | 227 int max_diff = TestUnattenuateI(benchmark_width_ - 1, benchmark_height_, |
| 222 benchmark_iterations_, disable_cpu_flags_, | 228 benchmark_iterations_, |
| 229 disable_cpu_flags_, benchmark_cpu_info_, |
| 223 +1, 0); | 230 +1, 0); |
| 224 EXPECT_LE(max_diff, 2); | 231 EXPECT_LE(max_diff, 2); |
| 225 } | 232 } |
| 226 | 233 |
| 227 TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Unaligned) { | 234 TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Unaligned) { |
| 228 int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_, | 235 int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_, |
| 229 benchmark_iterations_, disable_cpu_flags_, | 236 benchmark_iterations_, |
| 237 disable_cpu_flags_, benchmark_cpu_info_, |
| 230 +1, 1); | 238 +1, 1); |
| 231 EXPECT_LE(max_diff, 2); | 239 EXPECT_LE(max_diff, 2); |
| 232 } | 240 } |
| 233 | 241 |
| 234 TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Invert) { | 242 TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Invert) { |
| 235 int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_, | 243 int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_, |
| 236 benchmark_iterations_, disable_cpu_flags_, | 244 benchmark_iterations_, |
| 245 disable_cpu_flags_, benchmark_cpu_info_, |
| 237 -1, 0); | 246 -1, 0); |
| 238 EXPECT_LE(max_diff, 2); | 247 EXPECT_LE(max_diff, 2); |
| 239 } | 248 } |
| 240 | 249 |
| 241 TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Opt) { | 250 TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Opt) { |
| 242 int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_, | 251 int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_, |
| 243 benchmark_iterations_, disable_cpu_flags_, | 252 benchmark_iterations_, |
| 253 disable_cpu_flags_, benchmark_cpu_info_, |
| 244 +1, 0); | 254 +1, 0); |
| 245 EXPECT_LE(max_diff, 2); | 255 EXPECT_LE(max_diff, 2); |
| 246 } | 256 } |
| 247 | 257 |
| 248 TEST_F(LibYUVPlanarTest, TestARGBComputeCumulativeSum) { | 258 TEST_F(LibYUVPlanarTest, TestARGBComputeCumulativeSum) { |
| 249 SIMD_ALIGNED(uint8 orig_pixels[16][16][4]); | 259 SIMD_ALIGNED(uint8 orig_pixels[16][16][4]); |
| 250 SIMD_ALIGNED(int32 added_pixels[16][16][4]); | 260 SIMD_ALIGNED(int32 added_pixels[16][16][4]); |
| 251 | 261 |
| 252 for (int y = 0; y < 16; ++y) { | 262 for (int y = 0; y < 16; ++y) { |
| 253 for (int x = 0; x < 16; ++x) { | 263 for (int x = 0; x < 16; ++x) { |
| (...skipping 289 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 543 | 553 |
| 544 for (int i = 0; i < 1280; ++i) { | 554 for (int i = 0; i < 1280; ++i) { |
| 545 orig_pixels[i][0] = i; | 555 orig_pixels[i][0] = i; |
| 546 orig_pixels[i][1] = i / 2; | 556 orig_pixels[i][1] = i / 2; |
| 547 orig_pixels[i][2] = i / 3; | 557 orig_pixels[i][2] = i / 3; |
| 548 orig_pixels[i][3] = i; | 558 orig_pixels[i][3] = i; |
| 549 } | 559 } |
| 550 MaskCpuFlags(disable_cpu_flags_); | 560 MaskCpuFlags(disable_cpu_flags_); |
| 551 ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0, | 561 ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0, |
| 552 &kRGBToSepia[0], 1280, 1); | 562 &kRGBToSepia[0], 1280, 1); |
| 553 MaskCpuFlags(-1); | 563 MaskCpuFlags(benchmark_cpu_info_); |
| 554 | 564 |
| 555 for (int i = 0; i < benchmark_pixels_div1280_; ++i) { | 565 for (int i = 0; i < benchmark_pixels_div1280_; ++i) { |
| 556 ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0, | 566 ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0, |
| 557 &kRGBToSepia[0], 1280, 1); | 567 &kRGBToSepia[0], 1280, 1); |
| 558 } | 568 } |
| 559 | 569 |
| 560 for (int i = 0; i < 1280; ++i) { | 570 for (int i = 0; i < 1280; ++i) { |
| 561 EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]); | 571 EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]); |
| 562 EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]); | 572 EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]); |
| 563 EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]); | 573 EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]); |
| (...skipping 380 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 944 align_buffer_64(dst_argb_opt, kStrideB * kHeight); \ | 954 align_buffer_64(dst_argb_opt, kStrideB * kHeight); \ |
| 945 for (int i = 0; i < kStrideA * kHeight; ++i) { \ | 955 for (int i = 0; i < kStrideA * kHeight; ++i) { \ |
| 946 src_argb_a[i + OFF] = (fastrand() & 0xff); \ | 956 src_argb_a[i + OFF] = (fastrand() & 0xff); \ |
| 947 src_argb_b[i + OFF] = (fastrand() & 0xff); \ | 957 src_argb_b[i + OFF] = (fastrand() & 0xff); \ |
| 948 } \ | 958 } \ |
| 949 MaskCpuFlags(disable_cpu_flags_); \ | 959 MaskCpuFlags(disable_cpu_flags_); \ |
| 950 ARGBInterpolate(src_argb_a + OFF, kStrideA, \ | 960 ARGBInterpolate(src_argb_a + OFF, kStrideA, \ |
| 951 src_argb_b + OFF, kStrideA, \ | 961 src_argb_b + OFF, kStrideA, \ |
| 952 dst_argb_c, kStrideB, \ | 962 dst_argb_c, kStrideB, \ |
| 953 kWidth, NEG kHeight, TERP); \ | 963 kWidth, NEG kHeight, TERP); \ |
| 954 MaskCpuFlags(-1); \ | 964 MaskCpuFlags(benchmark_cpu_info_); \ |
| 955 for (int i = 0; i < benchmark_iterations_; ++i) { \ | 965 for (int i = 0; i < benchmark_iterations_; ++i) { \ |
| 956 ARGBInterpolate(src_argb_a + OFF, kStrideA, \ | 966 ARGBInterpolate(src_argb_a + OFF, kStrideA, \ |
| 957 src_argb_b + OFF, kStrideA, \ | 967 src_argb_b + OFF, kStrideA, \ |
| 958 dst_argb_opt, kStrideB, \ | 968 dst_argb_opt, kStrideB, \ |
| 959 kWidth, NEG kHeight, TERP); \ | 969 kWidth, NEG kHeight, TERP); \ |
| 960 } \ | 970 } \ |
| 961 int max_diff = 0; \ | 971 int max_diff = 0; \ |
| 962 for (int i = 0; i < kStrideB * kHeight; ++i) { \ | 972 for (int i = 0; i < kStrideB * kHeight; ++i) { \ |
| 963 int abs_diff = \ | 973 int abs_diff = \ |
| 964 abs(static_cast<int>(dst_argb_c[i]) - \ | 974 abs(static_cast<int>(dst_argb_c[i]) - \ |
| (...skipping 21 matching lines...) Expand all Loading... |
| 986 TESTTERP(ARGB, 4, 1, ARGB, 4, 1, \ | 996 TESTTERP(ARGB, 4, 1, ARGB, 4, 1, \ |
| 987 benchmark_width_ - 1, TERP, 1, _Any_Invert, -, 0) | 997 benchmark_width_ - 1, TERP, 1, _Any_Invert, -, 0) |
| 988 | 998 |
| 989 TESTINTERPOLATE(0) | 999 TESTINTERPOLATE(0) |
| 990 TESTINTERPOLATE(64) | 1000 TESTINTERPOLATE(64) |
| 991 TESTINTERPOLATE(128) | 1001 TESTINTERPOLATE(128) |
| 992 TESTINTERPOLATE(192) | 1002 TESTINTERPOLATE(192) |
| 993 TESTINTERPOLATE(255) | 1003 TESTINTERPOLATE(255) |
| 994 | 1004 |
| 995 static int TestBlend(int width, int height, int benchmark_iterations, | 1005 static int TestBlend(int width, int height, int benchmark_iterations, |
| 996 int disable_cpu_flags, int invert, int off) { | 1006 int disable_cpu_flags, int benchmark_cpu_info, |
| 1007 int invert, int off) { |
| 997 if (width < 1) { | 1008 if (width < 1) { |
| 998 width = 1; | 1009 width = 1; |
| 999 } | 1010 } |
| 1000 const int kBpp = 4; | 1011 const int kBpp = 4; |
| 1001 const int kStride = width * kBpp; | 1012 const int kStride = width * kBpp; |
| 1002 align_buffer_64(src_argb_a, kStride * height + off); | 1013 align_buffer_64(src_argb_a, kStride * height + off); |
| 1003 align_buffer_64(src_argb_b, kStride * height + off); | 1014 align_buffer_64(src_argb_b, kStride * height + off); |
| 1004 align_buffer_64(dst_argb_c, kStride * height); | 1015 align_buffer_64(dst_argb_c, kStride * height); |
| 1005 align_buffer_64(dst_argb_opt, kStride * height); | 1016 align_buffer_64(dst_argb_opt, kStride * height); |
| 1006 for (int i = 0; i < kStride * height; ++i) { | 1017 for (int i = 0; i < kStride * height; ++i) { |
| 1007 src_argb_a[i + off] = (fastrand() & 0xff); | 1018 src_argb_a[i + off] = (fastrand() & 0xff); |
| 1008 src_argb_b[i + off] = (fastrand() & 0xff); | 1019 src_argb_b[i + off] = (fastrand() & 0xff); |
| 1009 } | 1020 } |
| 1010 ARGBAttenuate(src_argb_a + off, kStride, src_argb_a + off, kStride, width, | 1021 ARGBAttenuate(src_argb_a + off, kStride, src_argb_a + off, kStride, width, |
| 1011 height); | 1022 height); |
| 1012 ARGBAttenuate(src_argb_b + off, kStride, src_argb_b + off, kStride, width, | 1023 ARGBAttenuate(src_argb_b + off, kStride, src_argb_b + off, kStride, width, |
| 1013 height); | 1024 height); |
| 1014 memset(dst_argb_c, 255, kStride * height); | 1025 memset(dst_argb_c, 255, kStride * height); |
| 1015 memset(dst_argb_opt, 255, kStride * height); | 1026 memset(dst_argb_opt, 255, kStride * height); |
| 1016 | 1027 |
| 1017 MaskCpuFlags(disable_cpu_flags); | 1028 MaskCpuFlags(disable_cpu_flags); |
| 1018 ARGBBlend(src_argb_a + off, kStride, | 1029 ARGBBlend(src_argb_a + off, kStride, |
| 1019 src_argb_b + off, kStride, | 1030 src_argb_b + off, kStride, |
| 1020 dst_argb_c, kStride, | 1031 dst_argb_c, kStride, |
| 1021 width, invert * height); | 1032 width, invert * height); |
| 1022 MaskCpuFlags(-1); | 1033 MaskCpuFlags(benchmark_cpu_info); |
| 1023 for (int i = 0; i < benchmark_iterations; ++i) { | 1034 for (int i = 0; i < benchmark_iterations; ++i) { |
| 1024 ARGBBlend(src_argb_a + off, kStride, | 1035 ARGBBlend(src_argb_a + off, kStride, |
| 1025 src_argb_b + off, kStride, | 1036 src_argb_b + off, kStride, |
| 1026 dst_argb_opt, kStride, | 1037 dst_argb_opt, kStride, |
| 1027 width, invert * height); | 1038 width, invert * height); |
| 1028 } | 1039 } |
| 1029 int max_diff = 0; | 1040 int max_diff = 0; |
| 1030 for (int i = 0; i < kStride * height; ++i) { | 1041 for (int i = 0; i < kStride * height; ++i) { |
| 1031 int abs_diff = | 1042 int abs_diff = |
| 1032 abs(static_cast<int>(dst_argb_c[i]) - | 1043 abs(static_cast<int>(dst_argb_c[i]) - |
| 1033 static_cast<int>(dst_argb_opt[i])); | 1044 static_cast<int>(dst_argb_opt[i])); |
| 1034 if (abs_diff > max_diff) { | 1045 if (abs_diff > max_diff) { |
| 1035 max_diff = abs_diff; | 1046 max_diff = abs_diff; |
| 1036 } | 1047 } |
| 1037 } | 1048 } |
| 1038 free_aligned_buffer_64(src_argb_a); | 1049 free_aligned_buffer_64(src_argb_a); |
| 1039 free_aligned_buffer_64(src_argb_b); | 1050 free_aligned_buffer_64(src_argb_b); |
| 1040 free_aligned_buffer_64(dst_argb_c); | 1051 free_aligned_buffer_64(dst_argb_c); |
| 1041 free_aligned_buffer_64(dst_argb_opt); | 1052 free_aligned_buffer_64(dst_argb_opt); |
| 1042 return max_diff; | 1053 return max_diff; |
| 1043 } | 1054 } |
| 1044 | 1055 |
| 1045 TEST_F(LibYUVPlanarTest, ARGBBlend_Any) { | 1056 TEST_F(LibYUVPlanarTest, ARGBBlend_Any) { |
| 1046 int max_diff = TestBlend(benchmark_width_ - 4, benchmark_height_, | 1057 int max_diff = TestBlend(benchmark_width_ - 4, benchmark_height_, |
| 1047 benchmark_iterations_, disable_cpu_flags_, +1, 0); | 1058 benchmark_iterations_, |
| 1059 disable_cpu_flags_, benchmark_cpu_info_, +1, 0); |
| 1048 EXPECT_LE(max_diff, 1); | 1060 EXPECT_LE(max_diff, 1); |
| 1049 } | 1061 } |
| 1050 | 1062 |
| 1051 TEST_F(LibYUVPlanarTest, ARGBBlend_Unaligned) { | 1063 TEST_F(LibYUVPlanarTest, ARGBBlend_Unaligned) { |
| 1052 int max_diff = TestBlend(benchmark_width_, benchmark_height_, | 1064 int max_diff = TestBlend(benchmark_width_, benchmark_height_, |
| 1053 benchmark_iterations_, disable_cpu_flags_, +1, 1); | 1065 benchmark_iterations_, |
| 1066 disable_cpu_flags_, benchmark_cpu_info_, +1, 1); |
| 1054 EXPECT_LE(max_diff, 1); | 1067 EXPECT_LE(max_diff, 1); |
| 1055 } | 1068 } |
| 1056 | 1069 |
| 1057 TEST_F(LibYUVPlanarTest, ARGBBlend_Invert) { | 1070 TEST_F(LibYUVPlanarTest, ARGBBlend_Invert) { |
| 1058 int max_diff = TestBlend(benchmark_width_, benchmark_height_, | 1071 int max_diff = TestBlend(benchmark_width_, benchmark_height_, |
| 1059 benchmark_iterations_, disable_cpu_flags_, -1, 0); | 1072 benchmark_iterations_, |
| 1073 disable_cpu_flags_, benchmark_cpu_info_, -1, 0); |
| 1060 EXPECT_LE(max_diff, 1); | 1074 EXPECT_LE(max_diff, 1); |
| 1061 } | 1075 } |
| 1062 | 1076 |
| 1063 TEST_F(LibYUVPlanarTest, ARGBBlend_Opt) { | 1077 TEST_F(LibYUVPlanarTest, ARGBBlend_Opt) { |
| 1064 int max_diff = TestBlend(benchmark_width_, benchmark_height_, | 1078 int max_diff = TestBlend(benchmark_width_, benchmark_height_, |
| 1065 benchmark_iterations_, disable_cpu_flags_, +1, 0); | 1079 benchmark_iterations_, |
| 1080 disable_cpu_flags_, benchmark_cpu_info_, +1, 0); |
| 1066 EXPECT_LE(max_diff, 1); | 1081 EXPECT_LE(max_diff, 1); |
| 1067 } | 1082 } |
| 1068 | 1083 |
| 1069 TEST_F(LibYUVPlanarTest, TestAffine) { | 1084 TEST_F(LibYUVPlanarTest, TestAffine) { |
| 1070 SIMD_ALIGNED(uint8 orig_pixels_0[1280][4]); | 1085 SIMD_ALIGNED(uint8 orig_pixels_0[1280][4]); |
| 1071 SIMD_ALIGNED(uint8 interpolate_pixels_C[1280][4]); | 1086 SIMD_ALIGNED(uint8 interpolate_pixels_C[1280][4]); |
| 1072 | 1087 |
| 1073 for (int i = 0; i < 1280; ++i) { | 1088 for (int i = 0; i < 1280; ++i) { |
| 1074 for (int j = 0; j < 4; ++j) { | 1089 for (int j = 0; j < 4; ++j) { |
| 1075 orig_pixels_0[i][j] = i; | 1090 orig_pixels_0[i][j] = i; |
| (...skipping 261 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1337 | 1352 |
| 1338 // Disable all optimizations. | 1353 // Disable all optimizations. |
| 1339 MaskCpuFlags(disable_cpu_flags_); | 1354 MaskCpuFlags(disable_cpu_flags_); |
| 1340 double c_time = get_time(); | 1355 double c_time = get_time(); |
| 1341 for (j = 0; j < benchmark_iterations_; j++) { | 1356 for (j = 0; j < benchmark_iterations_; j++) { |
| 1342 CopyPlane(orig_y + y_off, y_st, dst_c + y_off, stride, yw, yh); | 1357 CopyPlane(orig_y + y_off, y_st, dst_c + y_off, stride, yw, yh); |
| 1343 } | 1358 } |
| 1344 c_time = (get_time() - c_time) / benchmark_iterations_; | 1359 c_time = (get_time() - c_time) / benchmark_iterations_; |
| 1345 | 1360 |
| 1346 // Enable optimizations. | 1361 // Enable optimizations. |
| 1347 MaskCpuFlags(-1); | 1362 MaskCpuFlags(benchmark_cpu_info_); |
| 1348 double opt_time = get_time(); | 1363 double opt_time = get_time(); |
| 1349 for (j = 0; j < benchmark_iterations_; j++) { | 1364 for (j = 0; j < benchmark_iterations_; j++) { |
| 1350 CopyPlane(orig_y + y_off, y_st, dst_opt + y_off, stride, yw, yh); | 1365 CopyPlane(orig_y + y_off, y_st, dst_opt + y_off, stride, yw, yh); |
| 1351 } | 1366 } |
| 1352 opt_time = (get_time() - opt_time) / benchmark_iterations_; | 1367 opt_time = (get_time() - opt_time) / benchmark_iterations_; |
| 1353 | 1368 |
| 1354 for (i = 0; i < y_plane_size; ++i) { | 1369 for (i = 0; i < y_plane_size; ++i) { |
| 1355 if (dst_c[i] != dst_opt[i]) | 1370 if (dst_c[i] != dst_opt[i]) |
| 1356 ++err; | 1371 ++err; |
| 1357 } | 1372 } |
| 1358 | 1373 |
| 1359 free_aligned_buffer_64(orig_y); | 1374 free_aligned_buffer_64(orig_y); |
| 1360 free_aligned_buffer_64(dst_c); | 1375 free_aligned_buffer_64(dst_c); |
| 1361 free_aligned_buffer_64(dst_opt); | 1376 free_aligned_buffer_64(dst_opt); |
| 1362 | 1377 |
| 1363 EXPECT_EQ(0, err); | 1378 EXPECT_EQ(0, err); |
| 1364 } | 1379 } |
| 1365 | 1380 |
| 1366 static int TestMultiply(int width, int height, int benchmark_iterations, | 1381 static int TestMultiply(int width, int height, int benchmark_iterations, |
| 1367 int disable_cpu_flags, int invert, int off) { | 1382 int disable_cpu_flags, int benchmark_cpu_info, |
| 1383 int invert, int off) { |
| 1368 if (width < 1) { | 1384 if (width < 1) { |
| 1369 width = 1; | 1385 width = 1; |
| 1370 } | 1386 } |
| 1371 const int kBpp = 4; | 1387 const int kBpp = 4; |
| 1372 const int kStride = width * kBpp; | 1388 const int kStride = width * kBpp; |
| 1373 align_buffer_64(src_argb_a, kStride * height + off); | 1389 align_buffer_64(src_argb_a, kStride * height + off); |
| 1374 align_buffer_64(src_argb_b, kStride * height + off); | 1390 align_buffer_64(src_argb_b, kStride * height + off); |
| 1375 align_buffer_64(dst_argb_c, kStride * height); | 1391 align_buffer_64(dst_argb_c, kStride * height); |
| 1376 align_buffer_64(dst_argb_opt, kStride * height); | 1392 align_buffer_64(dst_argb_opt, kStride * height); |
| 1377 for (int i = 0; i < kStride * height; ++i) { | 1393 for (int i = 0; i < kStride * height; ++i) { |
| 1378 src_argb_a[i + off] = (fastrand() & 0xff); | 1394 src_argb_a[i + off] = (fastrand() & 0xff); |
| 1379 src_argb_b[i + off] = (fastrand() & 0xff); | 1395 src_argb_b[i + off] = (fastrand() & 0xff); |
| 1380 } | 1396 } |
| 1381 memset(dst_argb_c, 0, kStride * height); | 1397 memset(dst_argb_c, 0, kStride * height); |
| 1382 memset(dst_argb_opt, 0, kStride * height); | 1398 memset(dst_argb_opt, 0, kStride * height); |
| 1383 | 1399 |
| 1384 MaskCpuFlags(disable_cpu_flags); | 1400 MaskCpuFlags(disable_cpu_flags); |
| 1385 ARGBMultiply(src_argb_a + off, kStride, | 1401 ARGBMultiply(src_argb_a + off, kStride, |
| 1386 src_argb_b + off, kStride, | 1402 src_argb_b + off, kStride, |
| 1387 dst_argb_c, kStride, | 1403 dst_argb_c, kStride, |
| 1388 width, invert * height); | 1404 width, invert * height); |
| 1389 MaskCpuFlags(-1); | 1405 MaskCpuFlags(benchmark_cpu_info); |
| 1390 for (int i = 0; i < benchmark_iterations; ++i) { | 1406 for (int i = 0; i < benchmark_iterations; ++i) { |
| 1391 ARGBMultiply(src_argb_a + off, kStride, | 1407 ARGBMultiply(src_argb_a + off, kStride, |
| 1392 src_argb_b + off, kStride, | 1408 src_argb_b + off, kStride, |
| 1393 dst_argb_opt, kStride, | 1409 dst_argb_opt, kStride, |
| 1394 width, invert * height); | 1410 width, invert * height); |
| 1395 } | 1411 } |
| 1396 int max_diff = 0; | 1412 int max_diff = 0; |
| 1397 for (int i = 0; i < kStride * height; ++i) { | 1413 for (int i = 0; i < kStride * height; ++i) { |
| 1398 int abs_diff = | 1414 int abs_diff = |
| 1399 abs(static_cast<int>(dst_argb_c[i]) - | 1415 abs(static_cast<int>(dst_argb_c[i]) - |
| 1400 static_cast<int>(dst_argb_opt[i])); | 1416 static_cast<int>(dst_argb_opt[i])); |
| 1401 if (abs_diff > max_diff) { | 1417 if (abs_diff > max_diff) { |
| 1402 max_diff = abs_diff; | 1418 max_diff = abs_diff; |
| 1403 } | 1419 } |
| 1404 } | 1420 } |
| 1405 free_aligned_buffer_64(src_argb_a); | 1421 free_aligned_buffer_64(src_argb_a); |
| 1406 free_aligned_buffer_64(src_argb_b); | 1422 free_aligned_buffer_64(src_argb_b); |
| 1407 free_aligned_buffer_64(dst_argb_c); | 1423 free_aligned_buffer_64(dst_argb_c); |
| 1408 free_aligned_buffer_64(dst_argb_opt); | 1424 free_aligned_buffer_64(dst_argb_opt); |
| 1409 return max_diff; | 1425 return max_diff; |
| 1410 } | 1426 } |
| 1411 | 1427 |
| 1412 TEST_F(LibYUVPlanarTest, ARGBMultiply_Any) { | 1428 TEST_F(LibYUVPlanarTest, ARGBMultiply_Any) { |
| 1413 int max_diff = TestMultiply(benchmark_width_ - 1, benchmark_height_, | 1429 int max_diff = TestMultiply(benchmark_width_ - 1, benchmark_height_, |
| 1414 benchmark_iterations_, disable_cpu_flags_, +1, 0); | 1430 benchmark_iterations_, |
| 1431 disable_cpu_flags_, benchmark_cpu_info_, +1, 0); |
| 1415 EXPECT_LE(max_diff, 1); | 1432 EXPECT_LE(max_diff, 1); |
| 1416 } | 1433 } |
| 1417 | 1434 |
| 1418 TEST_F(LibYUVPlanarTest, ARGBMultiply_Unaligned) { | 1435 TEST_F(LibYUVPlanarTest, ARGBMultiply_Unaligned) { |
| 1419 int max_diff = TestMultiply(benchmark_width_, benchmark_height_, | 1436 int max_diff = TestMultiply(benchmark_width_, benchmark_height_, |
| 1420 benchmark_iterations_, disable_cpu_flags_, +1, 1); | 1437 benchmark_iterations_, |
| 1438 disable_cpu_flags_, benchmark_cpu_info_, +1, 1); |
| 1421 EXPECT_LE(max_diff, 1); | 1439 EXPECT_LE(max_diff, 1); |
| 1422 } | 1440 } |
| 1423 | 1441 |
| 1424 TEST_F(LibYUVPlanarTest, ARGBMultiply_Invert) { | 1442 TEST_F(LibYUVPlanarTest, ARGBMultiply_Invert) { |
| 1425 int max_diff = TestMultiply(benchmark_width_, benchmark_height_, | 1443 int max_diff = TestMultiply(benchmark_width_, benchmark_height_, |
| 1426 benchmark_iterations_, disable_cpu_flags_, -1, 0); | 1444 benchmark_iterations_, |
| 1445 disable_cpu_flags_, benchmark_cpu_info_, -1, 0); |
| 1427 EXPECT_LE(max_diff, 1); | 1446 EXPECT_LE(max_diff, 1); |
| 1428 } | 1447 } |
| 1429 | 1448 |
| 1430 TEST_F(LibYUVPlanarTest, ARGBMultiply_Opt) { | 1449 TEST_F(LibYUVPlanarTest, ARGBMultiply_Opt) { |
| 1431 int max_diff = TestMultiply(benchmark_width_, benchmark_height_, | 1450 int max_diff = TestMultiply(benchmark_width_, benchmark_height_, |
| 1432 benchmark_iterations_, disable_cpu_flags_, +1, 0); | 1451 benchmark_iterations_, |
| 1452 disable_cpu_flags_, benchmark_cpu_info_, +1, 0); |
| 1433 EXPECT_LE(max_diff, 1); | 1453 EXPECT_LE(max_diff, 1); |
| 1434 } | 1454 } |
| 1435 | 1455 |
| 1436 static int TestAdd(int width, int height, int benchmark_iterations, | 1456 static int TestAdd(int width, int height, int benchmark_iterations, |
| 1437 int disable_cpu_flags, int invert, int off) { | 1457 int disable_cpu_flags, int benchmark_cpu_info, |
| 1458 int invert, int off) { |
| 1438 if (width < 1) { | 1459 if (width < 1) { |
| 1439 width = 1; | 1460 width = 1; |
| 1440 } | 1461 } |
| 1441 const int kBpp = 4; | 1462 const int kBpp = 4; |
| 1442 const int kStride = width * kBpp; | 1463 const int kStride = width * kBpp; |
| 1443 align_buffer_64(src_argb_a, kStride * height + off); | 1464 align_buffer_64(src_argb_a, kStride * height + off); |
| 1444 align_buffer_64(src_argb_b, kStride * height + off); | 1465 align_buffer_64(src_argb_b, kStride * height + off); |
| 1445 align_buffer_64(dst_argb_c, kStride * height); | 1466 align_buffer_64(dst_argb_c, kStride * height); |
| 1446 align_buffer_64(dst_argb_opt, kStride * height); | 1467 align_buffer_64(dst_argb_opt, kStride * height); |
| 1447 for (int i = 0; i < kStride * height; ++i) { | 1468 for (int i = 0; i < kStride * height; ++i) { |
| 1448 src_argb_a[i + off] = (fastrand() & 0xff); | 1469 src_argb_a[i + off] = (fastrand() & 0xff); |
| 1449 src_argb_b[i + off] = (fastrand() & 0xff); | 1470 src_argb_b[i + off] = (fastrand() & 0xff); |
| 1450 } | 1471 } |
| 1451 memset(dst_argb_c, 0, kStride * height); | 1472 memset(dst_argb_c, 0, kStride * height); |
| 1452 memset(dst_argb_opt, 0, kStride * height); | 1473 memset(dst_argb_opt, 0, kStride * height); |
| 1453 | 1474 |
| 1454 MaskCpuFlags(disable_cpu_flags); | 1475 MaskCpuFlags(disable_cpu_flags); |
| 1455 ARGBAdd(src_argb_a + off, kStride, | 1476 ARGBAdd(src_argb_a + off, kStride, |
| 1456 src_argb_b + off, kStride, | 1477 src_argb_b + off, kStride, |
| 1457 dst_argb_c, kStride, | 1478 dst_argb_c, kStride, |
| 1458 width, invert * height); | 1479 width, invert * height); |
| 1459 MaskCpuFlags(-1); | 1480 MaskCpuFlags(benchmark_cpu_info); |
| 1460 for (int i = 0; i < benchmark_iterations; ++i) { | 1481 for (int i = 0; i < benchmark_iterations; ++i) { |
| 1461 ARGBAdd(src_argb_a + off, kStride, | 1482 ARGBAdd(src_argb_a + off, kStride, |
| 1462 src_argb_b + off, kStride, | 1483 src_argb_b + off, kStride, |
| 1463 dst_argb_opt, kStride, | 1484 dst_argb_opt, kStride, |
| 1464 width, invert * height); | 1485 width, invert * height); |
| 1465 } | 1486 } |
| 1466 int max_diff = 0; | 1487 int max_diff = 0; |
| 1467 for (int i = 0; i < kStride * height; ++i) { | 1488 for (int i = 0; i < kStride * height; ++i) { |
| 1468 int abs_diff = | 1489 int abs_diff = |
| 1469 abs(static_cast<int>(dst_argb_c[i]) - | 1490 abs(static_cast<int>(dst_argb_c[i]) - |
| 1470 static_cast<int>(dst_argb_opt[i])); | 1491 static_cast<int>(dst_argb_opt[i])); |
| 1471 if (abs_diff > max_diff) { | 1492 if (abs_diff > max_diff) { |
| 1472 max_diff = abs_diff; | 1493 max_diff = abs_diff; |
| 1473 } | 1494 } |
| 1474 } | 1495 } |
| 1475 free_aligned_buffer_64(src_argb_a); | 1496 free_aligned_buffer_64(src_argb_a); |
| 1476 free_aligned_buffer_64(src_argb_b); | 1497 free_aligned_buffer_64(src_argb_b); |
| 1477 free_aligned_buffer_64(dst_argb_c); | 1498 free_aligned_buffer_64(dst_argb_c); |
| 1478 free_aligned_buffer_64(dst_argb_opt); | 1499 free_aligned_buffer_64(dst_argb_opt); |
| 1479 return max_diff; | 1500 return max_diff; |
| 1480 } | 1501 } |
| 1481 | 1502 |
| 1482 TEST_F(LibYUVPlanarTest, ARGBAdd_Any) { | 1503 TEST_F(LibYUVPlanarTest, ARGBAdd_Any) { |
| 1483 int max_diff = TestAdd(benchmark_width_ - 1, benchmark_height_, | 1504 int max_diff = TestAdd(benchmark_width_ - 1, benchmark_height_, |
| 1484 benchmark_iterations_, disable_cpu_flags_, +1, 0); | 1505 benchmark_iterations_, |
| 1506 disable_cpu_flags_, benchmark_cpu_info_, +1, 0); |
| 1485 EXPECT_LE(max_diff, 1); | 1507 EXPECT_LE(max_diff, 1); |
| 1486 } | 1508 } |
| 1487 | 1509 |
| 1488 TEST_F(LibYUVPlanarTest, ARGBAdd_Unaligned) { | 1510 TEST_F(LibYUVPlanarTest, ARGBAdd_Unaligned) { |
| 1489 int max_diff = TestAdd(benchmark_width_, benchmark_height_, | 1511 int max_diff = TestAdd(benchmark_width_, benchmark_height_, |
| 1490 benchmark_iterations_, disable_cpu_flags_, +1, 1); | 1512 benchmark_iterations_, |
| 1513 disable_cpu_flags_, benchmark_cpu_info_, +1, 1); |
| 1491 EXPECT_LE(max_diff, 1); | 1514 EXPECT_LE(max_diff, 1); |
| 1492 } | 1515 } |
| 1493 | 1516 |
| 1494 TEST_F(LibYUVPlanarTest, ARGBAdd_Invert) { | 1517 TEST_F(LibYUVPlanarTest, ARGBAdd_Invert) { |
| 1495 int max_diff = TestAdd(benchmark_width_, benchmark_height_, | 1518 int max_diff = TestAdd(benchmark_width_, benchmark_height_, |
| 1496 benchmark_iterations_, disable_cpu_flags_, -1, 0); | 1519 benchmark_iterations_, |
| 1520 disable_cpu_flags_, benchmark_cpu_info_, -1, 0); |
| 1497 EXPECT_LE(max_diff, 1); | 1521 EXPECT_LE(max_diff, 1); |
| 1498 } | 1522 } |
| 1499 | 1523 |
| 1500 TEST_F(LibYUVPlanarTest, ARGBAdd_Opt) { | 1524 TEST_F(LibYUVPlanarTest, ARGBAdd_Opt) { |
| 1501 int max_diff = TestAdd(benchmark_width_, benchmark_height_, | 1525 int max_diff = TestAdd(benchmark_width_, benchmark_height_, |
| 1502 benchmark_iterations_, disable_cpu_flags_, +1, 0); | 1526 benchmark_iterations_, |
| 1527 disable_cpu_flags_, benchmark_cpu_info_, +1, 0); |
| 1503 EXPECT_LE(max_diff, 1); | 1528 EXPECT_LE(max_diff, 1); |
| 1504 } | 1529 } |
| 1505 | 1530 |
| 1506 static int TestSubtract(int width, int height, int benchmark_iterations, | 1531 static int TestSubtract(int width, int height, int benchmark_iterations, |
| 1507 int disable_cpu_flags, int invert, int off) { | 1532 int disable_cpu_flags, int benchmark_cpu_info, |
| 1533 int invert, int off) { |
| 1508 if (width < 1) { | 1534 if (width < 1) { |
| 1509 width = 1; | 1535 width = 1; |
| 1510 } | 1536 } |
| 1511 const int kBpp = 4; | 1537 const int kBpp = 4; |
| 1512 const int kStride = width * kBpp; | 1538 const int kStride = width * kBpp; |
| 1513 align_buffer_64(src_argb_a, kStride * height + off); | 1539 align_buffer_64(src_argb_a, kStride * height + off); |
| 1514 align_buffer_64(src_argb_b, kStride * height + off); | 1540 align_buffer_64(src_argb_b, kStride * height + off); |
| 1515 align_buffer_64(dst_argb_c, kStride * height); | 1541 align_buffer_64(dst_argb_c, kStride * height); |
| 1516 align_buffer_64(dst_argb_opt, kStride * height); | 1542 align_buffer_64(dst_argb_opt, kStride * height); |
| 1517 for (int i = 0; i < kStride * height; ++i) { | 1543 for (int i = 0; i < kStride * height; ++i) { |
| 1518 src_argb_a[i + off] = (fastrand() & 0xff); | 1544 src_argb_a[i + off] = (fastrand() & 0xff); |
| 1519 src_argb_b[i + off] = (fastrand() & 0xff); | 1545 src_argb_b[i + off] = (fastrand() & 0xff); |
| 1520 } | 1546 } |
| 1521 memset(dst_argb_c, 0, kStride * height); | 1547 memset(dst_argb_c, 0, kStride * height); |
| 1522 memset(dst_argb_opt, 0, kStride * height); | 1548 memset(dst_argb_opt, 0, kStride * height); |
| 1523 | 1549 |
| 1524 MaskCpuFlags(disable_cpu_flags); | 1550 MaskCpuFlags(disable_cpu_flags); |
| 1525 ARGBSubtract(src_argb_a + off, kStride, | 1551 ARGBSubtract(src_argb_a + off, kStride, |
| 1526 src_argb_b + off, kStride, | 1552 src_argb_b + off, kStride, |
| 1527 dst_argb_c, kStride, | 1553 dst_argb_c, kStride, |
| 1528 width, invert * height); | 1554 width, invert * height); |
| 1529 MaskCpuFlags(-1); | 1555 MaskCpuFlags(benchmark_cpu_info); |
| 1530 for (int i = 0; i < benchmark_iterations; ++i) { | 1556 for (int i = 0; i < benchmark_iterations; ++i) { |
| 1531 ARGBSubtract(src_argb_a + off, kStride, | 1557 ARGBSubtract(src_argb_a + off, kStride, |
| 1532 src_argb_b + off, kStride, | 1558 src_argb_b + off, kStride, |
| 1533 dst_argb_opt, kStride, | 1559 dst_argb_opt, kStride, |
| 1534 width, invert * height); | 1560 width, invert * height); |
| 1535 } | 1561 } |
| 1536 int max_diff = 0; | 1562 int max_diff = 0; |
| 1537 for (int i = 0; i < kStride * height; ++i) { | 1563 for (int i = 0; i < kStride * height; ++i) { |
| 1538 int abs_diff = | 1564 int abs_diff = |
| 1539 abs(static_cast<int>(dst_argb_c[i]) - | 1565 abs(static_cast<int>(dst_argb_c[i]) - |
| 1540 static_cast<int>(dst_argb_opt[i])); | 1566 static_cast<int>(dst_argb_opt[i])); |
| 1541 if (abs_diff > max_diff) { | 1567 if (abs_diff > max_diff) { |
| 1542 max_diff = abs_diff; | 1568 max_diff = abs_diff; |
| 1543 } | 1569 } |
| 1544 } | 1570 } |
| 1545 free_aligned_buffer_64(src_argb_a); | 1571 free_aligned_buffer_64(src_argb_a); |
| 1546 free_aligned_buffer_64(src_argb_b); | 1572 free_aligned_buffer_64(src_argb_b); |
| 1547 free_aligned_buffer_64(dst_argb_c); | 1573 free_aligned_buffer_64(dst_argb_c); |
| 1548 free_aligned_buffer_64(dst_argb_opt); | 1574 free_aligned_buffer_64(dst_argb_opt); |
| 1549 return max_diff; | 1575 return max_diff; |
| 1550 } | 1576 } |
| 1551 | 1577 |
| 1552 TEST_F(LibYUVPlanarTest, ARGBSubtract_Any) { | 1578 TEST_F(LibYUVPlanarTest, ARGBSubtract_Any) { |
| 1553 int max_diff = TestSubtract(benchmark_width_ - 1, benchmark_height_, | 1579 int max_diff = TestSubtract(benchmark_width_ - 1, benchmark_height_, |
| 1554 benchmark_iterations_, disable_cpu_flags_, +1, 0); | 1580 benchmark_iterations_, |
| 1581 disable_cpu_flags_, benchmark_cpu_info_, +1, 0); |
| 1555 EXPECT_LE(max_diff, 1); | 1582 EXPECT_LE(max_diff, 1); |
| 1556 } | 1583 } |
| 1557 | 1584 |
| 1558 TEST_F(LibYUVPlanarTest, ARGBSubtract_Unaligned) { | 1585 TEST_F(LibYUVPlanarTest, ARGBSubtract_Unaligned) { |
| 1559 int max_diff = TestSubtract(benchmark_width_, benchmark_height_, | 1586 int max_diff = TestSubtract(benchmark_width_, benchmark_height_, |
| 1560 benchmark_iterations_, disable_cpu_flags_, +1, 1); | 1587 benchmark_iterations_, |
| 1588 disable_cpu_flags_, benchmark_cpu_info_, +1, 1); |
| 1561 EXPECT_LE(max_diff, 1); | 1589 EXPECT_LE(max_diff, 1); |
| 1562 } | 1590 } |
| 1563 | 1591 |
| 1564 TEST_F(LibYUVPlanarTest, ARGBSubtract_Invert) { | 1592 TEST_F(LibYUVPlanarTest, ARGBSubtract_Invert) { |
| 1565 int max_diff = TestSubtract(benchmark_width_, benchmark_height_, | 1593 int max_diff = TestSubtract(benchmark_width_, benchmark_height_, |
| 1566 benchmark_iterations_, disable_cpu_flags_, -1, 0); | 1594 benchmark_iterations_, |
| 1595 disable_cpu_flags_, benchmark_cpu_info_, -1, 0); |
| 1567 EXPECT_LE(max_diff, 1); | 1596 EXPECT_LE(max_diff, 1); |
| 1568 } | 1597 } |
| 1569 | 1598 |
| 1570 TEST_F(LibYUVPlanarTest, ARGBSubtract_Opt) { | 1599 TEST_F(LibYUVPlanarTest, ARGBSubtract_Opt) { |
| 1571 int max_diff = TestSubtract(benchmark_width_, benchmark_height_, | 1600 int max_diff = TestSubtract(benchmark_width_, benchmark_height_, |
| 1572 benchmark_iterations_, disable_cpu_flags_, +1, 0); | 1601 benchmark_iterations_, |
| 1602 disable_cpu_flags_, benchmark_cpu_info_, +1, 0); |
| 1573 EXPECT_LE(max_diff, 1); | 1603 EXPECT_LE(max_diff, 1); |
| 1574 } | 1604 } |
| 1575 | 1605 |
| 1576 static int TestSobel(int width, int height, int benchmark_iterations, | 1606 static int TestSobel(int width, int height, int benchmark_iterations, |
| 1577 int disable_cpu_flags, int invert, int off) { | 1607 int disable_cpu_flags, int benchmark_cpu_info, |
| 1608 int invert, int off) { |
| 1578 if (width < 1) { | 1609 if (width < 1) { |
| 1579 width = 1; | 1610 width = 1; |
| 1580 } | 1611 } |
| 1581 const int kBpp = 4; | 1612 const int kBpp = 4; |
| 1582 const int kStride = width * kBpp; | 1613 const int kStride = width * kBpp; |
| 1583 align_buffer_64(src_argb_a, kStride * height + off); | 1614 align_buffer_64(src_argb_a, kStride * height + off); |
| 1584 align_buffer_64(dst_argb_c, kStride * height); | 1615 align_buffer_64(dst_argb_c, kStride * height); |
| 1585 align_buffer_64(dst_argb_opt, kStride * height); | 1616 align_buffer_64(dst_argb_opt, kStride * height); |
| 1586 memset(src_argb_a, 0, kStride * height + off); | 1617 memset(src_argb_a, 0, kStride * height + off); |
| 1587 for (int i = 0; i < kStride * height; ++i) { | 1618 for (int i = 0; i < kStride * height; ++i) { |
| 1588 src_argb_a[i + off] = (fastrand() & 0xff); | 1619 src_argb_a[i + off] = (fastrand() & 0xff); |
| 1589 } | 1620 } |
| 1590 memset(dst_argb_c, 0, kStride * height); | 1621 memset(dst_argb_c, 0, kStride * height); |
| 1591 memset(dst_argb_opt, 0, kStride * height); | 1622 memset(dst_argb_opt, 0, kStride * height); |
| 1592 | 1623 |
| 1593 MaskCpuFlags(disable_cpu_flags); | 1624 MaskCpuFlags(disable_cpu_flags); |
| 1594 ARGBSobel(src_argb_a + off, kStride, | 1625 ARGBSobel(src_argb_a + off, kStride, |
| 1595 dst_argb_c, kStride, | 1626 dst_argb_c, kStride, |
| 1596 width, invert * height); | 1627 width, invert * height); |
| 1597 MaskCpuFlags(-1); | 1628 MaskCpuFlags(benchmark_cpu_info); |
| 1598 for (int i = 0; i < benchmark_iterations; ++i) { | 1629 for (int i = 0; i < benchmark_iterations; ++i) { |
| 1599 ARGBSobel(src_argb_a + off, kStride, | 1630 ARGBSobel(src_argb_a + off, kStride, |
| 1600 dst_argb_opt, kStride, | 1631 dst_argb_opt, kStride, |
| 1601 width, invert * height); | 1632 width, invert * height); |
| 1602 } | 1633 } |
| 1603 int max_diff = 0; | 1634 int max_diff = 0; |
| 1604 for (int i = 0; i < kStride * height; ++i) { | 1635 for (int i = 0; i < kStride * height; ++i) { |
| 1605 int abs_diff = | 1636 int abs_diff = |
| 1606 abs(static_cast<int>(dst_argb_c[i]) - | 1637 abs(static_cast<int>(dst_argb_c[i]) - |
| 1607 static_cast<int>(dst_argb_opt[i])); | 1638 static_cast<int>(dst_argb_opt[i])); |
| 1608 if (abs_diff > max_diff) { | 1639 if (abs_diff > max_diff) { |
| 1609 max_diff = abs_diff; | 1640 max_diff = abs_diff; |
| 1610 } | 1641 } |
| 1611 } | 1642 } |
| 1612 free_aligned_buffer_64(src_argb_a); | 1643 free_aligned_buffer_64(src_argb_a); |
| 1613 free_aligned_buffer_64(dst_argb_c); | 1644 free_aligned_buffer_64(dst_argb_c); |
| 1614 free_aligned_buffer_64(dst_argb_opt); | 1645 free_aligned_buffer_64(dst_argb_opt); |
| 1615 return max_diff; | 1646 return max_diff; |
| 1616 } | 1647 } |
| 1617 | 1648 |
| 1618 TEST_F(LibYUVPlanarTest, ARGBSobel_Any) { | 1649 TEST_F(LibYUVPlanarTest, ARGBSobel_Any) { |
| 1619 int max_diff = TestSobel(benchmark_width_ - 1, benchmark_height_, | 1650 int max_diff = TestSobel(benchmark_width_ - 1, benchmark_height_, |
| 1620 benchmark_iterations_, disable_cpu_flags_, +1, 0); | 1651 benchmark_iterations_, |
| 1652 disable_cpu_flags_, benchmark_cpu_info_, +1, 0); |
| 1621 EXPECT_EQ(0, max_diff); | 1653 EXPECT_EQ(0, max_diff); |
| 1622 } | 1654 } |
| 1623 | 1655 |
| 1624 TEST_F(LibYUVPlanarTest, ARGBSobel_Unaligned) { | 1656 TEST_F(LibYUVPlanarTest, ARGBSobel_Unaligned) { |
| 1625 int max_diff = TestSobel(benchmark_width_, benchmark_height_, | 1657 int max_diff = TestSobel(benchmark_width_, benchmark_height_, |
| 1626 benchmark_iterations_, disable_cpu_flags_, +1, 1); | 1658 benchmark_iterations_, |
| 1659 disable_cpu_flags_, benchmark_cpu_info_, +1, 1); |
| 1627 EXPECT_EQ(0, max_diff); | 1660 EXPECT_EQ(0, max_diff); |
| 1628 } | 1661 } |
| 1629 | 1662 |
| 1630 TEST_F(LibYUVPlanarTest, ARGBSobel_Invert) { | 1663 TEST_F(LibYUVPlanarTest, ARGBSobel_Invert) { |
| 1631 int max_diff = TestSobel(benchmark_width_, benchmark_height_, | 1664 int max_diff = TestSobel(benchmark_width_, benchmark_height_, |
| 1632 benchmark_iterations_, disable_cpu_flags_, -1, 0); | 1665 benchmark_iterations_, |
| 1666 disable_cpu_flags_, benchmark_cpu_info_, -1, 0); |
| 1633 EXPECT_EQ(0, max_diff); | 1667 EXPECT_EQ(0, max_diff); |
| 1634 } | 1668 } |
| 1635 | 1669 |
| 1636 TEST_F(LibYUVPlanarTest, ARGBSobel_Opt) { | 1670 TEST_F(LibYUVPlanarTest, ARGBSobel_Opt) { |
| 1637 int max_diff = TestSobel(benchmark_width_, benchmark_height_, | 1671 int max_diff = TestSobel(benchmark_width_, benchmark_height_, |
| 1638 benchmark_iterations_, disable_cpu_flags_, +1, 0); | 1672 benchmark_iterations_, |
| 1673 disable_cpu_flags_, benchmark_cpu_info_, +1, 0); |
| 1639 EXPECT_EQ(0, max_diff); | 1674 EXPECT_EQ(0, max_diff); |
| 1640 } | 1675 } |
| 1641 | 1676 |
| 1642 static int TestSobelToPlane(int width, int height, int benchmark_iterations, | 1677 static int TestSobelToPlane(int width, int height, int benchmark_iterations, |
| 1643 int disable_cpu_flags, int invert, int off) { | 1678 int disable_cpu_flags, int benchmark_cpu_info, |
| 1679 int invert, int off) { |
| 1644 if (width < 1) { | 1680 if (width < 1) { |
| 1645 width = 1; | 1681 width = 1; |
| 1646 } | 1682 } |
| 1647 const int kSrcBpp = 4; | 1683 const int kSrcBpp = 4; |
| 1648 const int kDstBpp = 1; | 1684 const int kDstBpp = 1; |
| 1649 const int kSrcStride = (width * kSrcBpp + 15) & ~15; | 1685 const int kSrcStride = (width * kSrcBpp + 15) & ~15; |
| 1650 const int kDstStride = (width * kDstBpp + 15) & ~15; | 1686 const int kDstStride = (width * kDstBpp + 15) & ~15; |
| 1651 align_buffer_64(src_argb_a, kSrcStride * height + off); | 1687 align_buffer_64(src_argb_a, kSrcStride * height + off); |
| 1652 align_buffer_64(dst_argb_c, kDstStride * height); | 1688 align_buffer_64(dst_argb_c, kDstStride * height); |
| 1653 align_buffer_64(dst_argb_opt, kDstStride * height); | 1689 align_buffer_64(dst_argb_opt, kDstStride * height); |
| 1654 memset(src_argb_a, 0, kSrcStride * height + off); | 1690 memset(src_argb_a, 0, kSrcStride * height + off); |
| 1655 for (int i = 0; i < kSrcStride * height; ++i) { | 1691 for (int i = 0; i < kSrcStride * height; ++i) { |
| 1656 src_argb_a[i + off] = (fastrand() & 0xff); | 1692 src_argb_a[i + off] = (fastrand() & 0xff); |
| 1657 } | 1693 } |
| 1658 memset(dst_argb_c, 0, kDstStride * height); | 1694 memset(dst_argb_c, 0, kDstStride * height); |
| 1659 memset(dst_argb_opt, 0, kDstStride * height); | 1695 memset(dst_argb_opt, 0, kDstStride * height); |
| 1660 | 1696 |
| 1661 MaskCpuFlags(disable_cpu_flags); | 1697 MaskCpuFlags(disable_cpu_flags); |
| 1662 ARGBSobelToPlane(src_argb_a + off, kSrcStride, | 1698 ARGBSobelToPlane(src_argb_a + off, kSrcStride, |
| 1663 dst_argb_c, kDstStride, | 1699 dst_argb_c, kDstStride, |
| 1664 width, invert * height); | 1700 width, invert * height); |
| 1665 MaskCpuFlags(-1); | 1701 MaskCpuFlags(benchmark_cpu_info); |
| 1666 for (int i = 0; i < benchmark_iterations; ++i) { | 1702 for (int i = 0; i < benchmark_iterations; ++i) { |
| 1667 ARGBSobelToPlane(src_argb_a + off, kSrcStride, | 1703 ARGBSobelToPlane(src_argb_a + off, kSrcStride, |
| 1668 dst_argb_opt, kDstStride, | 1704 dst_argb_opt, kDstStride, |
| 1669 width, invert * height); | 1705 width, invert * height); |
| 1670 } | 1706 } |
| 1671 int max_diff = 0; | 1707 int max_diff = 0; |
| 1672 for (int i = 0; i < kDstStride * height; ++i) { | 1708 for (int i = 0; i < kDstStride * height; ++i) { |
| 1673 int abs_diff = | 1709 int abs_diff = |
| 1674 abs(static_cast<int>(dst_argb_c[i]) - | 1710 abs(static_cast<int>(dst_argb_c[i]) - |
| 1675 static_cast<int>(dst_argb_opt[i])); | 1711 static_cast<int>(dst_argb_opt[i])); |
| 1676 if (abs_diff > max_diff) { | 1712 if (abs_diff > max_diff) { |
| 1677 max_diff = abs_diff; | 1713 max_diff = abs_diff; |
| 1678 } | 1714 } |
| 1679 } | 1715 } |
| 1680 free_aligned_buffer_64(src_argb_a); | 1716 free_aligned_buffer_64(src_argb_a); |
| 1681 free_aligned_buffer_64(dst_argb_c); | 1717 free_aligned_buffer_64(dst_argb_c); |
| 1682 free_aligned_buffer_64(dst_argb_opt); | 1718 free_aligned_buffer_64(dst_argb_opt); |
| 1683 return max_diff; | 1719 return max_diff; |
| 1684 } | 1720 } |
| 1685 | 1721 |
| 1686 TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Any) { | 1722 TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Any) { |
| 1687 int max_diff = TestSobelToPlane(benchmark_width_ - 1, benchmark_height_, | 1723 int max_diff = TestSobelToPlane(benchmark_width_ - 1, benchmark_height_, |
| 1688 benchmark_iterations_, disable_cpu_flags_, | 1724 benchmark_iterations_, |
| 1725 disable_cpu_flags_, benchmark_cpu_info_, |
| 1689 +1, 0); | 1726 +1, 0); |
| 1690 EXPECT_EQ(0, max_diff); | 1727 EXPECT_EQ(0, max_diff); |
| 1691 } | 1728 } |
| 1692 | 1729 |
| 1693 TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Unaligned) { | 1730 TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Unaligned) { |
| 1694 int max_diff = TestSobelToPlane(benchmark_width_, benchmark_height_, | 1731 int max_diff = TestSobelToPlane(benchmark_width_, benchmark_height_, |
| 1695 benchmark_iterations_, disable_cpu_flags_, | 1732 benchmark_iterations_, |
| 1733 disable_cpu_flags_, benchmark_cpu_info_, |
| 1696 +1, 1); | 1734 +1, 1); |
| 1697 EXPECT_EQ(0, max_diff); | 1735 EXPECT_EQ(0, max_diff); |
| 1698 } | 1736 } |
| 1699 | 1737 |
| 1700 TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Invert) { | 1738 TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Invert) { |
| 1701 int max_diff = TestSobelToPlane(benchmark_width_, benchmark_height_, | 1739 int max_diff = TestSobelToPlane(benchmark_width_, benchmark_height_, |
| 1702 benchmark_iterations_, disable_cpu_flags_, | 1740 benchmark_iterations_, |
| 1741 disable_cpu_flags_, benchmark_cpu_info_, |
| 1703 -1, 0); | 1742 -1, 0); |
| 1704 EXPECT_EQ(0, max_diff); | 1743 EXPECT_EQ(0, max_diff); |
| 1705 } | 1744 } |
| 1706 | 1745 |
| 1707 TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Opt) { | 1746 TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Opt) { |
| 1708 int max_diff = TestSobelToPlane(benchmark_width_, benchmark_height_, | 1747 int max_diff = TestSobelToPlane(benchmark_width_, benchmark_height_, |
| 1709 benchmark_iterations_, disable_cpu_flags_, | 1748 benchmark_iterations_, |
| 1749 disable_cpu_flags_, benchmark_cpu_info_, |
| 1710 +1, 0); | 1750 +1, 0); |
| 1711 EXPECT_EQ(0, max_diff); | 1751 EXPECT_EQ(0, max_diff); |
| 1712 } | 1752 } |
| 1713 | 1753 |
| 1714 static int TestSobelXY(int width, int height, int benchmark_iterations, | 1754 static int TestSobelXY(int width, int height, int benchmark_iterations, |
| 1715 int disable_cpu_flags, int invert, int off) { | 1755 int disable_cpu_flags, int benchmark_cpu_info, |
| 1756 int invert, int off) { |
| 1716 if (width < 1) { | 1757 if (width < 1) { |
| 1717 width = 1; | 1758 width = 1; |
| 1718 } | 1759 } |
| 1719 const int kBpp = 4; | 1760 const int kBpp = 4; |
| 1720 const int kStride = width * kBpp; | 1761 const int kStride = width * kBpp; |
| 1721 align_buffer_64(src_argb_a, kStride * height + off); | 1762 align_buffer_64(src_argb_a, kStride * height + off); |
| 1722 align_buffer_64(dst_argb_c, kStride * height); | 1763 align_buffer_64(dst_argb_c, kStride * height); |
| 1723 align_buffer_64(dst_argb_opt, kStride * height); | 1764 align_buffer_64(dst_argb_opt, kStride * height); |
| 1724 memset(src_argb_a, 0, kStride * height + off); | 1765 memset(src_argb_a, 0, kStride * height + off); |
| 1725 for (int i = 0; i < kStride * height; ++i) { | 1766 for (int i = 0; i < kStride * height; ++i) { |
| 1726 src_argb_a[i + off] = (fastrand() & 0xff); | 1767 src_argb_a[i + off] = (fastrand() & 0xff); |
| 1727 } | 1768 } |
| 1728 memset(dst_argb_c, 0, kStride * height); | 1769 memset(dst_argb_c, 0, kStride * height); |
| 1729 memset(dst_argb_opt, 0, kStride * height); | 1770 memset(dst_argb_opt, 0, kStride * height); |
| 1730 | 1771 |
| 1731 MaskCpuFlags(disable_cpu_flags); | 1772 MaskCpuFlags(disable_cpu_flags); |
| 1732 ARGBSobelXY(src_argb_a + off, kStride, | 1773 ARGBSobelXY(src_argb_a + off, kStride, |
| 1733 dst_argb_c, kStride, | 1774 dst_argb_c, kStride, |
| 1734 width, invert * height); | 1775 width, invert * height); |
| 1735 MaskCpuFlags(-1); | 1776 MaskCpuFlags(benchmark_cpu_info); |
| 1736 for (int i = 0; i < benchmark_iterations; ++i) { | 1777 for (int i = 0; i < benchmark_iterations; ++i) { |
| 1737 ARGBSobelXY(src_argb_a + off, kStride, | 1778 ARGBSobelXY(src_argb_a + off, kStride, |
| 1738 dst_argb_opt, kStride, | 1779 dst_argb_opt, kStride, |
| 1739 width, invert * height); | 1780 width, invert * height); |
| 1740 } | 1781 } |
| 1741 int max_diff = 0; | 1782 int max_diff = 0; |
| 1742 for (int i = 0; i < kStride * height; ++i) { | 1783 for (int i = 0; i < kStride * height; ++i) { |
| 1743 int abs_diff = | 1784 int abs_diff = |
| 1744 abs(static_cast<int>(dst_argb_c[i]) - | 1785 abs(static_cast<int>(dst_argb_c[i]) - |
| 1745 static_cast<int>(dst_argb_opt[i])); | 1786 static_cast<int>(dst_argb_opt[i])); |
| 1746 if (abs_diff > max_diff) { | 1787 if (abs_diff > max_diff) { |
| 1747 max_diff = abs_diff; | 1788 max_diff = abs_diff; |
| 1748 } | 1789 } |
| 1749 } | 1790 } |
| 1750 free_aligned_buffer_64(src_argb_a); | 1791 free_aligned_buffer_64(src_argb_a); |
| 1751 free_aligned_buffer_64(dst_argb_c); | 1792 free_aligned_buffer_64(dst_argb_c); |
| 1752 free_aligned_buffer_64(dst_argb_opt); | 1793 free_aligned_buffer_64(dst_argb_opt); |
| 1753 return max_diff; | 1794 return max_diff; |
| 1754 } | 1795 } |
| 1755 | 1796 |
| 1756 TEST_F(LibYUVPlanarTest, ARGBSobelXY_Any) { | 1797 TEST_F(LibYUVPlanarTest, ARGBSobelXY_Any) { |
| 1757 int max_diff = TestSobelXY(benchmark_width_ - 1, benchmark_height_, | 1798 int max_diff = TestSobelXY(benchmark_width_ - 1, benchmark_height_, |
| 1758 benchmark_iterations_, disable_cpu_flags_, +1, 0); | 1799 benchmark_iterations_, |
| 1800 disable_cpu_flags_, benchmark_cpu_info_, +1, 0); |
| 1759 EXPECT_EQ(0, max_diff); | 1801 EXPECT_EQ(0, max_diff); |
| 1760 } | 1802 } |
| 1761 | 1803 |
| 1762 TEST_F(LibYUVPlanarTest, ARGBSobelXY_Unaligned) { | 1804 TEST_F(LibYUVPlanarTest, ARGBSobelXY_Unaligned) { |
| 1763 int max_diff = TestSobelXY(benchmark_width_, benchmark_height_, | 1805 int max_diff = TestSobelXY(benchmark_width_, benchmark_height_, |
| 1764 benchmark_iterations_, disable_cpu_flags_, +1, 1); | 1806 benchmark_iterations_, |
| 1807 disable_cpu_flags_, benchmark_cpu_info_, +1, 1); |
| 1765 EXPECT_EQ(0, max_diff); | 1808 EXPECT_EQ(0, max_diff); |
| 1766 } | 1809 } |
| 1767 | 1810 |
| 1768 TEST_F(LibYUVPlanarTest, ARGBSobelXY_Invert) { | 1811 TEST_F(LibYUVPlanarTest, ARGBSobelXY_Invert) { |
| 1769 int max_diff = TestSobelXY(benchmark_width_, benchmark_height_, | 1812 int max_diff = TestSobelXY(benchmark_width_, benchmark_height_, |
| 1770 benchmark_iterations_, disable_cpu_flags_, -1, 0); | 1813 benchmark_iterations_, |
| 1814 disable_cpu_flags_, benchmark_cpu_info_, -1, 0); |
| 1771 EXPECT_EQ(0, max_diff); | 1815 EXPECT_EQ(0, max_diff); |
| 1772 } | 1816 } |
| 1773 | 1817 |
| 1774 TEST_F(LibYUVPlanarTest, ARGBSobelXY_Opt) { | 1818 TEST_F(LibYUVPlanarTest, ARGBSobelXY_Opt) { |
| 1775 int max_diff = TestSobelXY(benchmark_width_, benchmark_height_, | 1819 int max_diff = TestSobelXY(benchmark_width_, benchmark_height_, |
| 1776 benchmark_iterations_, disable_cpu_flags_, +1, 0); | 1820 benchmark_iterations_, |
| 1821 disable_cpu_flags_, benchmark_cpu_info_, +1, 0); |
| 1777 EXPECT_EQ(0, max_diff); | 1822 EXPECT_EQ(0, max_diff); |
| 1778 } | 1823 } |
| 1779 | 1824 |
| 1780 static int TestBlur(int width, int height, int benchmark_iterations, | 1825 static int TestBlur(int width, int height, int benchmark_iterations, |
| 1781 int disable_cpu_flags, int invert, int off, int radius) { | 1826 int disable_cpu_flags, int benchmark_cpu_info, |
| 1827 int invert, int off, int radius) { |
| 1782 if (width < 1) { | 1828 if (width < 1) { |
| 1783 width = 1; | 1829 width = 1; |
| 1784 } | 1830 } |
| 1785 const int kBpp = 4; | 1831 const int kBpp = 4; |
| 1786 const int kStride = width * kBpp; | 1832 const int kStride = width * kBpp; |
| 1787 align_buffer_64(src_argb_a, kStride * height + off); | 1833 align_buffer_64(src_argb_a, kStride * height + off); |
| 1788 align_buffer_64(dst_cumsum, width * height * 16); | 1834 align_buffer_64(dst_cumsum, width * height * 16); |
| 1789 align_buffer_64(dst_argb_c, kStride * height); | 1835 align_buffer_64(dst_argb_c, kStride * height); |
| 1790 align_buffer_64(dst_argb_opt, kStride * height); | 1836 align_buffer_64(dst_argb_opt, kStride * height); |
| 1791 for (int i = 0; i < kStride * height; ++i) { | 1837 for (int i = 0; i < kStride * height; ++i) { |
| 1792 src_argb_a[i + off] = (fastrand() & 0xff); | 1838 src_argb_a[i + off] = (fastrand() & 0xff); |
| 1793 } | 1839 } |
| 1794 memset(dst_cumsum, 0, width * height * 16); | 1840 memset(dst_cumsum, 0, width * height * 16); |
| 1795 memset(dst_argb_c, 0, kStride * height); | 1841 memset(dst_argb_c, 0, kStride * height); |
| 1796 memset(dst_argb_opt, 0, kStride * height); | 1842 memset(dst_argb_opt, 0, kStride * height); |
| 1797 | 1843 |
| 1798 MaskCpuFlags(disable_cpu_flags); | 1844 MaskCpuFlags(disable_cpu_flags); |
| 1799 ARGBBlur(src_argb_a + off, kStride, | 1845 ARGBBlur(src_argb_a + off, kStride, |
| 1800 dst_argb_c, kStride, | 1846 dst_argb_c, kStride, |
| 1801 reinterpret_cast<int32*>(dst_cumsum), width * 4, | 1847 reinterpret_cast<int32*>(dst_cumsum), width * 4, |
| 1802 width, invert * height, radius); | 1848 width, invert * height, radius); |
| 1803 MaskCpuFlags(-1); | 1849 MaskCpuFlags(benchmark_cpu_info); |
| 1804 for (int i = 0; i < benchmark_iterations; ++i) { | 1850 for (int i = 0; i < benchmark_iterations; ++i) { |
| 1805 ARGBBlur(src_argb_a + off, kStride, | 1851 ARGBBlur(src_argb_a + off, kStride, |
| 1806 dst_argb_opt, kStride, | 1852 dst_argb_opt, kStride, |
| 1807 reinterpret_cast<int32*>(dst_cumsum), width * 4, | 1853 reinterpret_cast<int32*>(dst_cumsum), width * 4, |
| 1808 width, invert * height, radius); | 1854 width, invert * height, radius); |
| 1809 } | 1855 } |
| 1810 int max_diff = 0; | 1856 int max_diff = 0; |
| 1811 for (int i = 0; i < kStride * height; ++i) { | 1857 for (int i = 0; i < kStride * height; ++i) { |
| 1812 int abs_diff = | 1858 int abs_diff = |
| 1813 abs(static_cast<int>(dst_argb_c[i]) - | 1859 abs(static_cast<int>(dst_argb_c[i]) - |
| 1814 static_cast<int>(dst_argb_opt[i])); | 1860 static_cast<int>(dst_argb_opt[i])); |
| 1815 if (abs_diff > max_diff) { | 1861 if (abs_diff > max_diff) { |
| 1816 max_diff = abs_diff; | 1862 max_diff = abs_diff; |
| 1817 } | 1863 } |
| 1818 } | 1864 } |
| 1819 free_aligned_buffer_64(src_argb_a); | 1865 free_aligned_buffer_64(src_argb_a); |
| 1820 free_aligned_buffer_64(dst_cumsum); | 1866 free_aligned_buffer_64(dst_cumsum); |
| 1821 free_aligned_buffer_64(dst_argb_c); | 1867 free_aligned_buffer_64(dst_argb_c); |
| 1822 free_aligned_buffer_64(dst_argb_opt); | 1868 free_aligned_buffer_64(dst_argb_opt); |
| 1823 return max_diff; | 1869 return max_diff; |
| 1824 } | 1870 } |
| 1825 | 1871 |
| 1826 static const int kBlurSize = 55; | 1872 static const int kBlurSize = 55; |
| 1827 TEST_F(LibYUVPlanarTest, ARGBBlur_Any) { | 1873 TEST_F(LibYUVPlanarTest, ARGBBlur_Any) { |
| 1828 int max_diff = TestBlur(benchmark_width_ - 1, benchmark_height_, | 1874 int max_diff = TestBlur(benchmark_width_ - 1, benchmark_height_, |
| 1829 benchmark_iterations_, disable_cpu_flags_, | 1875 benchmark_iterations_, |
| 1876 disable_cpu_flags_, benchmark_cpu_info_, |
| 1830 +1, 0, kBlurSize); | 1877 +1, 0, kBlurSize); |
| 1831 EXPECT_LE(max_diff, 1); | 1878 EXPECT_LE(max_diff, 1); |
| 1832 } | 1879 } |
| 1833 | 1880 |
| 1834 TEST_F(LibYUVPlanarTest, ARGBBlur_Unaligned) { | 1881 TEST_F(LibYUVPlanarTest, ARGBBlur_Unaligned) { |
| 1835 int max_diff = TestBlur(benchmark_width_, benchmark_height_, | 1882 int max_diff = TestBlur(benchmark_width_, benchmark_height_, |
| 1836 benchmark_iterations_, disable_cpu_flags_, | 1883 benchmark_iterations_, |
| 1884 disable_cpu_flags_, benchmark_cpu_info_, |
| 1837 +1, 1, kBlurSize); | 1885 +1, 1, kBlurSize); |
| 1838 EXPECT_LE(max_diff, 1); | 1886 EXPECT_LE(max_diff, 1); |
| 1839 } | 1887 } |
| 1840 | 1888 |
| 1841 TEST_F(LibYUVPlanarTest, ARGBBlur_Invert) { | 1889 TEST_F(LibYUVPlanarTest, ARGBBlur_Invert) { |
| 1842 int max_diff = TestBlur(benchmark_width_, benchmark_height_, | 1890 int max_diff = TestBlur(benchmark_width_, benchmark_height_, |
| 1843 benchmark_iterations_, disable_cpu_flags_, | 1891 benchmark_iterations_, |
| 1892 disable_cpu_flags_, benchmark_cpu_info_, |
| 1844 -1, 0, kBlurSize); | 1893 -1, 0, kBlurSize); |
| 1845 EXPECT_LE(max_diff, 1); | 1894 EXPECT_LE(max_diff, 1); |
| 1846 } | 1895 } |
| 1847 | 1896 |
| 1848 TEST_F(LibYUVPlanarTest, ARGBBlur_Opt) { | 1897 TEST_F(LibYUVPlanarTest, ARGBBlur_Opt) { |
| 1849 int max_diff = TestBlur(benchmark_width_, benchmark_height_, | 1898 int max_diff = TestBlur(benchmark_width_, benchmark_height_, |
| 1850 benchmark_iterations_, disable_cpu_flags_, | 1899 benchmark_iterations_, |
| 1900 disable_cpu_flags_, benchmark_cpu_info_, |
| 1851 +1, 0, kBlurSize); | 1901 +1, 0, kBlurSize); |
| 1852 EXPECT_LE(max_diff, 1); | 1902 EXPECT_LE(max_diff, 1); |
| 1853 } | 1903 } |
| 1854 | 1904 |
| 1855 static const int kBlurSmallSize = 5; | 1905 static const int kBlurSmallSize = 5; |
| 1856 TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Any) { | 1906 TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Any) { |
| 1857 int max_diff = TestBlur(benchmark_width_ - 1, benchmark_height_, | 1907 int max_diff = TestBlur(benchmark_width_ - 1, benchmark_height_, |
| 1858 benchmark_iterations_, disable_cpu_flags_, | 1908 benchmark_iterations_, |
| 1909 disable_cpu_flags_, benchmark_cpu_info_, |
| 1859 +1, 0, kBlurSmallSize); | 1910 +1, 0, kBlurSmallSize); |
| 1860 EXPECT_LE(max_diff, 1); | 1911 EXPECT_LE(max_diff, 1); |
| 1861 } | 1912 } |
| 1862 | 1913 |
| 1863 TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Unaligned) { | 1914 TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Unaligned) { |
| 1864 int max_diff = TestBlur(benchmark_width_, benchmark_height_, | 1915 int max_diff = TestBlur(benchmark_width_, benchmark_height_, |
| 1865 benchmark_iterations_, disable_cpu_flags_, | 1916 benchmark_iterations_, |
| 1917 disable_cpu_flags_, benchmark_cpu_info_, |
| 1866 +1, 1, kBlurSmallSize); | 1918 +1, 1, kBlurSmallSize); |
| 1867 EXPECT_LE(max_diff, 1); | 1919 EXPECT_LE(max_diff, 1); |
| 1868 } | 1920 } |
| 1869 | 1921 |
| 1870 TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Invert) { | 1922 TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Invert) { |
| 1871 int max_diff = TestBlur(benchmark_width_, benchmark_height_, | 1923 int max_diff = TestBlur(benchmark_width_, benchmark_height_, |
| 1872 benchmark_iterations_, disable_cpu_flags_, | 1924 benchmark_iterations_, |
| 1925 disable_cpu_flags_, benchmark_cpu_info_, |
| 1873 -1, 0, kBlurSmallSize); | 1926 -1, 0, kBlurSmallSize); |
| 1874 EXPECT_LE(max_diff, 1); | 1927 EXPECT_LE(max_diff, 1); |
| 1875 } | 1928 } |
| 1876 | 1929 |
| 1877 TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Opt) { | 1930 TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Opt) { |
| 1878 int max_diff = TestBlur(benchmark_width_, benchmark_height_, | 1931 int max_diff = TestBlur(benchmark_width_, benchmark_height_, |
| 1879 benchmark_iterations_, disable_cpu_flags_, | 1932 benchmark_iterations_, |
| 1933 disable_cpu_flags_, benchmark_cpu_info_, |
| 1880 +1, 0, kBlurSmallSize); | 1934 +1, 0, kBlurSmallSize); |
| 1881 EXPECT_LE(max_diff, 1); | 1935 EXPECT_LE(max_diff, 1); |
| 1882 } | 1936 } |
| 1883 | 1937 |
| 1884 TEST_F(LibYUVPlanarTest, TestARGBPolynomial) { | 1938 TEST_F(LibYUVPlanarTest, TestARGBPolynomial) { |
| 1885 SIMD_ALIGNED(uint8 orig_pixels[1280][4]); | 1939 SIMD_ALIGNED(uint8 orig_pixels[1280][4]); |
| 1886 SIMD_ALIGNED(uint8 dst_pixels_opt[1280][4]); | 1940 SIMD_ALIGNED(uint8 dst_pixels_opt[1280][4]); |
| 1887 SIMD_ALIGNED(uint8 dst_pixels_c[1280][4]); | 1941 SIMD_ALIGNED(uint8 dst_pixels_c[1280][4]); |
| 1888 memset(orig_pixels, 0, sizeof(orig_pixels)); | 1942 memset(orig_pixels, 0, sizeof(orig_pixels)); |
| 1889 | 1943 |
| (...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1946 for (int i = 0; i < 1280; ++i) { | 2000 for (int i = 0; i < 1280; ++i) { |
| 1947 orig_pixels[i][0] = i; | 2001 orig_pixels[i][0] = i; |
| 1948 orig_pixels[i][1] = i / 2; | 2002 orig_pixels[i][1] = i / 2; |
| 1949 orig_pixels[i][2] = i / 3; | 2003 orig_pixels[i][2] = i / 3; |
| 1950 orig_pixels[i][3] = i; | 2004 orig_pixels[i][3] = i; |
| 1951 } | 2005 } |
| 1952 | 2006 |
| 1953 MaskCpuFlags(disable_cpu_flags_); | 2007 MaskCpuFlags(disable_cpu_flags_); |
| 1954 ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0, | 2008 ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0, |
| 1955 &kWarmifyPolynomial[0], 1280, 1); | 2009 &kWarmifyPolynomial[0], 1280, 1); |
| 1956 MaskCpuFlags(-1); | 2010 MaskCpuFlags(benchmark_cpu_info_); |
| 1957 | 2011 |
| 1958 for (int i = 0; i < benchmark_pixels_div1280_; ++i) { | 2012 for (int i = 0; i < benchmark_pixels_div1280_; ++i) { |
| 1959 ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0, | 2013 ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0, |
| 1960 &kWarmifyPolynomial[0], 1280, 1); | 2014 &kWarmifyPolynomial[0], 1280, 1); |
| 1961 } | 2015 } |
| 1962 | 2016 |
| 1963 for (int i = 0; i < 1280; ++i) { | 2017 for (int i = 0; i < 1280; ++i) { |
| 1964 EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]); | 2018 EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]); |
| 1965 EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]); | 2019 EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]); |
| 1966 EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]); | 2020 EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]); |
| (...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2023 for (int i = 0; i < 1280; ++i) { | 2077 for (int i = 0; i < 1280; ++i) { |
| 2024 orig_pixels[i][0] = i; | 2078 orig_pixels[i][0] = i; |
| 2025 orig_pixels[i][1] = i / 2; | 2079 orig_pixels[i][1] = i / 2; |
| 2026 orig_pixels[i][2] = i / 3; | 2080 orig_pixels[i][2] = i / 3; |
| 2027 orig_pixels[i][3] = i; | 2081 orig_pixels[i][3] = i; |
| 2028 } | 2082 } |
| 2029 | 2083 |
| 2030 MaskCpuFlags(disable_cpu_flags_); | 2084 MaskCpuFlags(disable_cpu_flags_); |
| 2031 ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0, | 2085 ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0, |
| 2032 lumacolortable, 1280, 1); | 2086 lumacolortable, 1280, 1); |
| 2033 MaskCpuFlags(-1); | 2087 MaskCpuFlags(benchmark_cpu_info_); |
| 2034 | 2088 |
| 2035 for (int i = 0; i < benchmark_pixels_div1280_; ++i) { | 2089 for (int i = 0; i < benchmark_pixels_div1280_; ++i) { |
| 2036 ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0, | 2090 ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0, |
| 2037 lumacolortable, 1280, 1); | 2091 lumacolortable, 1280, 1); |
| 2038 } | 2092 } |
| 2039 for (int i = 0; i < 1280; ++i) { | 2093 for (int i = 0; i < 1280; ++i) { |
| 2040 EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]); | 2094 EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]); |
| 2041 EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]); | 2095 EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]); |
| 2042 EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]); | 2096 EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]); |
| 2043 EXPECT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]); | 2097 EXPECT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]); |
| 2044 } | 2098 } |
| 2045 | 2099 |
| 2046 free_aligned_buffer_64(lumacolortable); | 2100 free_aligned_buffer_64(lumacolortable); |
| 2047 } | 2101 } |
| 2048 | 2102 |
| 2049 TEST_F(LibYUVPlanarTest, TestARGBCopyAlpha) { | 2103 TEST_F(LibYUVPlanarTest, TestARGBCopyAlpha) { |
| 2050 const int kSize = benchmark_width_ * benchmark_height_ * 4; | 2104 const int kSize = benchmark_width_ * benchmark_height_ * 4; |
| 2051 align_buffer_64(orig_pixels, kSize); | 2105 align_buffer_64(orig_pixels, kSize); |
| 2052 align_buffer_64(dst_pixels_opt, kSize); | 2106 align_buffer_64(dst_pixels_opt, kSize); |
| 2053 align_buffer_64(dst_pixels_c, kSize); | 2107 align_buffer_64(dst_pixels_c, kSize); |
| 2054 | 2108 |
| 2055 MemRandomize(orig_pixels, kSize); | 2109 MemRandomize(orig_pixels, kSize); |
| 2056 MemRandomize(dst_pixels_opt, kSize); | 2110 MemRandomize(dst_pixels_opt, kSize); |
| 2057 memcpy(dst_pixels_c, dst_pixels_opt, kSize); | 2111 memcpy(dst_pixels_c, dst_pixels_opt, kSize); |
| 2058 | 2112 |
| 2059 MaskCpuFlags(disable_cpu_flags_); | 2113 MaskCpuFlags(disable_cpu_flags_); |
| 2060 ARGBCopyAlpha(orig_pixels, benchmark_width_ * 4, | 2114 ARGBCopyAlpha(orig_pixels, benchmark_width_ * 4, |
| 2061 dst_pixels_c, benchmark_width_ * 4, | 2115 dst_pixels_c, benchmark_width_ * 4, |
| 2062 benchmark_width_, benchmark_height_); | 2116 benchmark_width_, benchmark_height_); |
| 2063 MaskCpuFlags(-1); | 2117 MaskCpuFlags(benchmark_cpu_info_); |
| 2064 | 2118 |
| 2065 for (int i = 0; i < benchmark_iterations_; ++i) { | 2119 for (int i = 0; i < benchmark_iterations_; ++i) { |
| 2066 ARGBCopyAlpha(orig_pixels, benchmark_width_ * 4, | 2120 ARGBCopyAlpha(orig_pixels, benchmark_width_ * 4, |
| 2067 dst_pixels_opt, benchmark_width_ * 4, | 2121 dst_pixels_opt, benchmark_width_ * 4, |
| 2068 benchmark_width_, benchmark_height_); | 2122 benchmark_width_, benchmark_height_); |
| 2069 } | 2123 } |
| 2070 for (int i = 0; i < kSize; ++i) { | 2124 for (int i = 0; i < kSize; ++i) { |
| 2071 EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); | 2125 EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); |
| 2072 } | 2126 } |
| 2073 | 2127 |
| 2074 free_aligned_buffer_64(dst_pixels_c); | 2128 free_aligned_buffer_64(dst_pixels_c); |
| 2075 free_aligned_buffer_64(dst_pixels_opt); | 2129 free_aligned_buffer_64(dst_pixels_opt); |
| 2076 free_aligned_buffer_64(orig_pixels); | 2130 free_aligned_buffer_64(orig_pixels); |
| 2077 } | 2131 } |
| 2078 | 2132 |
| 2079 TEST_F(LibYUVPlanarTest, TestARGBCopyYToAlpha) { | 2133 TEST_F(LibYUVPlanarTest, TestARGBCopyYToAlpha) { |
| 2080 const int kPixels = benchmark_width_ * benchmark_height_; | 2134 const int kPixels = benchmark_width_ * benchmark_height_; |
| 2081 align_buffer_64(orig_pixels, kPixels); | 2135 align_buffer_64(orig_pixels, kPixels); |
| 2082 align_buffer_64(dst_pixels_opt, kPixels * 4); | 2136 align_buffer_64(dst_pixels_opt, kPixels * 4); |
| 2083 align_buffer_64(dst_pixels_c, kPixels * 4); | 2137 align_buffer_64(dst_pixels_c, kPixels * 4); |
| 2084 | 2138 |
| 2085 MemRandomize(orig_pixels, kPixels); | 2139 MemRandomize(orig_pixels, kPixels); |
| 2086 MemRandomize(dst_pixels_opt, kPixels * 4); | 2140 MemRandomize(dst_pixels_opt, kPixels * 4); |
| 2087 memcpy(dst_pixels_c, dst_pixels_opt, kPixels * 4); | 2141 memcpy(dst_pixels_c, dst_pixels_opt, kPixels * 4); |
| 2088 | 2142 |
| 2089 MaskCpuFlags(disable_cpu_flags_); | 2143 MaskCpuFlags(disable_cpu_flags_); |
| 2090 ARGBCopyYToAlpha(orig_pixels, benchmark_width_, | 2144 ARGBCopyYToAlpha(orig_pixels, benchmark_width_, |
| 2091 dst_pixels_c, benchmark_width_ * 4, | 2145 dst_pixels_c, benchmark_width_ * 4, |
| 2092 benchmark_width_, benchmark_height_); | 2146 benchmark_width_, benchmark_height_); |
| 2093 MaskCpuFlags(-1); | 2147 MaskCpuFlags(benchmark_cpu_info_); |
| 2094 | 2148 |
| 2095 for (int i = 0; i < benchmark_iterations_; ++i) { | 2149 for (int i = 0; i < benchmark_iterations_; ++i) { |
| 2096 ARGBCopyYToAlpha(orig_pixels, benchmark_width_, | 2150 ARGBCopyYToAlpha(orig_pixels, benchmark_width_, |
| 2097 dst_pixels_opt, benchmark_width_ * 4, | 2151 dst_pixels_opt, benchmark_width_ * 4, |
| 2098 benchmark_width_, benchmark_height_); | 2152 benchmark_width_, benchmark_height_); |
| 2099 } | 2153 } |
| 2100 for (int i = 0; i < kPixels * 4; ++i) { | 2154 for (int i = 0; i < kPixels * 4; ++i) { |
| 2101 EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); | 2155 EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); |
| 2102 } | 2156 } |
| 2103 | 2157 |
| 2104 free_aligned_buffer_64(dst_pixels_c); | 2158 free_aligned_buffer_64(dst_pixels_c); |
| 2105 free_aligned_buffer_64(dst_pixels_opt); | 2159 free_aligned_buffer_64(dst_pixels_opt); |
| 2106 free_aligned_buffer_64(orig_pixels); | 2160 free_aligned_buffer_64(orig_pixels); |
| 2107 } | 2161 } |
| 2108 | 2162 |
| 2109 static int TestARGBRect(int width, int height, int benchmark_iterations, | 2163 static int TestARGBRect(int width, int height, int benchmark_iterations, |
| 2110 int disable_cpu_flags, int invert, int off, int bpp) { | 2164 int disable_cpu_flags, int benchmark_cpu_info, |
| 2165 int invert, int off, int bpp) { |
| 2111 if (width < 1) { | 2166 if (width < 1) { |
| 2112 width = 1; | 2167 width = 1; |
| 2113 } | 2168 } |
| 2114 const int kStride = width * bpp; | 2169 const int kStride = width * bpp; |
| 2115 const int kSize = kStride * height; | 2170 const int kSize = kStride * height; |
| 2116 const uint32 v32 = fastrand() & (bpp == 4 ? 0xffffffff : 0xff); | 2171 const uint32 v32 = fastrand() & (bpp == 4 ? 0xffffffff : 0xff); |
| 2117 | 2172 |
| 2118 align_buffer_64(dst_argb_c, kSize + off); | 2173 align_buffer_64(dst_argb_c, kSize + off); |
| 2119 align_buffer_64(dst_argb_opt, kSize + off); | 2174 align_buffer_64(dst_argb_opt, kSize + off); |
| 2120 | 2175 |
| 2121 MemRandomize(dst_argb_c + off, kSize); | 2176 MemRandomize(dst_argb_c + off, kSize); |
| 2122 memcpy(dst_argb_opt + off, dst_argb_c + off, kSize); | 2177 memcpy(dst_argb_opt + off, dst_argb_c + off, kSize); |
| 2123 | 2178 |
| 2124 MaskCpuFlags(disable_cpu_flags); | 2179 MaskCpuFlags(disable_cpu_flags); |
| 2125 if (bpp == 4) { | 2180 if (bpp == 4) { |
| 2126 ARGBRect(dst_argb_c + off, kStride, 0, 0, width, invert * height, v32); | 2181 ARGBRect(dst_argb_c + off, kStride, 0, 0, width, invert * height, v32); |
| 2127 } else { | 2182 } else { |
| 2128 SetPlane(dst_argb_c + off, kStride, width, invert * height, v32); | 2183 SetPlane(dst_argb_c + off, kStride, width, invert * height, v32); |
| 2129 } | 2184 } |
| 2130 | 2185 |
| 2131 MaskCpuFlags(-1); | 2186 MaskCpuFlags(benchmark_cpu_info); |
| 2132 for (int i = 0; i < benchmark_iterations; ++i) { | 2187 for (int i = 0; i < benchmark_iterations; ++i) { |
| 2133 if (bpp == 4) { | 2188 if (bpp == 4) { |
| 2134 ARGBRect(dst_argb_opt + off, kStride, 0, 0, width, invert * height, v32); | 2189 ARGBRect(dst_argb_opt + off, kStride, 0, 0, width, invert * height, v32); |
| 2135 } else { | 2190 } else { |
| 2136 SetPlane(dst_argb_opt + off, kStride, width, invert * height, v32); | 2191 SetPlane(dst_argb_opt + off, kStride, width, invert * height, v32); |
| 2137 } | 2192 } |
| 2138 } | 2193 } |
| 2139 int max_diff = 0; | 2194 int max_diff = 0; |
| 2140 for (int i = 0; i < kStride * height; ++i) { | 2195 for (int i = 0; i < kStride * height; ++i) { |
| 2141 int abs_diff = | 2196 int abs_diff = |
| 2142 abs(static_cast<int>(dst_argb_c[i + off]) - | 2197 abs(static_cast<int>(dst_argb_c[i + off]) - |
| 2143 static_cast<int>(dst_argb_opt[i + off])); | 2198 static_cast<int>(dst_argb_opt[i + off])); |
| 2144 if (abs_diff > max_diff) { | 2199 if (abs_diff > max_diff) { |
| 2145 max_diff = abs_diff; | 2200 max_diff = abs_diff; |
| 2146 } | 2201 } |
| 2147 } | 2202 } |
| 2148 free_aligned_buffer_64(dst_argb_c); | 2203 free_aligned_buffer_64(dst_argb_c); |
| 2149 free_aligned_buffer_64(dst_argb_opt); | 2204 free_aligned_buffer_64(dst_argb_opt); |
| 2150 return max_diff; | 2205 return max_diff; |
| 2151 } | 2206 } |
| 2152 | 2207 |
| 2153 TEST_F(LibYUVPlanarTest, ARGBRect_Any) { | 2208 TEST_F(LibYUVPlanarTest, ARGBRect_Any) { |
| 2154 int max_diff = TestARGBRect(benchmark_width_ - 1, benchmark_height_, | 2209 int max_diff = TestARGBRect(benchmark_width_ - 1, benchmark_height_, |
| 2155 benchmark_iterations_, disable_cpu_flags_, | 2210 benchmark_iterations_, |
| 2211 disable_cpu_flags_, benchmark_cpu_info_, |
| 2156 +1, 0, 4); | 2212 +1, 0, 4); |
| 2157 EXPECT_EQ(0, max_diff); | 2213 EXPECT_EQ(0, max_diff); |
| 2158 } | 2214 } |
| 2159 | 2215 |
| 2160 TEST_F(LibYUVPlanarTest, ARGBRect_Unaligned) { | 2216 TEST_F(LibYUVPlanarTest, ARGBRect_Unaligned) { |
| 2161 int max_diff = TestARGBRect(benchmark_width_, benchmark_height_, | 2217 int max_diff = TestARGBRect(benchmark_width_, benchmark_height_, |
| 2162 benchmark_iterations_, disable_cpu_flags_, | 2218 benchmark_iterations_, |
| 2219 disable_cpu_flags_, benchmark_cpu_info_, |
| 2163 +1, 1, 4); | 2220 +1, 1, 4); |
| 2164 EXPECT_EQ(0, max_diff); | 2221 EXPECT_EQ(0, max_diff); |
| 2165 } | 2222 } |
| 2166 | 2223 |
| 2167 TEST_F(LibYUVPlanarTest, ARGBRect_Invert) { | 2224 TEST_F(LibYUVPlanarTest, ARGBRect_Invert) { |
| 2168 int max_diff = TestARGBRect(benchmark_width_, benchmark_height_, | 2225 int max_diff = TestARGBRect(benchmark_width_, benchmark_height_, |
| 2169 benchmark_iterations_, disable_cpu_flags_, | 2226 benchmark_iterations_, |
| 2227 disable_cpu_flags_, benchmark_cpu_info_, |
| 2170 -1, 0, 4); | 2228 -1, 0, 4); |
| 2171 EXPECT_EQ(0, max_diff); | 2229 EXPECT_EQ(0, max_diff); |
| 2172 } | 2230 } |
| 2173 | 2231 |
| 2174 TEST_F(LibYUVPlanarTest, ARGBRect_Opt) { | 2232 TEST_F(LibYUVPlanarTest, ARGBRect_Opt) { |
| 2175 int max_diff = TestARGBRect(benchmark_width_, benchmark_height_, | 2233 int max_diff = TestARGBRect(benchmark_width_, benchmark_height_, |
| 2176 benchmark_iterations_, disable_cpu_flags_, | 2234 benchmark_iterations_, |
| 2235 disable_cpu_flags_, benchmark_cpu_info_, |
| 2177 +1, 0, 4); | 2236 +1, 0, 4); |
| 2178 EXPECT_EQ(0, max_diff); | 2237 EXPECT_EQ(0, max_diff); |
| 2179 } | 2238 } |
| 2180 | 2239 |
| 2181 TEST_F(LibYUVPlanarTest, SetPlane_Any) { | 2240 TEST_F(LibYUVPlanarTest, SetPlane_Any) { |
| 2182 int max_diff = TestARGBRect(benchmark_width_ - 1, benchmark_height_, | 2241 int max_diff = TestARGBRect(benchmark_width_ - 1, benchmark_height_, |
| 2183 benchmark_iterations_, disable_cpu_flags_, | 2242 benchmark_iterations_, |
| 2243 disable_cpu_flags_, benchmark_cpu_info_, |
| 2184 +1, 0, 1); | 2244 +1, 0, 1); |
| 2185 EXPECT_EQ(0, max_diff); | 2245 EXPECT_EQ(0, max_diff); |
| 2186 } | 2246 } |
| 2187 | 2247 |
| 2188 TEST_F(LibYUVPlanarTest, SetPlane_Unaligned) { | 2248 TEST_F(LibYUVPlanarTest, SetPlane_Unaligned) { |
| 2189 int max_diff = TestARGBRect(benchmark_width_, benchmark_height_, | 2249 int max_diff = TestARGBRect(benchmark_width_, benchmark_height_, |
| 2190 benchmark_iterations_, disable_cpu_flags_, | 2250 benchmark_iterations_, |
| 2251 disable_cpu_flags_, benchmark_cpu_info_, |
| 2191 +1, 1, 1); | 2252 +1, 1, 1); |
| 2192 EXPECT_EQ(0, max_diff); | 2253 EXPECT_EQ(0, max_diff); |
| 2193 } | 2254 } |
| 2194 | 2255 |
| 2195 TEST_F(LibYUVPlanarTest, SetPlane_Invert) { | 2256 TEST_F(LibYUVPlanarTest, SetPlane_Invert) { |
| 2196 int max_diff = TestARGBRect(benchmark_width_, benchmark_height_, | 2257 int max_diff = TestARGBRect(benchmark_width_, benchmark_height_, |
| 2197 benchmark_iterations_, disable_cpu_flags_, | 2258 benchmark_iterations_, |
| 2259 disable_cpu_flags_, benchmark_cpu_info_, |
| 2198 -1, 0, 1); | 2260 -1, 0, 1); |
| 2199 EXPECT_EQ(0, max_diff); | 2261 EXPECT_EQ(0, max_diff); |
| 2200 } | 2262 } |
| 2201 | 2263 |
| 2202 TEST_F(LibYUVPlanarTest, SetPlane_Opt) { | 2264 TEST_F(LibYUVPlanarTest, SetPlane_Opt) { |
| 2203 int max_diff = TestARGBRect(benchmark_width_, benchmark_height_, | 2265 int max_diff = TestARGBRect(benchmark_width_, benchmark_height_, |
| 2204 benchmark_iterations_, disable_cpu_flags_, | 2266 benchmark_iterations_, |
| 2267 disable_cpu_flags_, benchmark_cpu_info_, |
| 2205 +1, 0, 1); | 2268 +1, 0, 1); |
| 2206 EXPECT_EQ(0, max_diff); | 2269 EXPECT_EQ(0, max_diff); |
| 2207 } | 2270 } |
| 2208 | 2271 |
| 2209 } // namespace libyuv | 2272 } // namespace libyuv |
| OLD | NEW |