OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
98 EXPECT_NEAR(85, atten_pixels[255 * 4 + 2], 1); | 98 EXPECT_NEAR(85, atten_pixels[255 * 4 + 2], 1); |
99 EXPECT_EQ(255, atten_pixels[255 * 4 + 3]); | 99 EXPECT_EQ(255, atten_pixels[255 * 4 + 3]); |
100 | 100 |
101 free_aligned_buffer_64(atten2_pixels); | 101 free_aligned_buffer_64(atten2_pixels); |
102 free_aligned_buffer_64(unatten_pixels); | 102 free_aligned_buffer_64(unatten_pixels); |
103 free_aligned_buffer_64(atten_pixels); | 103 free_aligned_buffer_64(atten_pixels); |
104 free_aligned_buffer_64(orig_pixels); | 104 free_aligned_buffer_64(orig_pixels); |
105 } | 105 } |
106 | 106 |
107 static int TestAttenuateI(int width, int height, int benchmark_iterations, | 107 static int TestAttenuateI(int width, int height, int benchmark_iterations, |
108 int disable_cpu_flags, int invert, int off) { | 108 int disable_cpu_flags, int benchmark_cpu_info, |
| 109 int invert, int off) { |
109 if (width < 1) { | 110 if (width < 1) { |
110 width = 1; | 111 width = 1; |
111 } | 112 } |
112 const int kBpp = 4; | 113 const int kBpp = 4; |
113 const int kStride = width * kBpp; | 114 const int kStride = width * kBpp; |
114 align_buffer_64(src_argb, kStride * height + off); | 115 align_buffer_64(src_argb, kStride * height + off); |
115 align_buffer_64(dst_argb_c, kStride * height); | 116 align_buffer_64(dst_argb_c, kStride * height); |
116 align_buffer_64(dst_argb_opt, kStride * height); | 117 align_buffer_64(dst_argb_opt, kStride * height); |
117 for (int i = 0; i < kStride * height; ++i) { | 118 for (int i = 0; i < kStride * height; ++i) { |
118 src_argb[i + off] = (fastrand() & 0xff); | 119 src_argb[i + off] = (fastrand() & 0xff); |
119 } | 120 } |
120 memset(dst_argb_c, 0, kStride * height); | 121 memset(dst_argb_c, 0, kStride * height); |
121 memset(dst_argb_opt, 0, kStride * height); | 122 memset(dst_argb_opt, 0, kStride * height); |
122 | 123 |
123 MaskCpuFlags(disable_cpu_flags); | 124 MaskCpuFlags(disable_cpu_flags); |
124 ARGBAttenuate(src_argb + off, kStride, | 125 ARGBAttenuate(src_argb + off, kStride, |
125 dst_argb_c, kStride, | 126 dst_argb_c, kStride, |
126 width, invert * height); | 127 width, invert * height); |
127 MaskCpuFlags(-1); | 128 MaskCpuFlags(benchmark_cpu_info); |
128 for (int i = 0; i < benchmark_iterations; ++i) { | 129 for (int i = 0; i < benchmark_iterations; ++i) { |
129 ARGBAttenuate(src_argb + off, kStride, | 130 ARGBAttenuate(src_argb + off, kStride, |
130 dst_argb_opt, kStride, | 131 dst_argb_opt, kStride, |
131 width, invert * height); | 132 width, invert * height); |
132 } | 133 } |
133 int max_diff = 0; | 134 int max_diff = 0; |
134 for (int i = 0; i < kStride * height; ++i) { | 135 for (int i = 0; i < kStride * height; ++i) { |
135 int abs_diff = | 136 int abs_diff = |
136 abs(static_cast<int>(dst_argb_c[i]) - | 137 abs(static_cast<int>(dst_argb_c[i]) - |
137 static_cast<int>(dst_argb_opt[i])); | 138 static_cast<int>(dst_argb_opt[i])); |
138 if (abs_diff > max_diff) { | 139 if (abs_diff > max_diff) { |
139 max_diff = abs_diff; | 140 max_diff = abs_diff; |
140 } | 141 } |
141 } | 142 } |
142 free_aligned_buffer_64(src_argb); | 143 free_aligned_buffer_64(src_argb); |
143 free_aligned_buffer_64(dst_argb_c); | 144 free_aligned_buffer_64(dst_argb_c); |
144 free_aligned_buffer_64(dst_argb_opt); | 145 free_aligned_buffer_64(dst_argb_opt); |
145 return max_diff; | 146 return max_diff; |
146 } | 147 } |
147 | 148 |
148 TEST_F(LibYUVPlanarTest, ARGBAttenuate_Any) { | 149 TEST_F(LibYUVPlanarTest, ARGBAttenuate_Any) { |
149 int max_diff = TestAttenuateI(benchmark_width_ - 1, benchmark_height_, | 150 int max_diff = TestAttenuateI(benchmark_width_ - 1, benchmark_height_, |
150 benchmark_iterations_, disable_cpu_flags_, | 151 benchmark_iterations_, |
| 152 disable_cpu_flags_, benchmark_cpu_info_, |
151 +1, 0); | 153 +1, 0); |
152 EXPECT_LE(max_diff, 2); | 154 EXPECT_LE(max_diff, 2); |
153 } | 155 } |
154 | 156 |
155 TEST_F(LibYUVPlanarTest, ARGBAttenuate_Unaligned) { | 157 TEST_F(LibYUVPlanarTest, ARGBAttenuate_Unaligned) { |
156 int max_diff = TestAttenuateI(benchmark_width_, benchmark_height_, | 158 int max_diff = TestAttenuateI(benchmark_width_, benchmark_height_, |
157 benchmark_iterations_, disable_cpu_flags_, | 159 benchmark_iterations_, |
| 160 disable_cpu_flags_, benchmark_cpu_info_, |
158 +1, 1); | 161 +1, 1); |
159 EXPECT_LE(max_diff, 2); | 162 EXPECT_LE(max_diff, 2); |
160 } | 163 } |
161 | 164 |
162 TEST_F(LibYUVPlanarTest, ARGBAttenuate_Invert) { | 165 TEST_F(LibYUVPlanarTest, ARGBAttenuate_Invert) { |
163 int max_diff = TestAttenuateI(benchmark_width_, benchmark_height_, | 166 int max_diff = TestAttenuateI(benchmark_width_, benchmark_height_, |
164 benchmark_iterations_, disable_cpu_flags_, | 167 benchmark_iterations_, |
| 168 disable_cpu_flags_, benchmark_cpu_info_, |
165 -1, 0); | 169 -1, 0); |
166 EXPECT_LE(max_diff, 2); | 170 EXPECT_LE(max_diff, 2); |
167 } | 171 } |
168 | 172 |
169 TEST_F(LibYUVPlanarTest, ARGBAttenuate_Opt) { | 173 TEST_F(LibYUVPlanarTest, ARGBAttenuate_Opt) { |
170 int max_diff = TestAttenuateI(benchmark_width_, benchmark_height_, | 174 int max_diff = TestAttenuateI(benchmark_width_, benchmark_height_, |
171 benchmark_iterations_, disable_cpu_flags_, | 175 benchmark_iterations_, |
| 176 disable_cpu_flags_, benchmark_cpu_info_, |
172 +1, 0); | 177 +1, 0); |
173 EXPECT_LE(max_diff, 2); | 178 EXPECT_LE(max_diff, 2); |
174 } | 179 } |
175 | 180 |
176 static int TestUnattenuateI(int width, int height, int benchmark_iterations, | 181 static int TestUnattenuateI(int width, int height, int benchmark_iterations, |
177 int disable_cpu_flags, int invert, int off) { | 182 int disable_cpu_flags, int benchmark_cpu_info, |
| 183 int invert, int off) { |
178 if (width < 1) { | 184 if (width < 1) { |
179 width = 1; | 185 width = 1; |
180 } | 186 } |
181 const int kBpp = 4; | 187 const int kBpp = 4; |
182 const int kStride = width * kBpp; | 188 const int kStride = width * kBpp; |
183 align_buffer_64(src_argb, kStride * height + off); | 189 align_buffer_64(src_argb, kStride * height + off); |
184 align_buffer_64(dst_argb_c, kStride * height); | 190 align_buffer_64(dst_argb_c, kStride * height); |
185 align_buffer_64(dst_argb_opt, kStride * height); | 191 align_buffer_64(dst_argb_opt, kStride * height); |
186 for (int i = 0; i < kStride * height; ++i) { | 192 for (int i = 0; i < kStride * height; ++i) { |
187 src_argb[i + off] = (fastrand() & 0xff); | 193 src_argb[i + off] = (fastrand() & 0xff); |
188 } | 194 } |
189 ARGBAttenuate(src_argb + off, kStride, | 195 ARGBAttenuate(src_argb + off, kStride, |
190 src_argb + off, kStride, | 196 src_argb + off, kStride, |
191 width, height); | 197 width, height); |
192 memset(dst_argb_c, 0, kStride * height); | 198 memset(dst_argb_c, 0, kStride * height); |
193 memset(dst_argb_opt, 0, kStride * height); | 199 memset(dst_argb_opt, 0, kStride * height); |
194 | 200 |
195 MaskCpuFlags(disable_cpu_flags); | 201 MaskCpuFlags(disable_cpu_flags); |
196 ARGBUnattenuate(src_argb + off, kStride, | 202 ARGBUnattenuate(src_argb + off, kStride, |
197 dst_argb_c, kStride, | 203 dst_argb_c, kStride, |
198 width, invert * height); | 204 width, invert * height); |
199 MaskCpuFlags(-1); | 205 MaskCpuFlags(benchmark_cpu_info); |
200 for (int i = 0; i < benchmark_iterations; ++i) { | 206 for (int i = 0; i < benchmark_iterations; ++i) { |
201 ARGBUnattenuate(src_argb + off, kStride, | 207 ARGBUnattenuate(src_argb + off, kStride, |
202 dst_argb_opt, kStride, | 208 dst_argb_opt, kStride, |
203 width, invert * height); | 209 width, invert * height); |
204 } | 210 } |
205 int max_diff = 0; | 211 int max_diff = 0; |
206 for (int i = 0; i < kStride * height; ++i) { | 212 for (int i = 0; i < kStride * height; ++i) { |
207 int abs_diff = | 213 int abs_diff = |
208 abs(static_cast<int>(dst_argb_c[i]) - | 214 abs(static_cast<int>(dst_argb_c[i]) - |
209 static_cast<int>(dst_argb_opt[i])); | 215 static_cast<int>(dst_argb_opt[i])); |
210 if (abs_diff > max_diff) { | 216 if (abs_diff > max_diff) { |
211 max_diff = abs_diff; | 217 max_diff = abs_diff; |
212 } | 218 } |
213 } | 219 } |
214 free_aligned_buffer_64(src_argb); | 220 free_aligned_buffer_64(src_argb); |
215 free_aligned_buffer_64(dst_argb_c); | 221 free_aligned_buffer_64(dst_argb_c); |
216 free_aligned_buffer_64(dst_argb_opt); | 222 free_aligned_buffer_64(dst_argb_opt); |
217 return max_diff; | 223 return max_diff; |
218 } | 224 } |
219 | 225 |
220 TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Any) { | 226 TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Any) { |
221 int max_diff = TestUnattenuateI(benchmark_width_ - 1, benchmark_height_, | 227 int max_diff = TestUnattenuateI(benchmark_width_ - 1, benchmark_height_, |
222 benchmark_iterations_, disable_cpu_flags_, | 228 benchmark_iterations_, |
| 229 disable_cpu_flags_, benchmark_cpu_info_, |
223 +1, 0); | 230 +1, 0); |
224 EXPECT_LE(max_diff, 2); | 231 EXPECT_LE(max_diff, 2); |
225 } | 232 } |
226 | 233 |
227 TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Unaligned) { | 234 TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Unaligned) { |
228 int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_, | 235 int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_, |
229 benchmark_iterations_, disable_cpu_flags_, | 236 benchmark_iterations_, |
| 237 disable_cpu_flags_, benchmark_cpu_info_, |
230 +1, 1); | 238 +1, 1); |
231 EXPECT_LE(max_diff, 2); | 239 EXPECT_LE(max_diff, 2); |
232 } | 240 } |
233 | 241 |
234 TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Invert) { | 242 TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Invert) { |
235 int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_, | 243 int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_, |
236 benchmark_iterations_, disable_cpu_flags_, | 244 benchmark_iterations_, |
| 245 disable_cpu_flags_, benchmark_cpu_info_, |
237 -1, 0); | 246 -1, 0); |
238 EXPECT_LE(max_diff, 2); | 247 EXPECT_LE(max_diff, 2); |
239 } | 248 } |
240 | 249 |
241 TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Opt) { | 250 TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Opt) { |
242 int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_, | 251 int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_, |
243 benchmark_iterations_, disable_cpu_flags_, | 252 benchmark_iterations_, |
| 253 disable_cpu_flags_, benchmark_cpu_info_, |
244 +1, 0); | 254 +1, 0); |
245 EXPECT_LE(max_diff, 2); | 255 EXPECT_LE(max_diff, 2); |
246 } | 256 } |
247 | 257 |
248 TEST_F(LibYUVPlanarTest, TestARGBComputeCumulativeSum) { | 258 TEST_F(LibYUVPlanarTest, TestARGBComputeCumulativeSum) { |
249 SIMD_ALIGNED(uint8 orig_pixels[16][16][4]); | 259 SIMD_ALIGNED(uint8 orig_pixels[16][16][4]); |
250 SIMD_ALIGNED(int32 added_pixels[16][16][4]); | 260 SIMD_ALIGNED(int32 added_pixels[16][16][4]); |
251 | 261 |
252 for (int y = 0; y < 16; ++y) { | 262 for (int y = 0; y < 16; ++y) { |
253 for (int x = 0; x < 16; ++x) { | 263 for (int x = 0; x < 16; ++x) { |
(...skipping 289 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
543 | 553 |
544 for (int i = 0; i < 1280; ++i) { | 554 for (int i = 0; i < 1280; ++i) { |
545 orig_pixels[i][0] = i; | 555 orig_pixels[i][0] = i; |
546 orig_pixels[i][1] = i / 2; | 556 orig_pixels[i][1] = i / 2; |
547 orig_pixels[i][2] = i / 3; | 557 orig_pixels[i][2] = i / 3; |
548 orig_pixels[i][3] = i; | 558 orig_pixels[i][3] = i; |
549 } | 559 } |
550 MaskCpuFlags(disable_cpu_flags_); | 560 MaskCpuFlags(disable_cpu_flags_); |
551 ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0, | 561 ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0, |
552 &kRGBToSepia[0], 1280, 1); | 562 &kRGBToSepia[0], 1280, 1); |
553 MaskCpuFlags(-1); | 563 MaskCpuFlags(benchmark_cpu_info_); |
554 | 564 |
555 for (int i = 0; i < benchmark_pixels_div1280_; ++i) { | 565 for (int i = 0; i < benchmark_pixels_div1280_; ++i) { |
556 ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0, | 566 ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0, |
557 &kRGBToSepia[0], 1280, 1); | 567 &kRGBToSepia[0], 1280, 1); |
558 } | 568 } |
559 | 569 |
560 for (int i = 0; i < 1280; ++i) { | 570 for (int i = 0; i < 1280; ++i) { |
561 EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]); | 571 EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]); |
562 EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]); | 572 EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]); |
563 EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]); | 573 EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]); |
(...skipping 380 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
944 align_buffer_64(dst_argb_opt, kStrideB * kHeight); \ | 954 align_buffer_64(dst_argb_opt, kStrideB * kHeight); \ |
945 for (int i = 0; i < kStrideA * kHeight; ++i) { \ | 955 for (int i = 0; i < kStrideA * kHeight; ++i) { \ |
946 src_argb_a[i + OFF] = (fastrand() & 0xff); \ | 956 src_argb_a[i + OFF] = (fastrand() & 0xff); \ |
947 src_argb_b[i + OFF] = (fastrand() & 0xff); \ | 957 src_argb_b[i + OFF] = (fastrand() & 0xff); \ |
948 } \ | 958 } \ |
949 MaskCpuFlags(disable_cpu_flags_); \ | 959 MaskCpuFlags(disable_cpu_flags_); \ |
950 ARGBInterpolate(src_argb_a + OFF, kStrideA, \ | 960 ARGBInterpolate(src_argb_a + OFF, kStrideA, \ |
951 src_argb_b + OFF, kStrideA, \ | 961 src_argb_b + OFF, kStrideA, \ |
952 dst_argb_c, kStrideB, \ | 962 dst_argb_c, kStrideB, \ |
953 kWidth, NEG kHeight, TERP); \ | 963 kWidth, NEG kHeight, TERP); \ |
954 MaskCpuFlags(-1); \ | 964 MaskCpuFlags(benchmark_cpu_info_); \ |
955 for (int i = 0; i < benchmark_iterations_; ++i) { \ | 965 for (int i = 0; i < benchmark_iterations_; ++i) { \ |
956 ARGBInterpolate(src_argb_a + OFF, kStrideA, \ | 966 ARGBInterpolate(src_argb_a + OFF, kStrideA, \ |
957 src_argb_b + OFF, kStrideA, \ | 967 src_argb_b + OFF, kStrideA, \ |
958 dst_argb_opt, kStrideB, \ | 968 dst_argb_opt, kStrideB, \ |
959 kWidth, NEG kHeight, TERP); \ | 969 kWidth, NEG kHeight, TERP); \ |
960 } \ | 970 } \ |
961 int max_diff = 0; \ | 971 int max_diff = 0; \ |
962 for (int i = 0; i < kStrideB * kHeight; ++i) { \ | 972 for (int i = 0; i < kStrideB * kHeight; ++i) { \ |
963 int abs_diff = \ | 973 int abs_diff = \ |
964 abs(static_cast<int>(dst_argb_c[i]) - \ | 974 abs(static_cast<int>(dst_argb_c[i]) - \ |
(...skipping 21 matching lines...) Expand all Loading... |
986 TESTTERP(ARGB, 4, 1, ARGB, 4, 1, \ | 996 TESTTERP(ARGB, 4, 1, ARGB, 4, 1, \ |
987 benchmark_width_ - 1, TERP, 1, _Any_Invert, -, 0) | 997 benchmark_width_ - 1, TERP, 1, _Any_Invert, -, 0) |
988 | 998 |
989 TESTINTERPOLATE(0) | 999 TESTINTERPOLATE(0) |
990 TESTINTERPOLATE(64) | 1000 TESTINTERPOLATE(64) |
991 TESTINTERPOLATE(128) | 1001 TESTINTERPOLATE(128) |
992 TESTINTERPOLATE(192) | 1002 TESTINTERPOLATE(192) |
993 TESTINTERPOLATE(255) | 1003 TESTINTERPOLATE(255) |
994 | 1004 |
995 static int TestBlend(int width, int height, int benchmark_iterations, | 1005 static int TestBlend(int width, int height, int benchmark_iterations, |
996 int disable_cpu_flags, int invert, int off) { | 1006 int disable_cpu_flags, int benchmark_cpu_info, |
| 1007 int invert, int off) { |
997 if (width < 1) { | 1008 if (width < 1) { |
998 width = 1; | 1009 width = 1; |
999 } | 1010 } |
1000 const int kBpp = 4; | 1011 const int kBpp = 4; |
1001 const int kStride = width * kBpp; | 1012 const int kStride = width * kBpp; |
1002 align_buffer_64(src_argb_a, kStride * height + off); | 1013 align_buffer_64(src_argb_a, kStride * height + off); |
1003 align_buffer_64(src_argb_b, kStride * height + off); | 1014 align_buffer_64(src_argb_b, kStride * height + off); |
1004 align_buffer_64(dst_argb_c, kStride * height); | 1015 align_buffer_64(dst_argb_c, kStride * height); |
1005 align_buffer_64(dst_argb_opt, kStride * height); | 1016 align_buffer_64(dst_argb_opt, kStride * height); |
1006 for (int i = 0; i < kStride * height; ++i) { | 1017 for (int i = 0; i < kStride * height; ++i) { |
1007 src_argb_a[i + off] = (fastrand() & 0xff); | 1018 src_argb_a[i + off] = (fastrand() & 0xff); |
1008 src_argb_b[i + off] = (fastrand() & 0xff); | 1019 src_argb_b[i + off] = (fastrand() & 0xff); |
1009 } | 1020 } |
1010 ARGBAttenuate(src_argb_a + off, kStride, src_argb_a + off, kStride, width, | 1021 ARGBAttenuate(src_argb_a + off, kStride, src_argb_a + off, kStride, width, |
1011 height); | 1022 height); |
1012 ARGBAttenuate(src_argb_b + off, kStride, src_argb_b + off, kStride, width, | 1023 ARGBAttenuate(src_argb_b + off, kStride, src_argb_b + off, kStride, width, |
1013 height); | 1024 height); |
1014 memset(dst_argb_c, 255, kStride * height); | 1025 memset(dst_argb_c, 255, kStride * height); |
1015 memset(dst_argb_opt, 255, kStride * height); | 1026 memset(dst_argb_opt, 255, kStride * height); |
1016 | 1027 |
1017 MaskCpuFlags(disable_cpu_flags); | 1028 MaskCpuFlags(disable_cpu_flags); |
1018 ARGBBlend(src_argb_a + off, kStride, | 1029 ARGBBlend(src_argb_a + off, kStride, |
1019 src_argb_b + off, kStride, | 1030 src_argb_b + off, kStride, |
1020 dst_argb_c, kStride, | 1031 dst_argb_c, kStride, |
1021 width, invert * height); | 1032 width, invert * height); |
1022 MaskCpuFlags(-1); | 1033 MaskCpuFlags(benchmark_cpu_info); |
1023 for (int i = 0; i < benchmark_iterations; ++i) { | 1034 for (int i = 0; i < benchmark_iterations; ++i) { |
1024 ARGBBlend(src_argb_a + off, kStride, | 1035 ARGBBlend(src_argb_a + off, kStride, |
1025 src_argb_b + off, kStride, | 1036 src_argb_b + off, kStride, |
1026 dst_argb_opt, kStride, | 1037 dst_argb_opt, kStride, |
1027 width, invert * height); | 1038 width, invert * height); |
1028 } | 1039 } |
1029 int max_diff = 0; | 1040 int max_diff = 0; |
1030 for (int i = 0; i < kStride * height; ++i) { | 1041 for (int i = 0; i < kStride * height; ++i) { |
1031 int abs_diff = | 1042 int abs_diff = |
1032 abs(static_cast<int>(dst_argb_c[i]) - | 1043 abs(static_cast<int>(dst_argb_c[i]) - |
1033 static_cast<int>(dst_argb_opt[i])); | 1044 static_cast<int>(dst_argb_opt[i])); |
1034 if (abs_diff > max_diff) { | 1045 if (abs_diff > max_diff) { |
1035 max_diff = abs_diff; | 1046 max_diff = abs_diff; |
1036 } | 1047 } |
1037 } | 1048 } |
1038 free_aligned_buffer_64(src_argb_a); | 1049 free_aligned_buffer_64(src_argb_a); |
1039 free_aligned_buffer_64(src_argb_b); | 1050 free_aligned_buffer_64(src_argb_b); |
1040 free_aligned_buffer_64(dst_argb_c); | 1051 free_aligned_buffer_64(dst_argb_c); |
1041 free_aligned_buffer_64(dst_argb_opt); | 1052 free_aligned_buffer_64(dst_argb_opt); |
1042 return max_diff; | 1053 return max_diff; |
1043 } | 1054 } |
1044 | 1055 |
1045 TEST_F(LibYUVPlanarTest, ARGBBlend_Any) { | 1056 TEST_F(LibYUVPlanarTest, ARGBBlend_Any) { |
1046 int max_diff = TestBlend(benchmark_width_ - 4, benchmark_height_, | 1057 int max_diff = TestBlend(benchmark_width_ - 4, benchmark_height_, |
1047 benchmark_iterations_, disable_cpu_flags_, +1, 0); | 1058 benchmark_iterations_, |
| 1059 disable_cpu_flags_, benchmark_cpu_info_, +1, 0); |
1048 EXPECT_LE(max_diff, 1); | 1060 EXPECT_LE(max_diff, 1); |
1049 } | 1061 } |
1050 | 1062 |
1051 TEST_F(LibYUVPlanarTest, ARGBBlend_Unaligned) { | 1063 TEST_F(LibYUVPlanarTest, ARGBBlend_Unaligned) { |
1052 int max_diff = TestBlend(benchmark_width_, benchmark_height_, | 1064 int max_diff = TestBlend(benchmark_width_, benchmark_height_, |
1053 benchmark_iterations_, disable_cpu_flags_, +1, 1); | 1065 benchmark_iterations_, |
| 1066 disable_cpu_flags_, benchmark_cpu_info_, +1, 1); |
1054 EXPECT_LE(max_diff, 1); | 1067 EXPECT_LE(max_diff, 1); |
1055 } | 1068 } |
1056 | 1069 |
1057 TEST_F(LibYUVPlanarTest, ARGBBlend_Invert) { | 1070 TEST_F(LibYUVPlanarTest, ARGBBlend_Invert) { |
1058 int max_diff = TestBlend(benchmark_width_, benchmark_height_, | 1071 int max_diff = TestBlend(benchmark_width_, benchmark_height_, |
1059 benchmark_iterations_, disable_cpu_flags_, -1, 0); | 1072 benchmark_iterations_, |
| 1073 disable_cpu_flags_, benchmark_cpu_info_, -1, 0); |
1060 EXPECT_LE(max_diff, 1); | 1074 EXPECT_LE(max_diff, 1); |
1061 } | 1075 } |
1062 | 1076 |
1063 TEST_F(LibYUVPlanarTest, ARGBBlend_Opt) { | 1077 TEST_F(LibYUVPlanarTest, ARGBBlend_Opt) { |
1064 int max_diff = TestBlend(benchmark_width_, benchmark_height_, | 1078 int max_diff = TestBlend(benchmark_width_, benchmark_height_, |
1065 benchmark_iterations_, disable_cpu_flags_, +1, 0); | 1079 benchmark_iterations_, |
| 1080 disable_cpu_flags_, benchmark_cpu_info_, +1, 0); |
1066 EXPECT_LE(max_diff, 1); | 1081 EXPECT_LE(max_diff, 1); |
1067 } | 1082 } |
1068 | 1083 |
1069 TEST_F(LibYUVPlanarTest, TestAffine) { | 1084 TEST_F(LibYUVPlanarTest, TestAffine) { |
1070 SIMD_ALIGNED(uint8 orig_pixels_0[1280][4]); | 1085 SIMD_ALIGNED(uint8 orig_pixels_0[1280][4]); |
1071 SIMD_ALIGNED(uint8 interpolate_pixels_C[1280][4]); | 1086 SIMD_ALIGNED(uint8 interpolate_pixels_C[1280][4]); |
1072 | 1087 |
1073 for (int i = 0; i < 1280; ++i) { | 1088 for (int i = 0; i < 1280; ++i) { |
1074 for (int j = 0; j < 4; ++j) { | 1089 for (int j = 0; j < 4; ++j) { |
1075 orig_pixels_0[i][j] = i; | 1090 orig_pixels_0[i][j] = i; |
(...skipping 261 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1337 | 1352 |
1338 // Disable all optimizations. | 1353 // Disable all optimizations. |
1339 MaskCpuFlags(disable_cpu_flags_); | 1354 MaskCpuFlags(disable_cpu_flags_); |
1340 double c_time = get_time(); | 1355 double c_time = get_time(); |
1341 for (j = 0; j < benchmark_iterations_; j++) { | 1356 for (j = 0; j < benchmark_iterations_; j++) { |
1342 CopyPlane(orig_y + y_off, y_st, dst_c + y_off, stride, yw, yh); | 1357 CopyPlane(orig_y + y_off, y_st, dst_c + y_off, stride, yw, yh); |
1343 } | 1358 } |
1344 c_time = (get_time() - c_time) / benchmark_iterations_; | 1359 c_time = (get_time() - c_time) / benchmark_iterations_; |
1345 | 1360 |
1346 // Enable optimizations. | 1361 // Enable optimizations. |
1347 MaskCpuFlags(-1); | 1362 MaskCpuFlags(benchmark_cpu_info_); |
1348 double opt_time = get_time(); | 1363 double opt_time = get_time(); |
1349 for (j = 0; j < benchmark_iterations_; j++) { | 1364 for (j = 0; j < benchmark_iterations_; j++) { |
1350 CopyPlane(orig_y + y_off, y_st, dst_opt + y_off, stride, yw, yh); | 1365 CopyPlane(orig_y + y_off, y_st, dst_opt + y_off, stride, yw, yh); |
1351 } | 1366 } |
1352 opt_time = (get_time() - opt_time) / benchmark_iterations_; | 1367 opt_time = (get_time() - opt_time) / benchmark_iterations_; |
1353 | 1368 |
1354 for (i = 0; i < y_plane_size; ++i) { | 1369 for (i = 0; i < y_plane_size; ++i) { |
1355 if (dst_c[i] != dst_opt[i]) | 1370 if (dst_c[i] != dst_opt[i]) |
1356 ++err; | 1371 ++err; |
1357 } | 1372 } |
1358 | 1373 |
1359 free_aligned_buffer_64(orig_y); | 1374 free_aligned_buffer_64(orig_y); |
1360 free_aligned_buffer_64(dst_c); | 1375 free_aligned_buffer_64(dst_c); |
1361 free_aligned_buffer_64(dst_opt); | 1376 free_aligned_buffer_64(dst_opt); |
1362 | 1377 |
1363 EXPECT_EQ(0, err); | 1378 EXPECT_EQ(0, err); |
1364 } | 1379 } |
1365 | 1380 |
1366 static int TestMultiply(int width, int height, int benchmark_iterations, | 1381 static int TestMultiply(int width, int height, int benchmark_iterations, |
1367 int disable_cpu_flags, int invert, int off) { | 1382 int disable_cpu_flags, int benchmark_cpu_info, |
| 1383 int invert, int off) { |
1368 if (width < 1) { | 1384 if (width < 1) { |
1369 width = 1; | 1385 width = 1; |
1370 } | 1386 } |
1371 const int kBpp = 4; | 1387 const int kBpp = 4; |
1372 const int kStride = width * kBpp; | 1388 const int kStride = width * kBpp; |
1373 align_buffer_64(src_argb_a, kStride * height + off); | 1389 align_buffer_64(src_argb_a, kStride * height + off); |
1374 align_buffer_64(src_argb_b, kStride * height + off); | 1390 align_buffer_64(src_argb_b, kStride * height + off); |
1375 align_buffer_64(dst_argb_c, kStride * height); | 1391 align_buffer_64(dst_argb_c, kStride * height); |
1376 align_buffer_64(dst_argb_opt, kStride * height); | 1392 align_buffer_64(dst_argb_opt, kStride * height); |
1377 for (int i = 0; i < kStride * height; ++i) { | 1393 for (int i = 0; i < kStride * height; ++i) { |
1378 src_argb_a[i + off] = (fastrand() & 0xff); | 1394 src_argb_a[i + off] = (fastrand() & 0xff); |
1379 src_argb_b[i + off] = (fastrand() & 0xff); | 1395 src_argb_b[i + off] = (fastrand() & 0xff); |
1380 } | 1396 } |
1381 memset(dst_argb_c, 0, kStride * height); | 1397 memset(dst_argb_c, 0, kStride * height); |
1382 memset(dst_argb_opt, 0, kStride * height); | 1398 memset(dst_argb_opt, 0, kStride * height); |
1383 | 1399 |
1384 MaskCpuFlags(disable_cpu_flags); | 1400 MaskCpuFlags(disable_cpu_flags); |
1385 ARGBMultiply(src_argb_a + off, kStride, | 1401 ARGBMultiply(src_argb_a + off, kStride, |
1386 src_argb_b + off, kStride, | 1402 src_argb_b + off, kStride, |
1387 dst_argb_c, kStride, | 1403 dst_argb_c, kStride, |
1388 width, invert * height); | 1404 width, invert * height); |
1389 MaskCpuFlags(-1); | 1405 MaskCpuFlags(benchmark_cpu_info); |
1390 for (int i = 0; i < benchmark_iterations; ++i) { | 1406 for (int i = 0; i < benchmark_iterations; ++i) { |
1391 ARGBMultiply(src_argb_a + off, kStride, | 1407 ARGBMultiply(src_argb_a + off, kStride, |
1392 src_argb_b + off, kStride, | 1408 src_argb_b + off, kStride, |
1393 dst_argb_opt, kStride, | 1409 dst_argb_opt, kStride, |
1394 width, invert * height); | 1410 width, invert * height); |
1395 } | 1411 } |
1396 int max_diff = 0; | 1412 int max_diff = 0; |
1397 for (int i = 0; i < kStride * height; ++i) { | 1413 for (int i = 0; i < kStride * height; ++i) { |
1398 int abs_diff = | 1414 int abs_diff = |
1399 abs(static_cast<int>(dst_argb_c[i]) - | 1415 abs(static_cast<int>(dst_argb_c[i]) - |
1400 static_cast<int>(dst_argb_opt[i])); | 1416 static_cast<int>(dst_argb_opt[i])); |
1401 if (abs_diff > max_diff) { | 1417 if (abs_diff > max_diff) { |
1402 max_diff = abs_diff; | 1418 max_diff = abs_diff; |
1403 } | 1419 } |
1404 } | 1420 } |
1405 free_aligned_buffer_64(src_argb_a); | 1421 free_aligned_buffer_64(src_argb_a); |
1406 free_aligned_buffer_64(src_argb_b); | 1422 free_aligned_buffer_64(src_argb_b); |
1407 free_aligned_buffer_64(dst_argb_c); | 1423 free_aligned_buffer_64(dst_argb_c); |
1408 free_aligned_buffer_64(dst_argb_opt); | 1424 free_aligned_buffer_64(dst_argb_opt); |
1409 return max_diff; | 1425 return max_diff; |
1410 } | 1426 } |
1411 | 1427 |
1412 TEST_F(LibYUVPlanarTest, ARGBMultiply_Any) { | 1428 TEST_F(LibYUVPlanarTest, ARGBMultiply_Any) { |
1413 int max_diff = TestMultiply(benchmark_width_ - 1, benchmark_height_, | 1429 int max_diff = TestMultiply(benchmark_width_ - 1, benchmark_height_, |
1414 benchmark_iterations_, disable_cpu_flags_, +1, 0); | 1430 benchmark_iterations_, |
| 1431 disable_cpu_flags_, benchmark_cpu_info_, +1, 0); |
1415 EXPECT_LE(max_diff, 1); | 1432 EXPECT_LE(max_diff, 1); |
1416 } | 1433 } |
1417 | 1434 |
1418 TEST_F(LibYUVPlanarTest, ARGBMultiply_Unaligned) { | 1435 TEST_F(LibYUVPlanarTest, ARGBMultiply_Unaligned) { |
1419 int max_diff = TestMultiply(benchmark_width_, benchmark_height_, | 1436 int max_diff = TestMultiply(benchmark_width_, benchmark_height_, |
1420 benchmark_iterations_, disable_cpu_flags_, +1, 1); | 1437 benchmark_iterations_, |
| 1438 disable_cpu_flags_, benchmark_cpu_info_, +1, 1); |
1421 EXPECT_LE(max_diff, 1); | 1439 EXPECT_LE(max_diff, 1); |
1422 } | 1440 } |
1423 | 1441 |
1424 TEST_F(LibYUVPlanarTest, ARGBMultiply_Invert) { | 1442 TEST_F(LibYUVPlanarTest, ARGBMultiply_Invert) { |
1425 int max_diff = TestMultiply(benchmark_width_, benchmark_height_, | 1443 int max_diff = TestMultiply(benchmark_width_, benchmark_height_, |
1426 benchmark_iterations_, disable_cpu_flags_, -1, 0); | 1444 benchmark_iterations_, |
| 1445 disable_cpu_flags_, benchmark_cpu_info_, -1, 0); |
1427 EXPECT_LE(max_diff, 1); | 1446 EXPECT_LE(max_diff, 1); |
1428 } | 1447 } |
1429 | 1448 |
1430 TEST_F(LibYUVPlanarTest, ARGBMultiply_Opt) { | 1449 TEST_F(LibYUVPlanarTest, ARGBMultiply_Opt) { |
1431 int max_diff = TestMultiply(benchmark_width_, benchmark_height_, | 1450 int max_diff = TestMultiply(benchmark_width_, benchmark_height_, |
1432 benchmark_iterations_, disable_cpu_flags_, +1, 0); | 1451 benchmark_iterations_, |
| 1452 disable_cpu_flags_, benchmark_cpu_info_, +1, 0); |
1433 EXPECT_LE(max_diff, 1); | 1453 EXPECT_LE(max_diff, 1); |
1434 } | 1454 } |
1435 | 1455 |
1436 static int TestAdd(int width, int height, int benchmark_iterations, | 1456 static int TestAdd(int width, int height, int benchmark_iterations, |
1437 int disable_cpu_flags, int invert, int off) { | 1457 int disable_cpu_flags, int benchmark_cpu_info, |
| 1458 int invert, int off) { |
1438 if (width < 1) { | 1459 if (width < 1) { |
1439 width = 1; | 1460 width = 1; |
1440 } | 1461 } |
1441 const int kBpp = 4; | 1462 const int kBpp = 4; |
1442 const int kStride = width * kBpp; | 1463 const int kStride = width * kBpp; |
1443 align_buffer_64(src_argb_a, kStride * height + off); | 1464 align_buffer_64(src_argb_a, kStride * height + off); |
1444 align_buffer_64(src_argb_b, kStride * height + off); | 1465 align_buffer_64(src_argb_b, kStride * height + off); |
1445 align_buffer_64(dst_argb_c, kStride * height); | 1466 align_buffer_64(dst_argb_c, kStride * height); |
1446 align_buffer_64(dst_argb_opt, kStride * height); | 1467 align_buffer_64(dst_argb_opt, kStride * height); |
1447 for (int i = 0; i < kStride * height; ++i) { | 1468 for (int i = 0; i < kStride * height; ++i) { |
1448 src_argb_a[i + off] = (fastrand() & 0xff); | 1469 src_argb_a[i + off] = (fastrand() & 0xff); |
1449 src_argb_b[i + off] = (fastrand() & 0xff); | 1470 src_argb_b[i + off] = (fastrand() & 0xff); |
1450 } | 1471 } |
1451 memset(dst_argb_c, 0, kStride * height); | 1472 memset(dst_argb_c, 0, kStride * height); |
1452 memset(dst_argb_opt, 0, kStride * height); | 1473 memset(dst_argb_opt, 0, kStride * height); |
1453 | 1474 |
1454 MaskCpuFlags(disable_cpu_flags); | 1475 MaskCpuFlags(disable_cpu_flags); |
1455 ARGBAdd(src_argb_a + off, kStride, | 1476 ARGBAdd(src_argb_a + off, kStride, |
1456 src_argb_b + off, kStride, | 1477 src_argb_b + off, kStride, |
1457 dst_argb_c, kStride, | 1478 dst_argb_c, kStride, |
1458 width, invert * height); | 1479 width, invert * height); |
1459 MaskCpuFlags(-1); | 1480 MaskCpuFlags(benchmark_cpu_info); |
1460 for (int i = 0; i < benchmark_iterations; ++i) { | 1481 for (int i = 0; i < benchmark_iterations; ++i) { |
1461 ARGBAdd(src_argb_a + off, kStride, | 1482 ARGBAdd(src_argb_a + off, kStride, |
1462 src_argb_b + off, kStride, | 1483 src_argb_b + off, kStride, |
1463 dst_argb_opt, kStride, | 1484 dst_argb_opt, kStride, |
1464 width, invert * height); | 1485 width, invert * height); |
1465 } | 1486 } |
1466 int max_diff = 0; | 1487 int max_diff = 0; |
1467 for (int i = 0; i < kStride * height; ++i) { | 1488 for (int i = 0; i < kStride * height; ++i) { |
1468 int abs_diff = | 1489 int abs_diff = |
1469 abs(static_cast<int>(dst_argb_c[i]) - | 1490 abs(static_cast<int>(dst_argb_c[i]) - |
1470 static_cast<int>(dst_argb_opt[i])); | 1491 static_cast<int>(dst_argb_opt[i])); |
1471 if (abs_diff > max_diff) { | 1492 if (abs_diff > max_diff) { |
1472 max_diff = abs_diff; | 1493 max_diff = abs_diff; |
1473 } | 1494 } |
1474 } | 1495 } |
1475 free_aligned_buffer_64(src_argb_a); | 1496 free_aligned_buffer_64(src_argb_a); |
1476 free_aligned_buffer_64(src_argb_b); | 1497 free_aligned_buffer_64(src_argb_b); |
1477 free_aligned_buffer_64(dst_argb_c); | 1498 free_aligned_buffer_64(dst_argb_c); |
1478 free_aligned_buffer_64(dst_argb_opt); | 1499 free_aligned_buffer_64(dst_argb_opt); |
1479 return max_diff; | 1500 return max_diff; |
1480 } | 1501 } |
1481 | 1502 |
1482 TEST_F(LibYUVPlanarTest, ARGBAdd_Any) { | 1503 TEST_F(LibYUVPlanarTest, ARGBAdd_Any) { |
1483 int max_diff = TestAdd(benchmark_width_ - 1, benchmark_height_, | 1504 int max_diff = TestAdd(benchmark_width_ - 1, benchmark_height_, |
1484 benchmark_iterations_, disable_cpu_flags_, +1, 0); | 1505 benchmark_iterations_, |
| 1506 disable_cpu_flags_, benchmark_cpu_info_, +1, 0); |
1485 EXPECT_LE(max_diff, 1); | 1507 EXPECT_LE(max_diff, 1); |
1486 } | 1508 } |
1487 | 1509 |
1488 TEST_F(LibYUVPlanarTest, ARGBAdd_Unaligned) { | 1510 TEST_F(LibYUVPlanarTest, ARGBAdd_Unaligned) { |
1489 int max_diff = TestAdd(benchmark_width_, benchmark_height_, | 1511 int max_diff = TestAdd(benchmark_width_, benchmark_height_, |
1490 benchmark_iterations_, disable_cpu_flags_, +1, 1); | 1512 benchmark_iterations_, |
| 1513 disable_cpu_flags_, benchmark_cpu_info_, +1, 1); |
1491 EXPECT_LE(max_diff, 1); | 1514 EXPECT_LE(max_diff, 1); |
1492 } | 1515 } |
1493 | 1516 |
1494 TEST_F(LibYUVPlanarTest, ARGBAdd_Invert) { | 1517 TEST_F(LibYUVPlanarTest, ARGBAdd_Invert) { |
1495 int max_diff = TestAdd(benchmark_width_, benchmark_height_, | 1518 int max_diff = TestAdd(benchmark_width_, benchmark_height_, |
1496 benchmark_iterations_, disable_cpu_flags_, -1, 0); | 1519 benchmark_iterations_, |
| 1520 disable_cpu_flags_, benchmark_cpu_info_, -1, 0); |
1497 EXPECT_LE(max_diff, 1); | 1521 EXPECT_LE(max_diff, 1); |
1498 } | 1522 } |
1499 | 1523 |
1500 TEST_F(LibYUVPlanarTest, ARGBAdd_Opt) { | 1524 TEST_F(LibYUVPlanarTest, ARGBAdd_Opt) { |
1501 int max_diff = TestAdd(benchmark_width_, benchmark_height_, | 1525 int max_diff = TestAdd(benchmark_width_, benchmark_height_, |
1502 benchmark_iterations_, disable_cpu_flags_, +1, 0); | 1526 benchmark_iterations_, |
| 1527 disable_cpu_flags_, benchmark_cpu_info_, +1, 0); |
1503 EXPECT_LE(max_diff, 1); | 1528 EXPECT_LE(max_diff, 1); |
1504 } | 1529 } |
1505 | 1530 |
1506 static int TestSubtract(int width, int height, int benchmark_iterations, | 1531 static int TestSubtract(int width, int height, int benchmark_iterations, |
1507 int disable_cpu_flags, int invert, int off) { | 1532 int disable_cpu_flags, int benchmark_cpu_info, |
| 1533 int invert, int off) { |
1508 if (width < 1) { | 1534 if (width < 1) { |
1509 width = 1; | 1535 width = 1; |
1510 } | 1536 } |
1511 const int kBpp = 4; | 1537 const int kBpp = 4; |
1512 const int kStride = width * kBpp; | 1538 const int kStride = width * kBpp; |
1513 align_buffer_64(src_argb_a, kStride * height + off); | 1539 align_buffer_64(src_argb_a, kStride * height + off); |
1514 align_buffer_64(src_argb_b, kStride * height + off); | 1540 align_buffer_64(src_argb_b, kStride * height + off); |
1515 align_buffer_64(dst_argb_c, kStride * height); | 1541 align_buffer_64(dst_argb_c, kStride * height); |
1516 align_buffer_64(dst_argb_opt, kStride * height); | 1542 align_buffer_64(dst_argb_opt, kStride * height); |
1517 for (int i = 0; i < kStride * height; ++i) { | 1543 for (int i = 0; i < kStride * height; ++i) { |
1518 src_argb_a[i + off] = (fastrand() & 0xff); | 1544 src_argb_a[i + off] = (fastrand() & 0xff); |
1519 src_argb_b[i + off] = (fastrand() & 0xff); | 1545 src_argb_b[i + off] = (fastrand() & 0xff); |
1520 } | 1546 } |
1521 memset(dst_argb_c, 0, kStride * height); | 1547 memset(dst_argb_c, 0, kStride * height); |
1522 memset(dst_argb_opt, 0, kStride * height); | 1548 memset(dst_argb_opt, 0, kStride * height); |
1523 | 1549 |
1524 MaskCpuFlags(disable_cpu_flags); | 1550 MaskCpuFlags(disable_cpu_flags); |
1525 ARGBSubtract(src_argb_a + off, kStride, | 1551 ARGBSubtract(src_argb_a + off, kStride, |
1526 src_argb_b + off, kStride, | 1552 src_argb_b + off, kStride, |
1527 dst_argb_c, kStride, | 1553 dst_argb_c, kStride, |
1528 width, invert * height); | 1554 width, invert * height); |
1529 MaskCpuFlags(-1); | 1555 MaskCpuFlags(benchmark_cpu_info); |
1530 for (int i = 0; i < benchmark_iterations; ++i) { | 1556 for (int i = 0; i < benchmark_iterations; ++i) { |
1531 ARGBSubtract(src_argb_a + off, kStride, | 1557 ARGBSubtract(src_argb_a + off, kStride, |
1532 src_argb_b + off, kStride, | 1558 src_argb_b + off, kStride, |
1533 dst_argb_opt, kStride, | 1559 dst_argb_opt, kStride, |
1534 width, invert * height); | 1560 width, invert * height); |
1535 } | 1561 } |
1536 int max_diff = 0; | 1562 int max_diff = 0; |
1537 for (int i = 0; i < kStride * height; ++i) { | 1563 for (int i = 0; i < kStride * height; ++i) { |
1538 int abs_diff = | 1564 int abs_diff = |
1539 abs(static_cast<int>(dst_argb_c[i]) - | 1565 abs(static_cast<int>(dst_argb_c[i]) - |
1540 static_cast<int>(dst_argb_opt[i])); | 1566 static_cast<int>(dst_argb_opt[i])); |
1541 if (abs_diff > max_diff) { | 1567 if (abs_diff > max_diff) { |
1542 max_diff = abs_diff; | 1568 max_diff = abs_diff; |
1543 } | 1569 } |
1544 } | 1570 } |
1545 free_aligned_buffer_64(src_argb_a); | 1571 free_aligned_buffer_64(src_argb_a); |
1546 free_aligned_buffer_64(src_argb_b); | 1572 free_aligned_buffer_64(src_argb_b); |
1547 free_aligned_buffer_64(dst_argb_c); | 1573 free_aligned_buffer_64(dst_argb_c); |
1548 free_aligned_buffer_64(dst_argb_opt); | 1574 free_aligned_buffer_64(dst_argb_opt); |
1549 return max_diff; | 1575 return max_diff; |
1550 } | 1576 } |
1551 | 1577 |
1552 TEST_F(LibYUVPlanarTest, ARGBSubtract_Any) { | 1578 TEST_F(LibYUVPlanarTest, ARGBSubtract_Any) { |
1553 int max_diff = TestSubtract(benchmark_width_ - 1, benchmark_height_, | 1579 int max_diff = TestSubtract(benchmark_width_ - 1, benchmark_height_, |
1554 benchmark_iterations_, disable_cpu_flags_, +1, 0); | 1580 benchmark_iterations_, |
| 1581 disable_cpu_flags_, benchmark_cpu_info_, +1, 0); |
1555 EXPECT_LE(max_diff, 1); | 1582 EXPECT_LE(max_diff, 1); |
1556 } | 1583 } |
1557 | 1584 |
1558 TEST_F(LibYUVPlanarTest, ARGBSubtract_Unaligned) { | 1585 TEST_F(LibYUVPlanarTest, ARGBSubtract_Unaligned) { |
1559 int max_diff = TestSubtract(benchmark_width_, benchmark_height_, | 1586 int max_diff = TestSubtract(benchmark_width_, benchmark_height_, |
1560 benchmark_iterations_, disable_cpu_flags_, +1, 1); | 1587 benchmark_iterations_, |
| 1588 disable_cpu_flags_, benchmark_cpu_info_, +1, 1); |
1561 EXPECT_LE(max_diff, 1); | 1589 EXPECT_LE(max_diff, 1); |
1562 } | 1590 } |
1563 | 1591 |
1564 TEST_F(LibYUVPlanarTest, ARGBSubtract_Invert) { | 1592 TEST_F(LibYUVPlanarTest, ARGBSubtract_Invert) { |
1565 int max_diff = TestSubtract(benchmark_width_, benchmark_height_, | 1593 int max_diff = TestSubtract(benchmark_width_, benchmark_height_, |
1566 benchmark_iterations_, disable_cpu_flags_, -1, 0); | 1594 benchmark_iterations_, |
| 1595 disable_cpu_flags_, benchmark_cpu_info_, -1, 0); |
1567 EXPECT_LE(max_diff, 1); | 1596 EXPECT_LE(max_diff, 1); |
1568 } | 1597 } |
1569 | 1598 |
1570 TEST_F(LibYUVPlanarTest, ARGBSubtract_Opt) { | 1599 TEST_F(LibYUVPlanarTest, ARGBSubtract_Opt) { |
1571 int max_diff = TestSubtract(benchmark_width_, benchmark_height_, | 1600 int max_diff = TestSubtract(benchmark_width_, benchmark_height_, |
1572 benchmark_iterations_, disable_cpu_flags_, +1, 0); | 1601 benchmark_iterations_, |
| 1602 disable_cpu_flags_, benchmark_cpu_info_, +1, 0); |
1573 EXPECT_LE(max_diff, 1); | 1603 EXPECT_LE(max_diff, 1); |
1574 } | 1604 } |
1575 | 1605 |
1576 static int TestSobel(int width, int height, int benchmark_iterations, | 1606 static int TestSobel(int width, int height, int benchmark_iterations, |
1577 int disable_cpu_flags, int invert, int off) { | 1607 int disable_cpu_flags, int benchmark_cpu_info, |
| 1608 int invert, int off) { |
1578 if (width < 1) { | 1609 if (width < 1) { |
1579 width = 1; | 1610 width = 1; |
1580 } | 1611 } |
1581 const int kBpp = 4; | 1612 const int kBpp = 4; |
1582 const int kStride = width * kBpp; | 1613 const int kStride = width * kBpp; |
1583 align_buffer_64(src_argb_a, kStride * height + off); | 1614 align_buffer_64(src_argb_a, kStride * height + off); |
1584 align_buffer_64(dst_argb_c, kStride * height); | 1615 align_buffer_64(dst_argb_c, kStride * height); |
1585 align_buffer_64(dst_argb_opt, kStride * height); | 1616 align_buffer_64(dst_argb_opt, kStride * height); |
1586 memset(src_argb_a, 0, kStride * height + off); | 1617 memset(src_argb_a, 0, kStride * height + off); |
1587 for (int i = 0; i < kStride * height; ++i) { | 1618 for (int i = 0; i < kStride * height; ++i) { |
1588 src_argb_a[i + off] = (fastrand() & 0xff); | 1619 src_argb_a[i + off] = (fastrand() & 0xff); |
1589 } | 1620 } |
1590 memset(dst_argb_c, 0, kStride * height); | 1621 memset(dst_argb_c, 0, kStride * height); |
1591 memset(dst_argb_opt, 0, kStride * height); | 1622 memset(dst_argb_opt, 0, kStride * height); |
1592 | 1623 |
1593 MaskCpuFlags(disable_cpu_flags); | 1624 MaskCpuFlags(disable_cpu_flags); |
1594 ARGBSobel(src_argb_a + off, kStride, | 1625 ARGBSobel(src_argb_a + off, kStride, |
1595 dst_argb_c, kStride, | 1626 dst_argb_c, kStride, |
1596 width, invert * height); | 1627 width, invert * height); |
1597 MaskCpuFlags(-1); | 1628 MaskCpuFlags(benchmark_cpu_info); |
1598 for (int i = 0; i < benchmark_iterations; ++i) { | 1629 for (int i = 0; i < benchmark_iterations; ++i) { |
1599 ARGBSobel(src_argb_a + off, kStride, | 1630 ARGBSobel(src_argb_a + off, kStride, |
1600 dst_argb_opt, kStride, | 1631 dst_argb_opt, kStride, |
1601 width, invert * height); | 1632 width, invert * height); |
1602 } | 1633 } |
1603 int max_diff = 0; | 1634 int max_diff = 0; |
1604 for (int i = 0; i < kStride * height; ++i) { | 1635 for (int i = 0; i < kStride * height; ++i) { |
1605 int abs_diff = | 1636 int abs_diff = |
1606 abs(static_cast<int>(dst_argb_c[i]) - | 1637 abs(static_cast<int>(dst_argb_c[i]) - |
1607 static_cast<int>(dst_argb_opt[i])); | 1638 static_cast<int>(dst_argb_opt[i])); |
1608 if (abs_diff > max_diff) { | 1639 if (abs_diff > max_diff) { |
1609 max_diff = abs_diff; | 1640 max_diff = abs_diff; |
1610 } | 1641 } |
1611 } | 1642 } |
1612 free_aligned_buffer_64(src_argb_a); | 1643 free_aligned_buffer_64(src_argb_a); |
1613 free_aligned_buffer_64(dst_argb_c); | 1644 free_aligned_buffer_64(dst_argb_c); |
1614 free_aligned_buffer_64(dst_argb_opt); | 1645 free_aligned_buffer_64(dst_argb_opt); |
1615 return max_diff; | 1646 return max_diff; |
1616 } | 1647 } |
1617 | 1648 |
1618 TEST_F(LibYUVPlanarTest, ARGBSobel_Any) { | 1649 TEST_F(LibYUVPlanarTest, ARGBSobel_Any) { |
1619 int max_diff = TestSobel(benchmark_width_ - 1, benchmark_height_, | 1650 int max_diff = TestSobel(benchmark_width_ - 1, benchmark_height_, |
1620 benchmark_iterations_, disable_cpu_flags_, +1, 0); | 1651 benchmark_iterations_, |
| 1652 disable_cpu_flags_, benchmark_cpu_info_, +1, 0); |
1621 EXPECT_EQ(0, max_diff); | 1653 EXPECT_EQ(0, max_diff); |
1622 } | 1654 } |
1623 | 1655 |
1624 TEST_F(LibYUVPlanarTest, ARGBSobel_Unaligned) { | 1656 TEST_F(LibYUVPlanarTest, ARGBSobel_Unaligned) { |
1625 int max_diff = TestSobel(benchmark_width_, benchmark_height_, | 1657 int max_diff = TestSobel(benchmark_width_, benchmark_height_, |
1626 benchmark_iterations_, disable_cpu_flags_, +1, 1); | 1658 benchmark_iterations_, |
| 1659 disable_cpu_flags_, benchmark_cpu_info_, +1, 1); |
1627 EXPECT_EQ(0, max_diff); | 1660 EXPECT_EQ(0, max_diff); |
1628 } | 1661 } |
1629 | 1662 |
1630 TEST_F(LibYUVPlanarTest, ARGBSobel_Invert) { | 1663 TEST_F(LibYUVPlanarTest, ARGBSobel_Invert) { |
1631 int max_diff = TestSobel(benchmark_width_, benchmark_height_, | 1664 int max_diff = TestSobel(benchmark_width_, benchmark_height_, |
1632 benchmark_iterations_, disable_cpu_flags_, -1, 0); | 1665 benchmark_iterations_, |
| 1666 disable_cpu_flags_, benchmark_cpu_info_, -1, 0); |
1633 EXPECT_EQ(0, max_diff); | 1667 EXPECT_EQ(0, max_diff); |
1634 } | 1668 } |
1635 | 1669 |
1636 TEST_F(LibYUVPlanarTest, ARGBSobel_Opt) { | 1670 TEST_F(LibYUVPlanarTest, ARGBSobel_Opt) { |
1637 int max_diff = TestSobel(benchmark_width_, benchmark_height_, | 1671 int max_diff = TestSobel(benchmark_width_, benchmark_height_, |
1638 benchmark_iterations_, disable_cpu_flags_, +1, 0); | 1672 benchmark_iterations_, |
| 1673 disable_cpu_flags_, benchmark_cpu_info_, +1, 0); |
1639 EXPECT_EQ(0, max_diff); | 1674 EXPECT_EQ(0, max_diff); |
1640 } | 1675 } |
1641 | 1676 |
1642 static int TestSobelToPlane(int width, int height, int benchmark_iterations, | 1677 static int TestSobelToPlane(int width, int height, int benchmark_iterations, |
1643 int disable_cpu_flags, int invert, int off) { | 1678 int disable_cpu_flags, int benchmark_cpu_info, |
| 1679 int invert, int off) { |
1644 if (width < 1) { | 1680 if (width < 1) { |
1645 width = 1; | 1681 width = 1; |
1646 } | 1682 } |
1647 const int kSrcBpp = 4; | 1683 const int kSrcBpp = 4; |
1648 const int kDstBpp = 1; | 1684 const int kDstBpp = 1; |
1649 const int kSrcStride = (width * kSrcBpp + 15) & ~15; | 1685 const int kSrcStride = (width * kSrcBpp + 15) & ~15; |
1650 const int kDstStride = (width * kDstBpp + 15) & ~15; | 1686 const int kDstStride = (width * kDstBpp + 15) & ~15; |
1651 align_buffer_64(src_argb_a, kSrcStride * height + off); | 1687 align_buffer_64(src_argb_a, kSrcStride * height + off); |
1652 align_buffer_64(dst_argb_c, kDstStride * height); | 1688 align_buffer_64(dst_argb_c, kDstStride * height); |
1653 align_buffer_64(dst_argb_opt, kDstStride * height); | 1689 align_buffer_64(dst_argb_opt, kDstStride * height); |
1654 memset(src_argb_a, 0, kSrcStride * height + off); | 1690 memset(src_argb_a, 0, kSrcStride * height + off); |
1655 for (int i = 0; i < kSrcStride * height; ++i) { | 1691 for (int i = 0; i < kSrcStride * height; ++i) { |
1656 src_argb_a[i + off] = (fastrand() & 0xff); | 1692 src_argb_a[i + off] = (fastrand() & 0xff); |
1657 } | 1693 } |
1658 memset(dst_argb_c, 0, kDstStride * height); | 1694 memset(dst_argb_c, 0, kDstStride * height); |
1659 memset(dst_argb_opt, 0, kDstStride * height); | 1695 memset(dst_argb_opt, 0, kDstStride * height); |
1660 | 1696 |
1661 MaskCpuFlags(disable_cpu_flags); | 1697 MaskCpuFlags(disable_cpu_flags); |
1662 ARGBSobelToPlane(src_argb_a + off, kSrcStride, | 1698 ARGBSobelToPlane(src_argb_a + off, kSrcStride, |
1663 dst_argb_c, kDstStride, | 1699 dst_argb_c, kDstStride, |
1664 width, invert * height); | 1700 width, invert * height); |
1665 MaskCpuFlags(-1); | 1701 MaskCpuFlags(benchmark_cpu_info); |
1666 for (int i = 0; i < benchmark_iterations; ++i) { | 1702 for (int i = 0; i < benchmark_iterations; ++i) { |
1667 ARGBSobelToPlane(src_argb_a + off, kSrcStride, | 1703 ARGBSobelToPlane(src_argb_a + off, kSrcStride, |
1668 dst_argb_opt, kDstStride, | 1704 dst_argb_opt, kDstStride, |
1669 width, invert * height); | 1705 width, invert * height); |
1670 } | 1706 } |
1671 int max_diff = 0; | 1707 int max_diff = 0; |
1672 for (int i = 0; i < kDstStride * height; ++i) { | 1708 for (int i = 0; i < kDstStride * height; ++i) { |
1673 int abs_diff = | 1709 int abs_diff = |
1674 abs(static_cast<int>(dst_argb_c[i]) - | 1710 abs(static_cast<int>(dst_argb_c[i]) - |
1675 static_cast<int>(dst_argb_opt[i])); | 1711 static_cast<int>(dst_argb_opt[i])); |
1676 if (abs_diff > max_diff) { | 1712 if (abs_diff > max_diff) { |
1677 max_diff = abs_diff; | 1713 max_diff = abs_diff; |
1678 } | 1714 } |
1679 } | 1715 } |
1680 free_aligned_buffer_64(src_argb_a); | 1716 free_aligned_buffer_64(src_argb_a); |
1681 free_aligned_buffer_64(dst_argb_c); | 1717 free_aligned_buffer_64(dst_argb_c); |
1682 free_aligned_buffer_64(dst_argb_opt); | 1718 free_aligned_buffer_64(dst_argb_opt); |
1683 return max_diff; | 1719 return max_diff; |
1684 } | 1720 } |
1685 | 1721 |
1686 TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Any) { | 1722 TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Any) { |
1687 int max_diff = TestSobelToPlane(benchmark_width_ - 1, benchmark_height_, | 1723 int max_diff = TestSobelToPlane(benchmark_width_ - 1, benchmark_height_, |
1688 benchmark_iterations_, disable_cpu_flags_, | 1724 benchmark_iterations_, |
| 1725 disable_cpu_flags_, benchmark_cpu_info_, |
1689 +1, 0); | 1726 +1, 0); |
1690 EXPECT_EQ(0, max_diff); | 1727 EXPECT_EQ(0, max_diff); |
1691 } | 1728 } |
1692 | 1729 |
1693 TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Unaligned) { | 1730 TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Unaligned) { |
1694 int max_diff = TestSobelToPlane(benchmark_width_, benchmark_height_, | 1731 int max_diff = TestSobelToPlane(benchmark_width_, benchmark_height_, |
1695 benchmark_iterations_, disable_cpu_flags_, | 1732 benchmark_iterations_, |
| 1733 disable_cpu_flags_, benchmark_cpu_info_, |
1696 +1, 1); | 1734 +1, 1); |
1697 EXPECT_EQ(0, max_diff); | 1735 EXPECT_EQ(0, max_diff); |
1698 } | 1736 } |
1699 | 1737 |
1700 TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Invert) { | 1738 TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Invert) { |
1701 int max_diff = TestSobelToPlane(benchmark_width_, benchmark_height_, | 1739 int max_diff = TestSobelToPlane(benchmark_width_, benchmark_height_, |
1702 benchmark_iterations_, disable_cpu_flags_, | 1740 benchmark_iterations_, |
| 1741 disable_cpu_flags_, benchmark_cpu_info_, |
1703 -1, 0); | 1742 -1, 0); |
1704 EXPECT_EQ(0, max_diff); | 1743 EXPECT_EQ(0, max_diff); |
1705 } | 1744 } |
1706 | 1745 |
1707 TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Opt) { | 1746 TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Opt) { |
1708 int max_diff = TestSobelToPlane(benchmark_width_, benchmark_height_, | 1747 int max_diff = TestSobelToPlane(benchmark_width_, benchmark_height_, |
1709 benchmark_iterations_, disable_cpu_flags_, | 1748 benchmark_iterations_, |
| 1749 disable_cpu_flags_, benchmark_cpu_info_, |
1710 +1, 0); | 1750 +1, 0); |
1711 EXPECT_EQ(0, max_diff); | 1751 EXPECT_EQ(0, max_diff); |
1712 } | 1752 } |
1713 | 1753 |
1714 static int TestSobelXY(int width, int height, int benchmark_iterations, | 1754 static int TestSobelXY(int width, int height, int benchmark_iterations, |
1715 int disable_cpu_flags, int invert, int off) { | 1755 int disable_cpu_flags, int benchmark_cpu_info, |
| 1756 int invert, int off) { |
1716 if (width < 1) { | 1757 if (width < 1) { |
1717 width = 1; | 1758 width = 1; |
1718 } | 1759 } |
1719 const int kBpp = 4; | 1760 const int kBpp = 4; |
1720 const int kStride = width * kBpp; | 1761 const int kStride = width * kBpp; |
1721 align_buffer_64(src_argb_a, kStride * height + off); | 1762 align_buffer_64(src_argb_a, kStride * height + off); |
1722 align_buffer_64(dst_argb_c, kStride * height); | 1763 align_buffer_64(dst_argb_c, kStride * height); |
1723 align_buffer_64(dst_argb_opt, kStride * height); | 1764 align_buffer_64(dst_argb_opt, kStride * height); |
1724 memset(src_argb_a, 0, kStride * height + off); | 1765 memset(src_argb_a, 0, kStride * height + off); |
1725 for (int i = 0; i < kStride * height; ++i) { | 1766 for (int i = 0; i < kStride * height; ++i) { |
1726 src_argb_a[i + off] = (fastrand() & 0xff); | 1767 src_argb_a[i + off] = (fastrand() & 0xff); |
1727 } | 1768 } |
1728 memset(dst_argb_c, 0, kStride * height); | 1769 memset(dst_argb_c, 0, kStride * height); |
1729 memset(dst_argb_opt, 0, kStride * height); | 1770 memset(dst_argb_opt, 0, kStride * height); |
1730 | 1771 |
1731 MaskCpuFlags(disable_cpu_flags); | 1772 MaskCpuFlags(disable_cpu_flags); |
1732 ARGBSobelXY(src_argb_a + off, kStride, | 1773 ARGBSobelXY(src_argb_a + off, kStride, |
1733 dst_argb_c, kStride, | 1774 dst_argb_c, kStride, |
1734 width, invert * height); | 1775 width, invert * height); |
1735 MaskCpuFlags(-1); | 1776 MaskCpuFlags(benchmark_cpu_info); |
1736 for (int i = 0; i < benchmark_iterations; ++i) { | 1777 for (int i = 0; i < benchmark_iterations; ++i) { |
1737 ARGBSobelXY(src_argb_a + off, kStride, | 1778 ARGBSobelXY(src_argb_a + off, kStride, |
1738 dst_argb_opt, kStride, | 1779 dst_argb_opt, kStride, |
1739 width, invert * height); | 1780 width, invert * height); |
1740 } | 1781 } |
1741 int max_diff = 0; | 1782 int max_diff = 0; |
1742 for (int i = 0; i < kStride * height; ++i) { | 1783 for (int i = 0; i < kStride * height; ++i) { |
1743 int abs_diff = | 1784 int abs_diff = |
1744 abs(static_cast<int>(dst_argb_c[i]) - | 1785 abs(static_cast<int>(dst_argb_c[i]) - |
1745 static_cast<int>(dst_argb_opt[i])); | 1786 static_cast<int>(dst_argb_opt[i])); |
1746 if (abs_diff > max_diff) { | 1787 if (abs_diff > max_diff) { |
1747 max_diff = abs_diff; | 1788 max_diff = abs_diff; |
1748 } | 1789 } |
1749 } | 1790 } |
1750 free_aligned_buffer_64(src_argb_a); | 1791 free_aligned_buffer_64(src_argb_a); |
1751 free_aligned_buffer_64(dst_argb_c); | 1792 free_aligned_buffer_64(dst_argb_c); |
1752 free_aligned_buffer_64(dst_argb_opt); | 1793 free_aligned_buffer_64(dst_argb_opt); |
1753 return max_diff; | 1794 return max_diff; |
1754 } | 1795 } |
1755 | 1796 |
1756 TEST_F(LibYUVPlanarTest, ARGBSobelXY_Any) { | 1797 TEST_F(LibYUVPlanarTest, ARGBSobelXY_Any) { |
1757 int max_diff = TestSobelXY(benchmark_width_ - 1, benchmark_height_, | 1798 int max_diff = TestSobelXY(benchmark_width_ - 1, benchmark_height_, |
1758 benchmark_iterations_, disable_cpu_flags_, +1, 0); | 1799 benchmark_iterations_, |
| 1800 disable_cpu_flags_, benchmark_cpu_info_, +1, 0); |
1759 EXPECT_EQ(0, max_diff); | 1801 EXPECT_EQ(0, max_diff); |
1760 } | 1802 } |
1761 | 1803 |
1762 TEST_F(LibYUVPlanarTest, ARGBSobelXY_Unaligned) { | 1804 TEST_F(LibYUVPlanarTest, ARGBSobelXY_Unaligned) { |
1763 int max_diff = TestSobelXY(benchmark_width_, benchmark_height_, | 1805 int max_diff = TestSobelXY(benchmark_width_, benchmark_height_, |
1764 benchmark_iterations_, disable_cpu_flags_, +1, 1); | 1806 benchmark_iterations_, |
| 1807 disable_cpu_flags_, benchmark_cpu_info_, +1, 1); |
1765 EXPECT_EQ(0, max_diff); | 1808 EXPECT_EQ(0, max_diff); |
1766 } | 1809 } |
1767 | 1810 |
1768 TEST_F(LibYUVPlanarTest, ARGBSobelXY_Invert) { | 1811 TEST_F(LibYUVPlanarTest, ARGBSobelXY_Invert) { |
1769 int max_diff = TestSobelXY(benchmark_width_, benchmark_height_, | 1812 int max_diff = TestSobelXY(benchmark_width_, benchmark_height_, |
1770 benchmark_iterations_, disable_cpu_flags_, -1, 0); | 1813 benchmark_iterations_, |
| 1814 disable_cpu_flags_, benchmark_cpu_info_, -1, 0); |
1771 EXPECT_EQ(0, max_diff); | 1815 EXPECT_EQ(0, max_diff); |
1772 } | 1816 } |
1773 | 1817 |
1774 TEST_F(LibYUVPlanarTest, ARGBSobelXY_Opt) { | 1818 TEST_F(LibYUVPlanarTest, ARGBSobelXY_Opt) { |
1775 int max_diff = TestSobelXY(benchmark_width_, benchmark_height_, | 1819 int max_diff = TestSobelXY(benchmark_width_, benchmark_height_, |
1776 benchmark_iterations_, disable_cpu_flags_, +1, 0); | 1820 benchmark_iterations_, |
| 1821 disable_cpu_flags_, benchmark_cpu_info_, +1, 0); |
1777 EXPECT_EQ(0, max_diff); | 1822 EXPECT_EQ(0, max_diff); |
1778 } | 1823 } |
1779 | 1824 |
1780 static int TestBlur(int width, int height, int benchmark_iterations, | 1825 static int TestBlur(int width, int height, int benchmark_iterations, |
1781 int disable_cpu_flags, int invert, int off, int radius) { | 1826 int disable_cpu_flags, int benchmark_cpu_info, |
| 1827 int invert, int off, int radius) { |
1782 if (width < 1) { | 1828 if (width < 1) { |
1783 width = 1; | 1829 width = 1; |
1784 } | 1830 } |
1785 const int kBpp = 4; | 1831 const int kBpp = 4; |
1786 const int kStride = width * kBpp; | 1832 const int kStride = width * kBpp; |
1787 align_buffer_64(src_argb_a, kStride * height + off); | 1833 align_buffer_64(src_argb_a, kStride * height + off); |
1788 align_buffer_64(dst_cumsum, width * height * 16); | 1834 align_buffer_64(dst_cumsum, width * height * 16); |
1789 align_buffer_64(dst_argb_c, kStride * height); | 1835 align_buffer_64(dst_argb_c, kStride * height); |
1790 align_buffer_64(dst_argb_opt, kStride * height); | 1836 align_buffer_64(dst_argb_opt, kStride * height); |
1791 for (int i = 0; i < kStride * height; ++i) { | 1837 for (int i = 0; i < kStride * height; ++i) { |
1792 src_argb_a[i + off] = (fastrand() & 0xff); | 1838 src_argb_a[i + off] = (fastrand() & 0xff); |
1793 } | 1839 } |
1794 memset(dst_cumsum, 0, width * height * 16); | 1840 memset(dst_cumsum, 0, width * height * 16); |
1795 memset(dst_argb_c, 0, kStride * height); | 1841 memset(dst_argb_c, 0, kStride * height); |
1796 memset(dst_argb_opt, 0, kStride * height); | 1842 memset(dst_argb_opt, 0, kStride * height); |
1797 | 1843 |
1798 MaskCpuFlags(disable_cpu_flags); | 1844 MaskCpuFlags(disable_cpu_flags); |
1799 ARGBBlur(src_argb_a + off, kStride, | 1845 ARGBBlur(src_argb_a + off, kStride, |
1800 dst_argb_c, kStride, | 1846 dst_argb_c, kStride, |
1801 reinterpret_cast<int32*>(dst_cumsum), width * 4, | 1847 reinterpret_cast<int32*>(dst_cumsum), width * 4, |
1802 width, invert * height, radius); | 1848 width, invert * height, radius); |
1803 MaskCpuFlags(-1); | 1849 MaskCpuFlags(benchmark_cpu_info); |
1804 for (int i = 0; i < benchmark_iterations; ++i) { | 1850 for (int i = 0; i < benchmark_iterations; ++i) { |
1805 ARGBBlur(src_argb_a + off, kStride, | 1851 ARGBBlur(src_argb_a + off, kStride, |
1806 dst_argb_opt, kStride, | 1852 dst_argb_opt, kStride, |
1807 reinterpret_cast<int32*>(dst_cumsum), width * 4, | 1853 reinterpret_cast<int32*>(dst_cumsum), width * 4, |
1808 width, invert * height, radius); | 1854 width, invert * height, radius); |
1809 } | 1855 } |
1810 int max_diff = 0; | 1856 int max_diff = 0; |
1811 for (int i = 0; i < kStride * height; ++i) { | 1857 for (int i = 0; i < kStride * height; ++i) { |
1812 int abs_diff = | 1858 int abs_diff = |
1813 abs(static_cast<int>(dst_argb_c[i]) - | 1859 abs(static_cast<int>(dst_argb_c[i]) - |
1814 static_cast<int>(dst_argb_opt[i])); | 1860 static_cast<int>(dst_argb_opt[i])); |
1815 if (abs_diff > max_diff) { | 1861 if (abs_diff > max_diff) { |
1816 max_diff = abs_diff; | 1862 max_diff = abs_diff; |
1817 } | 1863 } |
1818 } | 1864 } |
1819 free_aligned_buffer_64(src_argb_a); | 1865 free_aligned_buffer_64(src_argb_a); |
1820 free_aligned_buffer_64(dst_cumsum); | 1866 free_aligned_buffer_64(dst_cumsum); |
1821 free_aligned_buffer_64(dst_argb_c); | 1867 free_aligned_buffer_64(dst_argb_c); |
1822 free_aligned_buffer_64(dst_argb_opt); | 1868 free_aligned_buffer_64(dst_argb_opt); |
1823 return max_diff; | 1869 return max_diff; |
1824 } | 1870 } |
1825 | 1871 |
1826 static const int kBlurSize = 55; | 1872 static const int kBlurSize = 55; |
1827 TEST_F(LibYUVPlanarTest, ARGBBlur_Any) { | 1873 TEST_F(LibYUVPlanarTest, ARGBBlur_Any) { |
1828 int max_diff = TestBlur(benchmark_width_ - 1, benchmark_height_, | 1874 int max_diff = TestBlur(benchmark_width_ - 1, benchmark_height_, |
1829 benchmark_iterations_, disable_cpu_flags_, | 1875 benchmark_iterations_, |
| 1876 disable_cpu_flags_, benchmark_cpu_info_, |
1830 +1, 0, kBlurSize); | 1877 +1, 0, kBlurSize); |
1831 EXPECT_LE(max_diff, 1); | 1878 EXPECT_LE(max_diff, 1); |
1832 } | 1879 } |
1833 | 1880 |
1834 TEST_F(LibYUVPlanarTest, ARGBBlur_Unaligned) { | 1881 TEST_F(LibYUVPlanarTest, ARGBBlur_Unaligned) { |
1835 int max_diff = TestBlur(benchmark_width_, benchmark_height_, | 1882 int max_diff = TestBlur(benchmark_width_, benchmark_height_, |
1836 benchmark_iterations_, disable_cpu_flags_, | 1883 benchmark_iterations_, |
| 1884 disable_cpu_flags_, benchmark_cpu_info_, |
1837 +1, 1, kBlurSize); | 1885 +1, 1, kBlurSize); |
1838 EXPECT_LE(max_diff, 1); | 1886 EXPECT_LE(max_diff, 1); |
1839 } | 1887 } |
1840 | 1888 |
1841 TEST_F(LibYUVPlanarTest, ARGBBlur_Invert) { | 1889 TEST_F(LibYUVPlanarTest, ARGBBlur_Invert) { |
1842 int max_diff = TestBlur(benchmark_width_, benchmark_height_, | 1890 int max_diff = TestBlur(benchmark_width_, benchmark_height_, |
1843 benchmark_iterations_, disable_cpu_flags_, | 1891 benchmark_iterations_, |
| 1892 disable_cpu_flags_, benchmark_cpu_info_, |
1844 -1, 0, kBlurSize); | 1893 -1, 0, kBlurSize); |
1845 EXPECT_LE(max_diff, 1); | 1894 EXPECT_LE(max_diff, 1); |
1846 } | 1895 } |
1847 | 1896 |
1848 TEST_F(LibYUVPlanarTest, ARGBBlur_Opt) { | 1897 TEST_F(LibYUVPlanarTest, ARGBBlur_Opt) { |
1849 int max_diff = TestBlur(benchmark_width_, benchmark_height_, | 1898 int max_diff = TestBlur(benchmark_width_, benchmark_height_, |
1850 benchmark_iterations_, disable_cpu_flags_, | 1899 benchmark_iterations_, |
| 1900 disable_cpu_flags_, benchmark_cpu_info_, |
1851 +1, 0, kBlurSize); | 1901 +1, 0, kBlurSize); |
1852 EXPECT_LE(max_diff, 1); | 1902 EXPECT_LE(max_diff, 1); |
1853 } | 1903 } |
1854 | 1904 |
1855 static const int kBlurSmallSize = 5; | 1905 static const int kBlurSmallSize = 5; |
1856 TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Any) { | 1906 TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Any) { |
1857 int max_diff = TestBlur(benchmark_width_ - 1, benchmark_height_, | 1907 int max_diff = TestBlur(benchmark_width_ - 1, benchmark_height_, |
1858 benchmark_iterations_, disable_cpu_flags_, | 1908 benchmark_iterations_, |
| 1909 disable_cpu_flags_, benchmark_cpu_info_, |
1859 +1, 0, kBlurSmallSize); | 1910 +1, 0, kBlurSmallSize); |
1860 EXPECT_LE(max_diff, 1); | 1911 EXPECT_LE(max_diff, 1); |
1861 } | 1912 } |
1862 | 1913 |
1863 TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Unaligned) { | 1914 TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Unaligned) { |
1864 int max_diff = TestBlur(benchmark_width_, benchmark_height_, | 1915 int max_diff = TestBlur(benchmark_width_, benchmark_height_, |
1865 benchmark_iterations_, disable_cpu_flags_, | 1916 benchmark_iterations_, |
| 1917 disable_cpu_flags_, benchmark_cpu_info_, |
1866 +1, 1, kBlurSmallSize); | 1918 +1, 1, kBlurSmallSize); |
1867 EXPECT_LE(max_diff, 1); | 1919 EXPECT_LE(max_diff, 1); |
1868 } | 1920 } |
1869 | 1921 |
1870 TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Invert) { | 1922 TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Invert) { |
1871 int max_diff = TestBlur(benchmark_width_, benchmark_height_, | 1923 int max_diff = TestBlur(benchmark_width_, benchmark_height_, |
1872 benchmark_iterations_, disable_cpu_flags_, | 1924 benchmark_iterations_, |
| 1925 disable_cpu_flags_, benchmark_cpu_info_, |
1873 -1, 0, kBlurSmallSize); | 1926 -1, 0, kBlurSmallSize); |
1874 EXPECT_LE(max_diff, 1); | 1927 EXPECT_LE(max_diff, 1); |
1875 } | 1928 } |
1876 | 1929 |
1877 TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Opt) { | 1930 TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Opt) { |
1878 int max_diff = TestBlur(benchmark_width_, benchmark_height_, | 1931 int max_diff = TestBlur(benchmark_width_, benchmark_height_, |
1879 benchmark_iterations_, disable_cpu_flags_, | 1932 benchmark_iterations_, |
| 1933 disable_cpu_flags_, benchmark_cpu_info_, |
1880 +1, 0, kBlurSmallSize); | 1934 +1, 0, kBlurSmallSize); |
1881 EXPECT_LE(max_diff, 1); | 1935 EXPECT_LE(max_diff, 1); |
1882 } | 1936 } |
1883 | 1937 |
1884 TEST_F(LibYUVPlanarTest, TestARGBPolynomial) { | 1938 TEST_F(LibYUVPlanarTest, TestARGBPolynomial) { |
1885 SIMD_ALIGNED(uint8 orig_pixels[1280][4]); | 1939 SIMD_ALIGNED(uint8 orig_pixels[1280][4]); |
1886 SIMD_ALIGNED(uint8 dst_pixels_opt[1280][4]); | 1940 SIMD_ALIGNED(uint8 dst_pixels_opt[1280][4]); |
1887 SIMD_ALIGNED(uint8 dst_pixels_c[1280][4]); | 1941 SIMD_ALIGNED(uint8 dst_pixels_c[1280][4]); |
1888 memset(orig_pixels, 0, sizeof(orig_pixels)); | 1942 memset(orig_pixels, 0, sizeof(orig_pixels)); |
1889 | 1943 |
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1946 for (int i = 0; i < 1280; ++i) { | 2000 for (int i = 0; i < 1280; ++i) { |
1947 orig_pixels[i][0] = i; | 2001 orig_pixels[i][0] = i; |
1948 orig_pixels[i][1] = i / 2; | 2002 orig_pixels[i][1] = i / 2; |
1949 orig_pixels[i][2] = i / 3; | 2003 orig_pixels[i][2] = i / 3; |
1950 orig_pixels[i][3] = i; | 2004 orig_pixels[i][3] = i; |
1951 } | 2005 } |
1952 | 2006 |
1953 MaskCpuFlags(disable_cpu_flags_); | 2007 MaskCpuFlags(disable_cpu_flags_); |
1954 ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0, | 2008 ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0, |
1955 &kWarmifyPolynomial[0], 1280, 1); | 2009 &kWarmifyPolynomial[0], 1280, 1); |
1956 MaskCpuFlags(-1); | 2010 MaskCpuFlags(benchmark_cpu_info_); |
1957 | 2011 |
1958 for (int i = 0; i < benchmark_pixels_div1280_; ++i) { | 2012 for (int i = 0; i < benchmark_pixels_div1280_; ++i) { |
1959 ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0, | 2013 ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0, |
1960 &kWarmifyPolynomial[0], 1280, 1); | 2014 &kWarmifyPolynomial[0], 1280, 1); |
1961 } | 2015 } |
1962 | 2016 |
1963 for (int i = 0; i < 1280; ++i) { | 2017 for (int i = 0; i < 1280; ++i) { |
1964 EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]); | 2018 EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]); |
1965 EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]); | 2019 EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]); |
1966 EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]); | 2020 EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]); |
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2023 for (int i = 0; i < 1280; ++i) { | 2077 for (int i = 0; i < 1280; ++i) { |
2024 orig_pixels[i][0] = i; | 2078 orig_pixels[i][0] = i; |
2025 orig_pixels[i][1] = i / 2; | 2079 orig_pixels[i][1] = i / 2; |
2026 orig_pixels[i][2] = i / 3; | 2080 orig_pixels[i][2] = i / 3; |
2027 orig_pixels[i][3] = i; | 2081 orig_pixels[i][3] = i; |
2028 } | 2082 } |
2029 | 2083 |
2030 MaskCpuFlags(disable_cpu_flags_); | 2084 MaskCpuFlags(disable_cpu_flags_); |
2031 ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0, | 2085 ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0, |
2032 lumacolortable, 1280, 1); | 2086 lumacolortable, 1280, 1); |
2033 MaskCpuFlags(-1); | 2087 MaskCpuFlags(benchmark_cpu_info_); |
2034 | 2088 |
2035 for (int i = 0; i < benchmark_pixels_div1280_; ++i) { | 2089 for (int i = 0; i < benchmark_pixels_div1280_; ++i) { |
2036 ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0, | 2090 ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0, |
2037 lumacolortable, 1280, 1); | 2091 lumacolortable, 1280, 1); |
2038 } | 2092 } |
2039 for (int i = 0; i < 1280; ++i) { | 2093 for (int i = 0; i < 1280; ++i) { |
2040 EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]); | 2094 EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]); |
2041 EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]); | 2095 EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]); |
2042 EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]); | 2096 EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]); |
2043 EXPECT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]); | 2097 EXPECT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]); |
2044 } | 2098 } |
2045 | 2099 |
2046 free_aligned_buffer_64(lumacolortable); | 2100 free_aligned_buffer_64(lumacolortable); |
2047 } | 2101 } |
2048 | 2102 |
2049 TEST_F(LibYUVPlanarTest, TestARGBCopyAlpha) { | 2103 TEST_F(LibYUVPlanarTest, TestARGBCopyAlpha) { |
2050 const int kSize = benchmark_width_ * benchmark_height_ * 4; | 2104 const int kSize = benchmark_width_ * benchmark_height_ * 4; |
2051 align_buffer_64(orig_pixels, kSize); | 2105 align_buffer_64(orig_pixels, kSize); |
2052 align_buffer_64(dst_pixels_opt, kSize); | 2106 align_buffer_64(dst_pixels_opt, kSize); |
2053 align_buffer_64(dst_pixels_c, kSize); | 2107 align_buffer_64(dst_pixels_c, kSize); |
2054 | 2108 |
2055 MemRandomize(orig_pixels, kSize); | 2109 MemRandomize(orig_pixels, kSize); |
2056 MemRandomize(dst_pixels_opt, kSize); | 2110 MemRandomize(dst_pixels_opt, kSize); |
2057 memcpy(dst_pixels_c, dst_pixels_opt, kSize); | 2111 memcpy(dst_pixels_c, dst_pixels_opt, kSize); |
2058 | 2112 |
2059 MaskCpuFlags(disable_cpu_flags_); | 2113 MaskCpuFlags(disable_cpu_flags_); |
2060 ARGBCopyAlpha(orig_pixels, benchmark_width_ * 4, | 2114 ARGBCopyAlpha(orig_pixels, benchmark_width_ * 4, |
2061 dst_pixels_c, benchmark_width_ * 4, | 2115 dst_pixels_c, benchmark_width_ * 4, |
2062 benchmark_width_, benchmark_height_); | 2116 benchmark_width_, benchmark_height_); |
2063 MaskCpuFlags(-1); | 2117 MaskCpuFlags(benchmark_cpu_info_); |
2064 | 2118 |
2065 for (int i = 0; i < benchmark_iterations_; ++i) { | 2119 for (int i = 0; i < benchmark_iterations_; ++i) { |
2066 ARGBCopyAlpha(orig_pixels, benchmark_width_ * 4, | 2120 ARGBCopyAlpha(orig_pixels, benchmark_width_ * 4, |
2067 dst_pixels_opt, benchmark_width_ * 4, | 2121 dst_pixels_opt, benchmark_width_ * 4, |
2068 benchmark_width_, benchmark_height_); | 2122 benchmark_width_, benchmark_height_); |
2069 } | 2123 } |
2070 for (int i = 0; i < kSize; ++i) { | 2124 for (int i = 0; i < kSize; ++i) { |
2071 EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); | 2125 EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); |
2072 } | 2126 } |
2073 | 2127 |
2074 free_aligned_buffer_64(dst_pixels_c); | 2128 free_aligned_buffer_64(dst_pixels_c); |
2075 free_aligned_buffer_64(dst_pixels_opt); | 2129 free_aligned_buffer_64(dst_pixels_opt); |
2076 free_aligned_buffer_64(orig_pixels); | 2130 free_aligned_buffer_64(orig_pixels); |
2077 } | 2131 } |
2078 | 2132 |
2079 TEST_F(LibYUVPlanarTest, TestARGBCopyYToAlpha) { | 2133 TEST_F(LibYUVPlanarTest, TestARGBCopyYToAlpha) { |
2080 const int kPixels = benchmark_width_ * benchmark_height_; | 2134 const int kPixels = benchmark_width_ * benchmark_height_; |
2081 align_buffer_64(orig_pixels, kPixels); | 2135 align_buffer_64(orig_pixels, kPixels); |
2082 align_buffer_64(dst_pixels_opt, kPixels * 4); | 2136 align_buffer_64(dst_pixels_opt, kPixels * 4); |
2083 align_buffer_64(dst_pixels_c, kPixels * 4); | 2137 align_buffer_64(dst_pixels_c, kPixels * 4); |
2084 | 2138 |
2085 MemRandomize(orig_pixels, kPixels); | 2139 MemRandomize(orig_pixels, kPixels); |
2086 MemRandomize(dst_pixels_opt, kPixels * 4); | 2140 MemRandomize(dst_pixels_opt, kPixels * 4); |
2087 memcpy(dst_pixels_c, dst_pixels_opt, kPixels * 4); | 2141 memcpy(dst_pixels_c, dst_pixels_opt, kPixels * 4); |
2088 | 2142 |
2089 MaskCpuFlags(disable_cpu_flags_); | 2143 MaskCpuFlags(disable_cpu_flags_); |
2090 ARGBCopyYToAlpha(orig_pixels, benchmark_width_, | 2144 ARGBCopyYToAlpha(orig_pixels, benchmark_width_, |
2091 dst_pixels_c, benchmark_width_ * 4, | 2145 dst_pixels_c, benchmark_width_ * 4, |
2092 benchmark_width_, benchmark_height_); | 2146 benchmark_width_, benchmark_height_); |
2093 MaskCpuFlags(-1); | 2147 MaskCpuFlags(benchmark_cpu_info_); |
2094 | 2148 |
2095 for (int i = 0; i < benchmark_iterations_; ++i) { | 2149 for (int i = 0; i < benchmark_iterations_; ++i) { |
2096 ARGBCopyYToAlpha(orig_pixels, benchmark_width_, | 2150 ARGBCopyYToAlpha(orig_pixels, benchmark_width_, |
2097 dst_pixels_opt, benchmark_width_ * 4, | 2151 dst_pixels_opt, benchmark_width_ * 4, |
2098 benchmark_width_, benchmark_height_); | 2152 benchmark_width_, benchmark_height_); |
2099 } | 2153 } |
2100 for (int i = 0; i < kPixels * 4; ++i) { | 2154 for (int i = 0; i < kPixels * 4; ++i) { |
2101 EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); | 2155 EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); |
2102 } | 2156 } |
2103 | 2157 |
2104 free_aligned_buffer_64(dst_pixels_c); | 2158 free_aligned_buffer_64(dst_pixels_c); |
2105 free_aligned_buffer_64(dst_pixels_opt); | 2159 free_aligned_buffer_64(dst_pixels_opt); |
2106 free_aligned_buffer_64(orig_pixels); | 2160 free_aligned_buffer_64(orig_pixels); |
2107 } | 2161 } |
2108 | 2162 |
2109 static int TestARGBRect(int width, int height, int benchmark_iterations, | 2163 static int TestARGBRect(int width, int height, int benchmark_iterations, |
2110 int disable_cpu_flags, int invert, int off, int bpp) { | 2164 int disable_cpu_flags, int benchmark_cpu_info, |
| 2165 int invert, int off, int bpp) { |
2111 if (width < 1) { | 2166 if (width < 1) { |
2112 width = 1; | 2167 width = 1; |
2113 } | 2168 } |
2114 const int kStride = width * bpp; | 2169 const int kStride = width * bpp; |
2115 const int kSize = kStride * height; | 2170 const int kSize = kStride * height; |
2116 const uint32 v32 = fastrand() & (bpp == 4 ? 0xffffffff : 0xff); | 2171 const uint32 v32 = fastrand() & (bpp == 4 ? 0xffffffff : 0xff); |
2117 | 2172 |
2118 align_buffer_64(dst_argb_c, kSize + off); | 2173 align_buffer_64(dst_argb_c, kSize + off); |
2119 align_buffer_64(dst_argb_opt, kSize + off); | 2174 align_buffer_64(dst_argb_opt, kSize + off); |
2120 | 2175 |
2121 MemRandomize(dst_argb_c + off, kSize); | 2176 MemRandomize(dst_argb_c + off, kSize); |
2122 memcpy(dst_argb_opt + off, dst_argb_c + off, kSize); | 2177 memcpy(dst_argb_opt + off, dst_argb_c + off, kSize); |
2123 | 2178 |
2124 MaskCpuFlags(disable_cpu_flags); | 2179 MaskCpuFlags(disable_cpu_flags); |
2125 if (bpp == 4) { | 2180 if (bpp == 4) { |
2126 ARGBRect(dst_argb_c + off, kStride, 0, 0, width, invert * height, v32); | 2181 ARGBRect(dst_argb_c + off, kStride, 0, 0, width, invert * height, v32); |
2127 } else { | 2182 } else { |
2128 SetPlane(dst_argb_c + off, kStride, width, invert * height, v32); | 2183 SetPlane(dst_argb_c + off, kStride, width, invert * height, v32); |
2129 } | 2184 } |
2130 | 2185 |
2131 MaskCpuFlags(-1); | 2186 MaskCpuFlags(benchmark_cpu_info); |
2132 for (int i = 0; i < benchmark_iterations; ++i) { | 2187 for (int i = 0; i < benchmark_iterations; ++i) { |
2133 if (bpp == 4) { | 2188 if (bpp == 4) { |
2134 ARGBRect(dst_argb_opt + off, kStride, 0, 0, width, invert * height, v32); | 2189 ARGBRect(dst_argb_opt + off, kStride, 0, 0, width, invert * height, v32); |
2135 } else { | 2190 } else { |
2136 SetPlane(dst_argb_opt + off, kStride, width, invert * height, v32); | 2191 SetPlane(dst_argb_opt + off, kStride, width, invert * height, v32); |
2137 } | 2192 } |
2138 } | 2193 } |
2139 int max_diff = 0; | 2194 int max_diff = 0; |
2140 for (int i = 0; i < kStride * height; ++i) { | 2195 for (int i = 0; i < kStride * height; ++i) { |
2141 int abs_diff = | 2196 int abs_diff = |
2142 abs(static_cast<int>(dst_argb_c[i + off]) - | 2197 abs(static_cast<int>(dst_argb_c[i + off]) - |
2143 static_cast<int>(dst_argb_opt[i + off])); | 2198 static_cast<int>(dst_argb_opt[i + off])); |
2144 if (abs_diff > max_diff) { | 2199 if (abs_diff > max_diff) { |
2145 max_diff = abs_diff; | 2200 max_diff = abs_diff; |
2146 } | 2201 } |
2147 } | 2202 } |
2148 free_aligned_buffer_64(dst_argb_c); | 2203 free_aligned_buffer_64(dst_argb_c); |
2149 free_aligned_buffer_64(dst_argb_opt); | 2204 free_aligned_buffer_64(dst_argb_opt); |
2150 return max_diff; | 2205 return max_diff; |
2151 } | 2206 } |
2152 | 2207 |
2153 TEST_F(LibYUVPlanarTest, ARGBRect_Any) { | 2208 TEST_F(LibYUVPlanarTest, ARGBRect_Any) { |
2154 int max_diff = TestARGBRect(benchmark_width_ - 1, benchmark_height_, | 2209 int max_diff = TestARGBRect(benchmark_width_ - 1, benchmark_height_, |
2155 benchmark_iterations_, disable_cpu_flags_, | 2210 benchmark_iterations_, |
| 2211 disable_cpu_flags_, benchmark_cpu_info_, |
2156 +1, 0, 4); | 2212 +1, 0, 4); |
2157 EXPECT_EQ(0, max_diff); | 2213 EXPECT_EQ(0, max_diff); |
2158 } | 2214 } |
2159 | 2215 |
2160 TEST_F(LibYUVPlanarTest, ARGBRect_Unaligned) { | 2216 TEST_F(LibYUVPlanarTest, ARGBRect_Unaligned) { |
2161 int max_diff = TestARGBRect(benchmark_width_, benchmark_height_, | 2217 int max_diff = TestARGBRect(benchmark_width_, benchmark_height_, |
2162 benchmark_iterations_, disable_cpu_flags_, | 2218 benchmark_iterations_, |
| 2219 disable_cpu_flags_, benchmark_cpu_info_, |
2163 +1, 1, 4); | 2220 +1, 1, 4); |
2164 EXPECT_EQ(0, max_diff); | 2221 EXPECT_EQ(0, max_diff); |
2165 } | 2222 } |
2166 | 2223 |
2167 TEST_F(LibYUVPlanarTest, ARGBRect_Invert) { | 2224 TEST_F(LibYUVPlanarTest, ARGBRect_Invert) { |
2168 int max_diff = TestARGBRect(benchmark_width_, benchmark_height_, | 2225 int max_diff = TestARGBRect(benchmark_width_, benchmark_height_, |
2169 benchmark_iterations_, disable_cpu_flags_, | 2226 benchmark_iterations_, |
| 2227 disable_cpu_flags_, benchmark_cpu_info_, |
2170 -1, 0, 4); | 2228 -1, 0, 4); |
2171 EXPECT_EQ(0, max_diff); | 2229 EXPECT_EQ(0, max_diff); |
2172 } | 2230 } |
2173 | 2231 |
2174 TEST_F(LibYUVPlanarTest, ARGBRect_Opt) { | 2232 TEST_F(LibYUVPlanarTest, ARGBRect_Opt) { |
2175 int max_diff = TestARGBRect(benchmark_width_, benchmark_height_, | 2233 int max_diff = TestARGBRect(benchmark_width_, benchmark_height_, |
2176 benchmark_iterations_, disable_cpu_flags_, | 2234 benchmark_iterations_, |
| 2235 disable_cpu_flags_, benchmark_cpu_info_, |
2177 +1, 0, 4); | 2236 +1, 0, 4); |
2178 EXPECT_EQ(0, max_diff); | 2237 EXPECT_EQ(0, max_diff); |
2179 } | 2238 } |
2180 | 2239 |
2181 TEST_F(LibYUVPlanarTest, SetPlane_Any) { | 2240 TEST_F(LibYUVPlanarTest, SetPlane_Any) { |
2182 int max_diff = TestARGBRect(benchmark_width_ - 1, benchmark_height_, | 2241 int max_diff = TestARGBRect(benchmark_width_ - 1, benchmark_height_, |
2183 benchmark_iterations_, disable_cpu_flags_, | 2242 benchmark_iterations_, |
| 2243 disable_cpu_flags_, benchmark_cpu_info_, |
2184 +1, 0, 1); | 2244 +1, 0, 1); |
2185 EXPECT_EQ(0, max_diff); | 2245 EXPECT_EQ(0, max_diff); |
2186 } | 2246 } |
2187 | 2247 |
2188 TEST_F(LibYUVPlanarTest, SetPlane_Unaligned) { | 2248 TEST_F(LibYUVPlanarTest, SetPlane_Unaligned) { |
2189 int max_diff = TestARGBRect(benchmark_width_, benchmark_height_, | 2249 int max_diff = TestARGBRect(benchmark_width_, benchmark_height_, |
2190 benchmark_iterations_, disable_cpu_flags_, | 2250 benchmark_iterations_, |
| 2251 disable_cpu_flags_, benchmark_cpu_info_, |
2191 +1, 1, 1); | 2252 +1, 1, 1); |
2192 EXPECT_EQ(0, max_diff); | 2253 EXPECT_EQ(0, max_diff); |
2193 } | 2254 } |
2194 | 2255 |
2195 TEST_F(LibYUVPlanarTest, SetPlane_Invert) { | 2256 TEST_F(LibYUVPlanarTest, SetPlane_Invert) { |
2196 int max_diff = TestARGBRect(benchmark_width_, benchmark_height_, | 2257 int max_diff = TestARGBRect(benchmark_width_, benchmark_height_, |
2197 benchmark_iterations_, disable_cpu_flags_, | 2258 benchmark_iterations_, |
| 2259 disable_cpu_flags_, benchmark_cpu_info_, |
2198 -1, 0, 1); | 2260 -1, 0, 1); |
2199 EXPECT_EQ(0, max_diff); | 2261 EXPECT_EQ(0, max_diff); |
2200 } | 2262 } |
2201 | 2263 |
2202 TEST_F(LibYUVPlanarTest, SetPlane_Opt) { | 2264 TEST_F(LibYUVPlanarTest, SetPlane_Opt) { |
2203 int max_diff = TestARGBRect(benchmark_width_, benchmark_height_, | 2265 int max_diff = TestARGBRect(benchmark_width_, benchmark_height_, |
2204 benchmark_iterations_, disable_cpu_flags_, | 2266 benchmark_iterations_, |
| 2267 disable_cpu_flags_, benchmark_cpu_info_, |
2205 +1, 0, 1); | 2268 +1, 0, 1); |
2206 EXPECT_EQ(0, max_diff); | 2269 EXPECT_EQ(0, max_diff); |
2207 } | 2270 } |
2208 | 2271 |
2209 } // namespace libyuv | 2272 } // namespace libyuv |
OLD | NEW |