media/gpu/video_encode_accelerator_unittest.cc - Issue 2270713002: VEA unittest: align input buffers to 128-byte boundary on ARM only.

Side by Side Diff: media/gpu/video_encode_accelerator_unittest.cc

Issue 2270713002: VEA unittest: align input buffers to 128-byte boundary on ARM only. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Created 4 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2013 The Chromium Authors. All rights reserved.	1 // Copyright 2013 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include <inttypes.h>	5 #include <inttypes.h>

6 #include <stddef.h>	6 #include <stddef.h>

7 #include <stdint.h>	7 #include <stdint.h>

8	8

9 #include <algorithm>	9 #include <algorithm>

10 #include <memory>	10 #include <memory>

(...skipping 138 matching lines...)
149 bool g_fake_encoder = false;	149 bool g_fake_encoder = false;

150	150

151 // Environment to store test stream data for all test cases.	151 // Environment to store test stream data for all test cases.

152 class VideoEncodeAcceleratorTestEnvironment;	152 class VideoEncodeAcceleratorTestEnvironment;

153 VideoEncodeAcceleratorTestEnvironment* g_env;	153 VideoEncodeAcceleratorTestEnvironment* g_env;

154	154

155 // The number of frames to be encoded. This variable is set by the switch	155 // The number of frames to be encoded. This variable is set by the switch

156 // "--num_frames_to_encode". Ignored if 0.	156 // "--num_frames_to_encode". Ignored if 0.

157 int g_num_frames_to_encode = 0;	157 int g_num_frames_to_encode = 0;

158	158

	159 #ifdef ARCH_CPU_ARMEL

	160 // ARM performs CPU cache management with CPU cache line granularity. We thus

	161 // need to ensure our buffers are CPU cache line-aligned (64 byte-aligned).

	162 // Otherwise newer kernels will refuse to accept them, and on older kernels

	163 // we'll be treating ourselves to random corruption.

	164 // Moreover, some hardware codecs require 128-byte alignment for physical

	165 // buffers.

	166 const size_t kPlatformBufferAlignment = 128;

	167 #else

	168 const size_t kPlatformBufferAlignment = 1;

	169 #endif

	170

	171 inline static size_t AlignToPlatformRequirements(size_t value) {

	172 return base::bits::Align(value, kPlatformBufferAlignment);

	173 }

	174

159 // An aligned STL allocator.	175 // An aligned STL allocator.

160 template <typename T, size_t ByteAlignment>	176 template <typename T, size_t ByteAlignment>

161 class AlignedAllocator : public std::allocator<T> {	177 class AlignedAllocator : public std::allocator<T> {

162 public:	178 public:

163 typedef size_t size_type;	179 typedef size_t size_type;

164 typedef T* pointer;	180 typedef T* pointer;

165	181

166 template <class T1>	182 template <class T1>

167 struct rebind {	183 struct rebind {

168 typedef AlignedAllocator<T1, ByteAlignment> other;	184 typedef AlignedAllocator<T1, ByteAlignment> other;

(...skipping 30 matching lines...)
199	215

200 gfx::Size visible_size;	216 gfx::Size visible_size;

201 gfx::Size coded_size;	217 gfx::Size coded_size;

202 unsigned int num_frames;	218 unsigned int num_frames;

203	219

204 // Original unaligned input file name provided as an argument to the test.	220 // Original unaligned input file name provided as an argument to the test.

205 // And the file must be an I420 (YUV planar) raw stream.	221 // And the file must be an I420 (YUV planar) raw stream.

206 std::string in_filename;	222 std::string in_filename;

207	223

208 // A vector used to prepare aligned input buffers of |in_filename|. This	224 // A vector used to prepare aligned input buffers of |in_filename|. This

209 // makes sure starting address of YUV planes are 64 bytes-aligned.	225 // makes sure starting addresses of YUV planes are aligned to

210 std::vector<char, AlignedAllocator<char, 64>> aligned_in_file_data;	226 // kPlatformBufferAlignment bytes.

	227 std::vector<char, AlignedAllocator<char, kPlatformBufferAlignment>>

	228 aligned_in_file_data;

211	229

212 // Byte size of a frame of |aligned_in_file_data|.	230 // Byte size of a frame of |aligned_in_file_data|.

213 size_t aligned_buffer_size;	231 size_t aligned_buffer_size;

214	232

215 // Byte size for each aligned plane of a frame.	233 // Byte size for each aligned plane of a frame.

216 std::vector<size_t> aligned_plane_size;	234 std::vector<size_t> aligned_plane_size;

217	235

218 std::string out_filename;	236 std::string out_filename;

219 VideoCodecProfile requested_profile;	237 VideoCodecProfile requested_profile;

220 unsigned int requested_bitrate;	238 unsigned int requested_bitrate;

221 unsigned int requested_framerate;	239 unsigned int requested_framerate;

222 unsigned int requested_subsequent_bitrate;	240 unsigned int requested_subsequent_bitrate;

223 unsigned int requested_subsequent_framerate;	241 unsigned int requested_subsequent_framerate;

224 };	242 };

225	243

226 inline static size_t Align64Bytes(size_t value) {

227 return base::bits::Align(value, 64);

228 }

229

230 // Return the |percentile| from a sorted vector.	244 // Return the |percentile| from a sorted vector.

231 static base::TimeDelta Percentile(	245 static base::TimeDelta Percentile(

232 const std::vector<base::TimeDelta>& sorted_values,	246 const std::vector<base::TimeDelta>& sorted_values,

233 unsigned int percentile) {	247 unsigned int percentile) {

234 size_t size = sorted_values.size();	248 size_t size = sorted_values.size();

235 LOG_ASSERT(size > 0UL);	249 LOG_ASSERT(size > 0UL);

236 LOG_ASSERT(percentile <= 100UL);	250 LOG_ASSERT(percentile <= 100UL);

237 // Use Nearest Rank method in http://en.wikipedia.org/wiki/Percentile.	251 // Use Nearest Rank method in http://en.wikipedia.org/wiki/Percentile.

238 int index =	252 int index =

239 std::max(static_cast<int>(ceil(0.01f * percentile * size)) - 1, 0);	253 std::max(static_cast<int>(ceil(0.01f * percentile * size)) - 1, 0);

(...skipping 20 matching lines...)
260	274

261 static std::string FilePathStringTypeToString(	275 static std::string FilePathStringTypeToString(

262 const base::FilePath::StringType& str) {	276 const base::FilePath::StringType& str) {

263 #if defined(OS_WIN)	277 #if defined(OS_WIN)

264 return base::WideToUTF8(str);	278 return base::WideToUTF8(str);

265 #else	279 #else

266 return str;	280 return str;

267 #endif // defined(OS_WIN)	281 #endif // defined(OS_WIN)

268 }	282 }

269	283

270 // ARM performs CPU cache management with CPU cache line granularity. We thus	284 // Some platforms may have requirements on physical memory buffer alignment.

271 // need to ensure our buffers are CPU cache line-aligned (64 byte-aligned).

272 // Otherwise newer kernels will refuse to accept them, and on older kernels

273 // we'll be treating ourselves to random corruption.

274 // Since we are just mapping and passing chunks of the input file directly to	285 // Since we are just mapping and passing chunks of the input file directly to

275 // the VEA as input frames to avoid copying large chunks of raw data on each	286 // the VEA as input frames, to avoid copying large chunks of raw data on each

276 // frame and thus affecting performance measurements, we have to prepare a	287 // frame, and thus affecting performance measurements, we have to prepare a

277 // temporary file with all planes aligned to 64-byte boundaries beforehand.	288 // temporary file with all planes aligned to the required alignment beforehand.

278 static void CreateAlignedInputStreamFile(const gfx::Size& coded_size,	289 static void CreateAlignedInputStreamFile(const gfx::Size& coded_size,

279 TestStream* test_stream) {	290 TestStream* test_stream) {

280 // Test case may have many encoders and memory should be prepared once.	291 // Test case may have many encoders and memory should be prepared once.

281 if (test_stream->coded_size == coded_size &&	292 if (test_stream->coded_size == coded_size &&

282 !test_stream->aligned_in_file_data.empty())	293 !test_stream->aligned_in_file_data.empty())

283 return;	294 return;

284	295

285 // All encoders in multiple encoder test reuse the same test_stream, make	296 // All encoders in multiple encoder test reuse the same test_stream, make

286 // sure they requested the same coded_size	297 // sure they requested the same coded_size

287 ASSERT_TRUE(test_stream->aligned_in_file_data.empty() ||	298 ASSERT_TRUE(test_stream->aligned_in_file_data.empty() ||

288 coded_size == test_stream->coded_size);	299 coded_size == test_stream->coded_size);

289 test_stream->coded_size = coded_size;	300 test_stream->coded_size = coded_size;

290	301

291 size_t num_planes = VideoFrame::NumPlanes(kInputFormat);	302 size_t num_planes = VideoFrame::NumPlanes(kInputFormat);

292 std::vector<size_t> padding_sizes(num_planes);	303 std::vector<size_t> padding_sizes(num_planes);

293 std::vector<size_t> coded_bpl(num_planes);	304 std::vector<size_t> coded_bpl(num_planes);

294 std::vector<size_t> visible_bpl(num_planes);	305 std::vector<size_t> visible_bpl(num_planes);

295 std::vector<size_t> visible_plane_rows(num_planes);	306 std::vector<size_t> visible_plane_rows(num_planes);

296	307

297 // Calculate padding in bytes to be added after each plane required to keep	308 // Calculate padding in bytes to be added after each plane required to keep

298 // starting addresses of all planes at a 64 byte boudnary. This padding will	309 // starting addresses of all planes at a byte boundary required by the

299 // be added after each plane when copying to the temporary file.	310 // platform. This padding will be added after each plane when copying to the

	311 // temporary file.

300 // At the same time we also need to take into account coded_size requested by	312 // At the same time we also need to take into account coded_size requested by

301 // the VEA; each row of visible_bpl bytes in the original file needs to be	313 // the VEA; each row of visible_bpl bytes in the original file needs to be

302 // copied into a row of coded_bpl bytes in the aligned file.	314 // copied into a row of coded_bpl bytes in the aligned file.

303 for (size_t i = 0; i < num_planes; i++) {	315 for (size_t i = 0; i < num_planes; i++) {

304 const size_t size =	316 const size_t size =

305 VideoFrame::PlaneSize(kInputFormat, i, coded_size).GetArea();	317 VideoFrame::PlaneSize(kInputFormat, i, coded_size).GetArea();

306 test_stream->aligned_plane_size.push_back(Align64Bytes(size));	318 test_stream->aligned_plane_size.push_back(

	319 AlignToPlatformRequirements(size));

307 test_stream->aligned_buffer_size += test_stream->aligned_plane_size.back();	320 test_stream->aligned_buffer_size += test_stream->aligned_plane_size.back();

308	321

309 coded_bpl[i] = VideoFrame::RowBytes(i, kInputFormat, coded_size.width());	322 coded_bpl[i] = VideoFrame::RowBytes(i, kInputFormat, coded_size.width());

310 visible_bpl[i] = VideoFrame::RowBytes(i, kInputFormat,	323 visible_bpl[i] = VideoFrame::RowBytes(i, kInputFormat,

311 test_stream->visible_size.width());	324 test_stream->visible_size.width());

312 visible_plane_rows[i] =	325 visible_plane_rows[i] =

313 VideoFrame::Rows(i, kInputFormat, test_stream->visible_size.height());	326 VideoFrame::Rows(i, kInputFormat, test_stream->visible_size.height());

314 const size_t padding_rows =	327 const size_t padding_rows =

315 VideoFrame::Rows(i, kInputFormat, coded_size.height()) -	328 VideoFrame::Rows(i, kInputFormat, coded_size.height()) -

316 visible_plane_rows[i];	329 visible_plane_rows[i];

317 padding_sizes[i] = padding_rows * coded_bpl[i] + Align64Bytes(size) - size;	330 padding_sizes[i] =

	331 padding_rows * coded_bpl[i] + AlignToPlatformRequirements(size) - size;

318 }	332 }

319	333

320 base::FilePath src_file(StringToFilePathStringType(test_stream->in_filename));	334 base::FilePath src_file(StringToFilePathStringType(test_stream->in_filename));

321 int64_t src_file_size = 0;	335 int64_t src_file_size = 0;

322 LOG_ASSERT(base::GetFileSize(src_file, &src_file_size));	336 LOG_ASSERT(base::GetFileSize(src_file, &src_file_size));

323	337

324 size_t visible_buffer_size =	338 size_t visible_buffer_size =

325 VideoFrame::AllocationSize(kInputFormat, test_stream->visible_size);	339 VideoFrame::AllocationSize(kInputFormat, test_stream->visible_size);

326 LOG_ASSERT(src_file_size % visible_buffer_size == 0U)	340 LOG_ASSERT(src_file_size % visible_buffer_size == 0U)

327 << "Stream byte size is not a product of calculated frame byte size";	341 << "Stream byte size is not a product of calculated frame byte size";

328	342

329 test_stream->num_frames =	343 test_stream->num_frames =

330 static_cast<unsigned int>(src_file_size / visible_buffer_size);	344 static_cast<unsigned int>(src_file_size / visible_buffer_size);

331	345

332 LOG_ASSERT(test_stream->aligned_buffer_size > 0UL);	346 LOG_ASSERT(test_stream->aligned_buffer_size > 0UL);

333 test_stream->aligned_in_file_data.resize(test_stream->aligned_buffer_size *	347 test_stream->aligned_in_file_data.resize(test_stream->aligned_buffer_size *

334 test_stream->num_frames);	348 test_stream->num_frames);

335	349

336 base::File src(src_file, base::File::FLAG_OPEN | base::File::FLAG_READ);	350 base::File src(src_file, base::File::FLAG_OPEN | base::File::FLAG_READ);

337 std::vector<char> src_data(visible_buffer_size);	351 std::vector<char> src_data(visible_buffer_size);

338 off_t src_offset = 0, dest_offset = 0;	352 off_t src_offset = 0, dest_offset = 0;

339 for (size_t frame = 0; frame < test_stream->num_frames; frame++) {	353 for (size_t frame = 0; frame < test_stream->num_frames; frame++) {

340 LOG_ASSERT(src.Read(src_offset, &src_data[0],	354 LOG_ASSERT(src.Read(src_offset, &src_data[0],

341 static_cast<int>(visible_buffer_size)) ==	355 static_cast<int>(visible_buffer_size)) ==

342 static_cast<int>(visible_buffer_size));	356 static_cast<int>(visible_buffer_size));

343 const char* src_ptr = &src_data[0];	357 const char* src_ptr = &src_data[0];

344 for (size_t i = 0; i < num_planes; i++) {	358 for (size_t i = 0; i < num_planes; i++) {

345 // Assert that each plane of frame starts at 64 byte boundary.	359 // Assert that each plane of frame starts at required byte boundary.

346 ASSERT_EQ(dest_offset & 63, 0)	360 ASSERT_EQ(dest_offset & kPlatformBufferAlignment, 0)
	kcwu 2016/08/23 09:09:02: (kPlatformBufferAlignment - 1)
	Pawel Osciak 2016/08/23 09:18:32: On 2016/08/23 09:09:02, kcwu wrote: > (kPlatformBufferAlignment - 1) Ah nice catch, thanks.
347 << "Planes of frame should be mapped at a 64 byte boundary";	361 << "Planes of frame should be mapped per platform requirements";

348 for (size_t j = 0; j < visible_plane_rows[i]; j++) {	362 for (size_t j = 0; j < visible_plane_rows[i]; j++) {

349 memcpy(&test_stream->aligned_in_file_data[dest_offset], src_ptr,	363 memcpy(&test_stream->aligned_in_file_data[dest_offset], src_ptr,

350 visible_bpl[i]);	364 visible_bpl[i]);

351 src_ptr += visible_bpl[i];	365 src_ptr += visible_bpl[i];

352 dest_offset += static_cast<off_t>(coded_bpl[i]);	366 dest_offset += static_cast<off_t>(coded_bpl[i]);

353 }	367 }

354 dest_offset += static_cast<off_t>(padding_sizes[i]);	368 dest_offset += static_cast<off_t>(padding_sizes[i]);

355 }	369 }

356 src_offset += static_cast<off_t>(visible_buffer_size);	370 src_offset += static_cast<off_t>(visible_buffer_size);

357 }	371 }

358 src.Close();	372 src.Close();

359	373

360 #if defined(OS_POSIX)

361 // Assert that memory mapped of file starts at 64 byte boundary. So each

362 // plane of frames also start at 64 byte boundary.

363 ASSERT_EQ(reinterpret_cast<off_t>(&test_stream->aligned_in_file_data[0]) & 63,

364 0)

365 << "File should be mapped at a 64 byte boundary";

366 #endif // defined(OS_POSIX)

367

368 LOG_ASSERT(test_stream->num_frames > 0UL);	374 LOG_ASSERT(test_stream->num_frames > 0UL);

369 }	375 }

370	376

371 // Parse |data| into its constituent parts, set the various output fields	377 // Parse |data| into its constituent parts, set the various output fields

372 // accordingly, read in video stream, and store them to |test_streams|.	378 // accordingly, read in video stream, and store them to |test_streams|.

373 static void ParseAndReadTestStreamData(const base::FilePath::StringType& data,	379 static void ParseAndReadTestStreamData(const base::FilePath::StringType& data,

374 ScopedVector<TestStream>* test_streams) {	380 ScopedVector<TestStream>* test_streams) {

375 // Split the string to individual test stream data.	381 // Split the string to individual test stream data.

376 std::vector<base::FilePath::StringType> test_streams_data =	382 std::vector<base::FilePath::StringType> test_streams_data =

377 base::SplitString(data, base::FilePath::StringType(1, ';'),	383 base::SplitString(data, base::FilePath::StringType(1, ';'),

(...skipping 1556 matching lines...)
1934	1940

1935 media::g_env =	1941 media::g_env =

1936 reinterpret_cast<media::VideoEncodeAcceleratorTestEnvironment*>(	1942 reinterpret_cast<media::VideoEncodeAcceleratorTestEnvironment*>(

1937 testing::AddGlobalTestEnvironment(	1943 testing::AddGlobalTestEnvironment(

1938 new media::VideoEncodeAcceleratorTestEnvironment(	1944 new media::VideoEncodeAcceleratorTestEnvironment(

1939 std::move(test_stream_data), log_path, run_at_fps,	1945 std::move(test_stream_data), log_path, run_at_fps,

1940 needs_encode_latency, verify_all_output)));	1946 needs_encode_latency, verify_all_output)));

1941	1947

1942 return RUN_ALL_TESTS();	1948 return RUN_ALL_TESTS();

1943 }	1949 }

OLD	NEW

« no previous file with comments | « no previous file | no next file » | no next file with comments »