OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2014 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "media/formats/mp2t/es_parser_mpeg1audio.h" | |
6 | |
7 #include <list> | |
8 | |
9 #include "base/basictypes.h" | |
10 #include "base/logging.h" | |
11 #include "base/strings/string_number_conversions.h" | |
12 #include "media/base/audio_timestamp_helper.h" | |
13 #include "media/base/bit_reader.h" | |
14 #include "media/base/buffers.h" | |
15 #include "media/base/channel_layout.h" | |
16 #include "media/base/stream_parser_buffer.h" | |
17 #include "media/formats/common/offset_byte_queue.h" | |
18 #include "media/formats/mp2t/mp2t_common.h" | |
19 | |
20 namespace media { | |
21 namespace mp2t { | |
22 | |
// Map that determines which bitrate_index & channel mode combinations
// are allowed for Layer II.
//
// Rows are indexed by the 4-bit bitrate_index field of the frame header
// (0 = free format, 15 = bad); the trailing comment on each row is the
// corresponding MPEG-1 Layer II bitrate in kbps.
//
// Columns follow the order used by the "allowed combinations" table on the
// reference page: { single channel, stereo, intensity (joint) stereo,
// dual channel }. Note that this is NOT the encoding of the 2-bit
// channel_mode header field (0 = stereo, 1 = joint stereo, 2 = dual channel,
// 3 = single channel), so a raw channel_mode value has to be remapped
// before it is used as a column index.
//
// Derived from: http://mpgedit.org/mpgedit/mpeg_format/MP3Format.html
static const bool kIsAllowed[16][4] = {
  { true, true, true, true },      // free
  { true, false, false, false },   // 32
  { true, false, false, false },   // 48
  { true, false, false, false },   // 56
  { true, true, true, true },      // 64
  { true, false, false, false },   // 80
  { true, true, true, true },      // 96
  { true, true, true, true },      // 112
  { true, true, true, true },      // 128
  { true, true, true, true },      // 160
  { true, true, true, true },      // 192
  { false, true, true, true },     // 224
  { false, true, true, true },     // 256
  { false, true, true, true },     // 320
  { false, true, true, true },     // 384
  { false, false, false, false }   // bad
};
44 | |
// Translates the (version, layer) pair found in the frame header into a
// column index of |kBitrateMap|. The first index is the 2-bit version field,
// the second index is the 2-bit layer field. Index 5 selects the all-zero
// "reserved" column of |kBitrateMap|.
// Derived from: http://mpgedit.org/mpgedit/mpeg_format/MP3Format.html
static const int kVersionLayerMap[4][4] = {
    // Layer field:  0 (reserved), 1 (L3), 2 (L2), 3 (L1)
    {5, 4, 4, 3},  // version 0: MPEG 2.5
    {5, 5, 5, 5},  // version 1: reserved
    {5, 4, 4, 3},  // version 2: MPEG 2
    {5, 2, 1, 0},  // version 3: MPEG 1
};
55 | |
// Frame bitrate lookup, in kbps. The row is the 4-bit bitrate index field of
// the frame header; the column is the value obtained from
// |kVersionLayerMap|. A value of 0 marks a combination with no usable
// bitrate (free format, bad index, or reserved version/layer).
// Derived from: http://mpgedit.org/mpgedit/mpeg_format/MP3Format.html
static const int kBitrateMap[16][6] = {
    // V1L1, V1L2, V1L3, V2L1, V2L2 & V2L3, reserved
    {0, 0, 0, 0, 0, 0},
    {32, 32, 32, 32, 8, 0},
    {64, 48, 40, 48, 16, 0},
    {96, 56, 48, 56, 24, 0},
    {128, 64, 56, 64, 32, 0},
    {160, 80, 64, 80, 40, 0},
    {192, 96, 80, 96, 48, 0},
    {224, 112, 96, 112, 56, 0},
    {256, 128, 112, 128, 64, 0},
    {288, 160, 128, 144, 80, 0},
    {320, 192, 160, 160, 96, 0},
    {352, 224, 192, 176, 112, 0},
    {384, 256, 224, 192, 128, 0},
    {416, 320, 256, 224, 144, 0},
    {448, 384, 320, 256, 160, 0},
    {0, 0, 0, 0, 0, 0},
};
78 | |
// Sample rate lookup, in Hz. The row is the 2-bit sample rate index field of
// the frame header and the column is the 2-bit version field. A value of 0
// marks a reserved index or a reserved version.
// Derived from: http://mpgedit.org/mpgedit/mpeg_format/MP3Format.html
static const int kSampleRateMap[4][4] = {
    // V2.5, reserved, V2, V1
    {11025, 0, 22050, 44100},
    {12000, 0, 24000, 48000},
    {8000, 0, 16000, 32000},
    {0, 0, 0, 0},
};
89 | |
#if 0
// Currently compiled out, together with the XING detection code that uses it.
// Byte offset, counted from the end of the MP3 header, at which a "Xing" or
// "Info" tag is expected; such a tag marks the frame as a silent metadata
// frame. Values taken from FFmpeg.
static const int kXingHeaderMap[2][2] = {
    {32, 17},
    {17, 9},
};
#endif
95 | |
// Named values of the frame header bit fields.

// 2-bit version field.
static const int kVersion2_5 = 0;
static const int kVersionReserved = 1;
static const int kVersion2 = 2;

// 2-bit layer field.
static const int kLayerReserved = 0;
static const int kLayer3 = 1;
static const int kLayer2 = 2;
static const int kLayer1 = 3;

// 4-bit bitrate index field.
static const int kBitrateFree = 0;
static const int kBitrateBad = 0xf;

// 2-bit sample rate index field.
static const int kSampleRateReserved = 3;
107 | |
108 int ParseMpegAudioFrameHeader(const uint8* data, | |
109 int size, | |
110 int* frame_size, | |
111 int* sample_rate, | |
112 ChannelLayout* channel_layout, | |
113 int* sample_count, | |
114 bool* metadata_frame) { | |
115 DCHECK(data); | |
116 DCHECK_GE(size, 0); | |
117 DCHECK(frame_size); | |
118 | |
119 if (size < 4) | |
120 return 0; | |
121 | |
122 BitReader reader(data, size); | |
123 int sync; | |
124 int version; | |
125 int layer; | |
126 int is_protected; | |
127 int bitrate_index; | |
128 int sample_rate_index; | |
129 int has_padding; | |
130 int is_private; | |
131 int channel_mode; | |
132 int other_flags; | |
133 | |
134 if (!reader.ReadBits(11, &sync) || | |
135 !reader.ReadBits(2, &version) || | |
136 !reader.ReadBits(2, &layer) || | |
137 !reader.ReadBits(1, &is_protected) || | |
138 !reader.ReadBits(4, &bitrate_index) || | |
139 !reader.ReadBits(2, &sample_rate_index) || | |
140 !reader.ReadBits(1, &has_padding) || | |
141 !reader.ReadBits(1, &is_private) || | |
142 !reader.ReadBits(2, &channel_mode) || | |
143 !reader.ReadBits(6, &other_flags)) { | |
144 return -1; | |
145 } | |
146 | |
147 DVLOG(2) << "Header data :" << std::hex | |
148 << " sync 0x" << sync | |
149 << " version 0x" << version | |
150 << " layer 0x" << layer | |
151 << " bitrate_index 0x" << bitrate_index | |
152 << " sample_rate_index 0x" << sample_rate_index | |
153 << " channel_mode 0x" << channel_mode; | |
154 | |
155 if (sync != 0x7ff || | |
156 version == kVersionReserved || | |
157 layer == kLayerReserved || | |
158 bitrate_index == kBitrateFree || bitrate_index == kBitrateBad || | |
159 sample_rate_index == kSampleRateReserved) { | |
160 return -1; | |
161 } | |
162 | |
163 if (layer == kLayer2 && kIsAllowed[bitrate_index][channel_mode]) { | |
164 return -1; | |
165 } | |
166 | |
167 int bitrate = kBitrateMap[bitrate_index][kVersionLayerMap[version][layer]]; | |
168 | |
169 if (bitrate == 0) { | |
170 return -1; | |
171 } | |
172 | |
173 DVLOG(2) << " bitrate " << bitrate; | |
174 | |
175 int frame_sample_rate = kSampleRateMap[sample_rate_index][version]; | |
176 if (frame_sample_rate == 0) { | |
177 return -1; | |
178 } | |
179 | |
180 if (sample_rate) | |
181 *sample_rate = frame_sample_rate; | |
182 | |
183 // http://teslabs.com/openplayer/docs/docs/specs/mp3_structure2.pdf | |
184 // Table 2.1.5 | |
185 int samples_per_frame; | |
186 switch (layer) { | |
187 case kLayer1: | |
188 samples_per_frame = 384; | |
189 break; | |
190 | |
191 case kLayer2: | |
192 samples_per_frame = 1152; | |
193 break; | |
194 | |
195 case kLayer3: | |
196 if (version == kVersion2 || version == kVersion2_5) | |
197 samples_per_frame = 576; | |
198 else | |
199 samples_per_frame = 1152; | |
200 break; | |
201 | |
202 default: | |
203 return -1; | |
204 } | |
205 | |
206 if (sample_count) | |
207 *sample_count = samples_per_frame; | |
208 | |
209 // http://teslabs.com/openplayer/docs/docs/specs/mp3_structure2.pdf | |
210 // Text just below Table 2.1.5. | |
211 if (layer == kLayer1) { | |
212 // This formulation is a slight variation on the equation below, | |
213 // but has slightly different truncation characteristics to deal | |
214 // with the fact that Layer 1 has 4 byte "slots" instead of single | |
215 // byte ones. | |
216 *frame_size = 4 * (12 * bitrate * 1000 / frame_sample_rate); | |
217 } else { | |
218 *frame_size = | |
219 ((samples_per_frame / 8) * bitrate * 1000) / frame_sample_rate; | |
220 } | |
221 | |
222 if (has_padding) | |
223 *frame_size += (layer == kLayer1) ? 4 : 1; | |
224 | |
225 if (channel_layout) { | |
226 // Map Stereo(0), Joint Stereo(1), and Dual Channel (2) to | |
227 // CHANNEL_LAYOUT_STEREO and Single Channel (3) to CHANNEL_LAYOUT_MONO. | |
228 *channel_layout = | |
229 (channel_mode == 3) ? CHANNEL_LAYOUT_MONO : CHANNEL_LAYOUT_STEREO; | |
230 } | |
231 | |
232 if (metadata_frame) | |
233 *metadata_frame = false; | |
234 | |
235 const int header_bytes_read = reader.bits_read() / 8; | |
236 #if 1 | |
237 return header_bytes_read; | |
238 #else | |
239 if (layer != kLayer3) | |
240 return header_bytes_read; | |
241 | |
242 // Check if this is a XING frame and tell the base parser to skip it if so. | |
243 const int xing_header_index = | |
244 kXingHeaderMap[version == kVersion2 || | |
245 version == kVersion2_5][channel_mode == 3]; | |
246 uint32_t tag = 0; | |
247 | |
248 // It's not a XING frame if the frame isn't big enough to be one. | |
249 if (*frame_size < | |
250 header_bytes_read + xing_header_index + static_cast<int>(sizeof(tag))) { | |
251 return header_bytes_read; | |
252 } | |
253 | |
254 // If we don't have enough data available to check, return 0 so frame parsing | |
255 // will be retried once more data is available. | |
256 if (!reader.SkipBits(xing_header_index * 8) || | |
257 !reader.ReadBits(sizeof(tag) * 8, &tag)) { | |
258 return 0; | |
259 } | |
260 | |
261 // Check to see if the tag contains 'Xing' or 'Info' | |
262 if (tag == 0x496e666f || tag == 0x58696e67) { | |
263 if (metadata_frame) | |
264 *metadata_frame = true; | |
265 return reader.bits_read() / 8; | |
266 } | |
267 | |
268 // If it wasn't a XING frame, just return the number consumed bytes. | |
269 return header_bytes_read; | |
270 #endif | |
271 } | |
272 | |
// Minimum number of bytes that must be available to parse an MPEG audio frame
// header: ParseMpegAudioFrameHeader() reads 32 bits of header fields.
static const int kMpegAudioHeaderMinSize = 4;
274 | |
275 | |
276 struct EsParserMpeg1Audio::Mpeg1AudioFrame { | |
277 // Pointer to the ES data. | |
278 const uint8* data; | |
279 | |
280 // Frame size. | |
281 int size; | |
282 | |
283 // Number of samples in the frame. | |
284 int sample_count; | |
285 | |
286 // Frame offset in the ES queue. | |
287 int64 queue_offset; | |
288 }; | |
289 | |
290 bool EsParserMpeg1Audio::LookForMpeg1AudioFrame( | |
291 Mpeg1AudioFrame* mpeg1audio_frame) { | |
292 int es_size; | |
293 const uint8* es; | |
294 es_queue_->Peek(&es, &es_size); | |
295 | |
296 int max_offset = es_size - kMpegAudioHeaderMinSize; | |
297 if (max_offset <= 0) | |
298 return false; | |
299 | |
300 for (int offset = 0; offset < max_offset; offset++) { | |
301 const uint8* cur_buf = &es[offset]; | |
302 if (cur_buf[0] != 0xff) | |
303 continue; | |
304 | |
305 int frame_size; | |
306 int sample_rate; | |
307 ChannelLayout channel_layout; | |
308 int sample_count; | |
309 bool metadata_frame; | |
310 | |
311 int remaining_size = es_size - offset; | |
312 int header_size = | |
313 ParseMpegAudioFrameHeader(cur_buf, remaining_size, | |
314 &frame_size, &sample_rate, &channel_layout, | |
315 &sample_count, &metadata_frame); | |
316 | |
317 if (header_size < 0) | |
318 continue; | |
319 | |
320 if (remaining_size < frame_size) { | |
321 // Not a full frame: will resume when we have more data. | |
322 es_queue_->Pop(offset); | |
323 return false; | |
324 } | |
325 | |
326 // Check whether there is another frame | |
327 // |frame_size| apart from the current one. | |
328 if (remaining_size >= frame_size + 2 && | |
329 cur_buf[frame_size] != 0xff) { | |
330 continue; | |
331 } | |
332 | |
333 es_queue_->Pop(offset); | |
334 es_queue_->Peek(&mpeg1audio_frame->data, &es_size); | |
335 mpeg1audio_frame->queue_offset = es_queue_->head(); | |
336 mpeg1audio_frame->size = frame_size; | |
337 mpeg1audio_frame->sample_count = sample_count; | |
338 DVLOG(LOG_LEVEL_ES) | |
339 << "MPEG1 audio syncword @ pos=" << mpeg1audio_frame->queue_offset | |
340 << " frame_size=" << mpeg1audio_frame->size; | |
341 DVLOG(LOG_LEVEL_ES) | |
342 << "MPEG1 audio header: " | |
343 << base::HexEncode(mpeg1audio_frame->data, kMpegAudioHeaderMinSize); | |
344 return true; | |
345 } | |
346 | |
347 es_queue_->Pop(max_offset); | |
348 return false; | |
349 } | |
350 | |
351 void EsParserMpeg1Audio::SkipMpeg1AudioFrame( | |
352 const Mpeg1AudioFrame& mpeg1audio_frame) { | |
353 DCHECK_EQ(mpeg1audio_frame.queue_offset, es_queue_->head()); | |
354 es_queue_->Pop(mpeg1audio_frame.size); | |
355 } | |
356 | |
357 EsParserMpeg1Audio::EsParserMpeg1Audio( | |
358 const NewAudioConfigCB& new_audio_config_cb, | |
359 const EmitBufferCB& emit_buffer_cb) | |
360 : new_audio_config_cb_(new_audio_config_cb), | |
361 emit_buffer_cb_(emit_buffer_cb), | |
362 es_queue_(new media::OffsetByteQueue()) { | |
363 } | |
364 | |
365 EsParserMpeg1Audio::~EsParserMpeg1Audio() { | |
366 } | |
367 | |
368 bool EsParserMpeg1Audio::Parse( | |
369 const uint8* buf, int size, | |
370 base::TimeDelta pts, | |
371 DecodeTimestamp dts) { | |
372 // The incoming PTS applies to the access unit that comes just after | |
373 // the beginning of |buf|. | |
374 if (pts != kNoTimestamp()) | |
375 pts_list_.push_back(EsPts(es_queue_->tail(), pts)); | |
376 | |
377 // Copy the input data to the ES buffer. | |
378 es_queue_->Push(buf, size); | |
379 | |
380 // Look for every MPEG1 audio frame in the ES buffer. | |
381 Mpeg1AudioFrame mpeg1audio_frame; | |
382 while (LookForMpeg1AudioFrame(&mpeg1audio_frame)) { | |
383 // Update the audio configuration if needed. | |
384 DCHECK_GE(mpeg1audio_frame.size, kMpegAudioHeaderMinSize); | |
385 if (!UpdateAudioConfiguration(mpeg1audio_frame.data)) | |
386 return false; | |
387 | |
388 // Get the PTS & the duration of this access unit. | |
389 while (!pts_list_.empty() && | |
390 pts_list_.front().first <= mpeg1audio_frame.queue_offset) { | |
391 audio_timestamp_helper_->SetBaseTimestamp(pts_list_.front().second); | |
392 pts_list_.pop_front(); | |
393 } | |
394 | |
395 if (audio_timestamp_helper_->base_timestamp() == kNoTimestamp()) { | |
396 DVLOG(1) << "Audio frame with unknown timestamp"; | |
397 return false; | |
398 } | |
399 base::TimeDelta current_pts = audio_timestamp_helper_->GetTimestamp(); | |
400 base::TimeDelta frame_duration = | |
401 audio_timestamp_helper_->GetFrameDuration( | |
402 mpeg1audio_frame.sample_count); | |
403 | |
404 // Emit an audio frame. | |
405 bool is_key_frame = true; | |
406 | |
407 // TODO(wolenetz/acolwell): Validate and use a common cross-parser TrackId | |
408 // type and allow multiple audio tracks. See https://crbug.com/341581. | |
409 scoped_refptr<StreamParserBuffer> stream_parser_buffer = | |
410 StreamParserBuffer::CopyFrom( | |
411 mpeg1audio_frame.data, | |
412 mpeg1audio_frame.size, | |
413 is_key_frame, | |
414 DemuxerStream::AUDIO, 0); | |
415 stream_parser_buffer->set_timestamp(current_pts); | |
416 stream_parser_buffer->set_duration(frame_duration); | |
417 emit_buffer_cb_.Run(stream_parser_buffer); | |
418 | |
419 // Update the PTS of the next frame. | |
420 audio_timestamp_helper_->AddFrames(mpeg1audio_frame.sample_count); | |
421 | |
422 // Skip the current frame. | |
423 SkipMpeg1AudioFrame(mpeg1audio_frame); | |
424 } | |
425 | |
426 return true; | |
427 } | |
428 | |
429 void EsParserMpeg1Audio::Flush() { | |
430 } | |
431 | |
432 void EsParserMpeg1Audio::Reset() { | |
433 es_queue_.reset(new media::OffsetByteQueue()); | |
434 pts_list_.clear(); | |
435 last_audio_decoder_config_ = AudioDecoderConfig(); | |
436 } | |
437 | |
438 bool EsParserMpeg1Audio::UpdateAudioConfiguration( | |
439 const uint8* mpeg1audio_header) { | |
440 int frame_size; | |
441 int sample_rate; | |
442 ChannelLayout channel_layout; | |
443 int sample_count; | |
444 bool metadata_frame; | |
445 int header_size = | |
446 ParseMpegAudioFrameHeader(mpeg1audio_header, kMpegAudioHeaderMinSize, | |
447 &frame_size, &sample_rate, &channel_layout, | |
448 &sample_count, &metadata_frame); | |
449 if (header_size < 0) | |
450 return false; | |
451 | |
452 #if 0 | |
453 // The following code is written according to ISO 14496 Part 3 Table 1.13 - | |
454 // Syntax of AudioSpecificConfig. | |
455 uint16 extra_data_int = | |
456 // Note: adts_profile is in the range [0,3], since the ADTS header only | |
457 // allows two bits for its value. | |
458 ((adts_profile + 1) << 11) + | |
459 (frequency_index << 7) + | |
460 (channel_configuration << 3); | |
461 uint8 extra_data[2] = { | |
462 static_cast<uint8>(extra_data_int >> 8), | |
463 static_cast<uint8>(extra_data_int & 0xff) | |
464 }; | |
465 #endif | |
466 | |
467 AudioDecoderConfig audio_decoder_config( | |
468 kCodecMP3, | |
469 kSampleFormatS16, | |
470 channel_layout, | |
471 sample_rate, | |
472 NULL, 0, | |
473 false); | |
474 | |
475 if (!audio_decoder_config.Matches(last_audio_decoder_config_)) { | |
476 DVLOG(1) << "Sampling frequency: " << sample_rate; | |
477 // Reset the timestamp helper to use a new time scale. | |
478 if (audio_timestamp_helper_ && | |
479 audio_timestamp_helper_->base_timestamp() != kNoTimestamp()) { | |
480 base::TimeDelta base_timestamp = audio_timestamp_helper_->GetTimestamp(); | |
481 audio_timestamp_helper_.reset( | |
482 new AudioTimestampHelper(sample_rate)); | |
483 audio_timestamp_helper_->SetBaseTimestamp(base_timestamp); | |
484 } else { | |
485 audio_timestamp_helper_.reset( | |
486 new AudioTimestampHelper(sample_rate)); | |
487 } | |
488 // Audio config notification. | |
489 last_audio_decoder_config_ = audio_decoder_config; | |
490 new_audio_config_cb_.Run(audio_decoder_config); | |
491 } | |
492 | |
493 return true; | |
494 } | |
495 | |
496 } // namespace mp2t | |
497 } // namespace media | |
OLD | NEW |