OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include "media/webm/webm_parser.h" |
| 6 |
| 7 // This file contains code to parse WebM file elements. It was created |
| 8 // from information in the Matroska spec. |
| 9 // http://www.matroska.org/technical/specs/index.html |
| 10 |
| 11 #include <iomanip> |
| 12 |
| 13 #include "base/logging.h" |
| 14 #include "media/webm/webm_constants.h" |
| 15 |
| 16 namespace media { |
| 17 |
// Deepest nesting level the parser will descend to. Some WebM elements are
// lists of other elements, and ParseElements() recurses once per nested
// list, so this bounds the number of levels of recursion allowed.
const int kMaxLevelDepth = 6;
| 21 |
// Describes how ParseElements() treats the payload of an element.
enum ElementType {
  LIST = 0,    // Container of child elements; handled by ParseElementList().
  UINT = 1,    // Big-endian unsigned integer; handled by ParseUInt().
  FLOAT = 2,   // 4 or 8 byte floating point value; handled by ParseFloat().
  BINARY = 3,  // Raw bytes forwarded to WebMParserClient::OnBinary().
  STRING = 4,  // Bytes forwarded as a std::string to OnString().
  SBLOCK = 5,  // SimpleBlock payload; handled by ParseSimpleBlock().
  SKIP = 6,    // Valid element whose payload is ignored.
};
| 31 |
| 32 struct ElementIdInfo { |
| 33 int level_; |
| 34 ElementType type_; |
| 35 int id_; |
| 36 }; |
| 37 |
| 38 struct ListElementInfo { |
| 39 int id_; |
| 40 const ElementIdInfo* id_info_; |
| 41 int id_info_size_; |
| 42 }; |
| 43 |
// The following are tables indicating what IDs are valid sub-elements
// of particular elements. If an element is encountered that doesn't
// appear in the list, it is intended to be a parsing error; for now
// ParseElements() logs such elements and skips over them (see the TODO
// there). Some elements are marked as SKIP because they are valid, but
// we don't care about them right now.
| 49 static const ElementIdInfo kClusterIds[] = { |
| 50 {2, UINT, kWebMIdTimecode}, |
| 51 {2, SBLOCK, kWebMIdSimpleBlock}, |
| 52 {2, LIST, kWebMIdBlockGroup}, |
| 53 }; |
| 54 |
| 55 static const ElementIdInfo kInfoIds[] = { |
| 56 {2, SKIP, kWebMIdSegmentUID}, |
| 57 {2, UINT, kWebMIdTimecodeScale}, |
| 58 {2, FLOAT, kWebMIdDuration}, |
| 59 {2, SKIP, kWebMIdDateUTC}, |
| 60 {2, SKIP, kWebMIdTitle}, |
| 61 {2, SKIP, kWebMIdMuxingApp}, |
| 62 {2, SKIP, kWebMIdWritingApp}, |
| 63 }; |
| 64 |
| 65 static const ElementIdInfo kTracksIds[] = { |
| 66 {2, LIST, kWebMIdTrackEntry}, |
| 67 }; |
| 68 |
| 69 static const ElementIdInfo kTrackEntryIds[] = { |
| 70 {3, UINT, kWebMIdTrackNumber}, |
| 71 {3, SKIP, kWebMIdTrackUID}, |
| 72 {3, UINT, kWebMIdTrackType}, |
| 73 {3, SKIP, kWebMIdFlagEnabled}, |
| 74 {3, SKIP, kWebMIdFlagDefault}, |
| 75 {3, SKIP, kWebMIdFlagForced}, |
| 76 {3, UINT, kWebMIdFlagLacing}, |
| 77 {3, UINT, kWebMIdDefaultDuration}, |
| 78 {3, SKIP, kWebMIdName}, |
| 79 {3, SKIP, kWebMIdLanguage}, |
| 80 {3, STRING, kWebMIdCodecID}, |
| 81 {3, BINARY, kWebMIdCodecPrivate}, |
| 82 {3, SKIP, kWebMIdCodecName}, |
| 83 {3, LIST, kWebMIdVideo}, |
| 84 {3, LIST, kWebMIdAudio}, |
| 85 }; |
| 86 |
| 87 static const ElementIdInfo kVideoIds[] = { |
| 88 {4, SKIP, kWebMIdFlagInterlaced}, |
| 89 {4, SKIP, kWebMIdStereoMode}, |
| 90 {4, UINT, kWebMIdPixelWidth}, |
| 91 {4, UINT, kWebMIdPixelHeight}, |
| 92 {4, SKIP, kWebMIdPixelCropBottom}, |
| 93 {4, SKIP, kWebMIdPixelCropTop}, |
| 94 {4, SKIP, kWebMIdPixelCropLeft}, |
| 95 {4, SKIP, kWebMIdPixelCropRight}, |
| 96 {4, SKIP, kWebMIdDisplayWidth}, |
| 97 {4, SKIP, kWebMIdDisplayHeight}, |
| 98 {4, SKIP, kWebMIdDisplayUnit}, |
| 99 {4, SKIP, kWebMIdAspectRatioType}, |
| 100 }; |
| 101 |
| 102 static const ElementIdInfo kAudioIds[] = { |
| 103 {4, SKIP, kWebMIdSamplingFrequency}, |
| 104 {4, SKIP, kWebMIdOutputSamplingFrequency}, |
| 105 {4, UINT, kWebMIdChannels}, |
| 106 {4, SKIP, kWebMIdBitDepth}, |
| 107 }; |
| 108 |
| 109 static const ElementIdInfo kClustersOnly[] = { |
| 110 {1, LIST, kWebMIdCluster}, |
| 111 }; |
| 112 |
| 113 static const ListElementInfo kListElementInfo[] = { |
| 114 { kWebMIdCluster, kClusterIds, sizeof(kClusterIds) }, |
| 115 { kWebMIdInfo, kInfoIds, sizeof(kInfoIds) }, |
| 116 { kWebMIdTracks, kTracksIds, sizeof(kTracksIds) }, |
| 117 { kWebMIdTrackEntry, kTrackEntryIds, sizeof(kTrackEntryIds) }, |
| 118 { kWebMIdVideo, kVideoIds, sizeof(kVideoIds) }, |
| 119 { kWebMIdAudio, kAudioIds, sizeof(kAudioIds) }, |
| 120 }; |
| 121 |
| 122 // Number of elements in kListElementInfo. |
| 123 const int kListElementInfoCount = |
| 124 sizeof(kListElementInfo) / sizeof(ListElementInfo); |
| 125 |
| 126 WebMParserClient::~WebMParserClient() {} |
| 127 |
| 128 // Parses an element header id or size field. These fields are variable length |
| 129 // encoded. The first byte indicates how many bytes the field occupies. |
| 130 // |buf| - The buffer to parse. |
| 131 // |size| - The number of bytes in |buf| |
| 132 // |max_bytes| - The maximum number of bytes the field can be. ID fields |
| 133 // set this to 4 & element size fields set this to 8. If the |
| 134 // first byte indicates a larger field size than this it is a |
| 135 // parser error. |
| 136 // |mask_first_byte| - For element size fields the field length encoding bits |
| 137 // need to be masked off. This parameter is true for |
| 138 // element size fields and is false for ID field values. |
| 139 // |
| 140 // Returns: The number of bytes parsed on success. -1 on error. |
| 141 static int ParseWebMElementHeaderField(const uint8* buf, int size, |
| 142 int max_bytes, bool mask_first_byte, |
| 143 int64* num) { |
| 144 DCHECK(buf); |
| 145 DCHECK(num); |
| 146 |
| 147 if (size <= 0) |
| 148 return -1; |
| 149 |
| 150 int mask = 0x80; |
| 151 uint8 ch = buf[0]; |
| 152 int extra_bytes = -1; |
| 153 for (int i = 0; i < max_bytes; ++i) { |
| 154 if ((ch & mask) == mask) { |
| 155 *num = mask_first_byte ? ch & ~mask : ch; |
| 156 extra_bytes = i; |
| 157 break; |
| 158 } |
| 159 mask >>= 1; |
| 160 } |
| 161 |
| 162 if ((extra_bytes == -1) || ((1 + extra_bytes) > size)) |
| 163 return -1; |
| 164 |
| 165 int bytes_used = 1; |
| 166 |
| 167 for (int i = 0; i < extra_bytes; ++i) |
| 168 *num = (*num << 8) | (0xff & buf[bytes_used++]); |
| 169 |
| 170 return bytes_used; |
| 171 } |
| 172 |
| 173 // Parses an element header & returns the ID and element size. |
| 174 // |
| 175 // Returns: The number of bytes parsed on success. -1 on error. |
| 176 // |*id| contains the element ID on success & undefined on error. |
| 177 // |*element_size| contains the element size on success & undefined on error. |
| 178 static int ParseWebMElementHeader(const uint8* buf, int size, |
| 179 int* id, int64* element_size) { |
| 180 DCHECK(buf); |
| 181 DCHECK_GE(size, 0); |
| 182 DCHECK(id); |
| 183 DCHECK(element_size); |
| 184 |
| 185 if (size == 0) |
| 186 return 0; |
| 187 |
| 188 int64 tmp; |
| 189 int num_id_bytes = ParseWebMElementHeaderField(buf, size, 4, false, &tmp); |
| 190 |
| 191 if (num_id_bytes <= 0) |
| 192 return num_id_bytes; |
| 193 |
| 194 *id = static_cast<int>(tmp); |
| 195 |
| 196 int num_size_bytes = ParseWebMElementHeaderField(buf + num_id_bytes, |
| 197 size - num_id_bytes, |
| 198 8, true, &tmp); |
| 199 |
| 200 if (num_size_bytes <= 0) |
| 201 return num_size_bytes; |
| 202 |
| 203 *element_size = tmp; |
| 204 return num_id_bytes + num_size_bytes; |
| 205 } |
| 206 |
| 207 // Finds ElementIdInfo for a specific ID. |
| 208 static const ElementIdInfo* FindIdInfo(int id, |
| 209 const ElementIdInfo* id_info, |
| 210 int id_info_size) { |
| 211 int count = id_info_size / sizeof(*id_info); |
| 212 for (int i = 0; i < count; ++i) { |
| 213 if (id == id_info[i].id_) |
| 214 return &id_info[i]; |
| 215 } |
| 216 |
| 217 return NULL; |
| 218 } |
| 219 |
| 220 // Finds ListElementInfo for a specific ID. |
| 221 static const ListElementInfo* FindListInfo(int id) { |
| 222 for (int i = 0; i < kListElementInfoCount; ++i) { |
| 223 if (id == kListElementInfo[i].id_) |
| 224 return &kListElementInfo[i]; |
| 225 } |
| 226 |
| 227 return NULL; |
| 228 } |
| 229 |
| 230 static int ParseSimpleBlock(const uint8* buf, int size, |
| 231 WebMParserClient* client) { |
| 232 if (size < 4) |
| 233 return -1; |
| 234 |
| 235 // Return an error if the trackNum > 127. We just aren't |
| 236 // going to support large track numbers right now. |
| 237 if ((buf[0] & 0x80) != 0x80) { |
| 238 VLOG(1) << "TrackNumber over 127 not supported"; |
| 239 return -1; |
| 240 } |
| 241 |
| 242 int track_num = buf[0] & 0x7f; |
| 243 int timecode = buf[1] << 8 | buf[2]; |
| 244 int flags = buf[3] & 0xff; |
| 245 int lacing = (flags >> 1) & 0x3; |
| 246 |
| 247 if (lacing != 0) { |
| 248 VLOG(1) << "Lacing " << lacing << " not supported yet."; |
| 249 return -1; |
| 250 } |
| 251 |
| 252 // Sign extend negative timecode offsets. |
| 253 if (timecode & 0x8000) |
| 254 timecode |= (-1 << 16); |
| 255 |
| 256 const uint8* frame_data = buf + 4; |
| 257 int frame_size = size - (frame_data - buf); |
| 258 if (!client->OnSimpleBlock(track_num, timecode, flags, |
| 259 frame_data, frame_size)) { |
| 260 return -1; |
| 261 } |
| 262 |
| 263 return size; |
| 264 } |
| 265 |
| 266 static int ParseElements(const ElementIdInfo* id_info, |
| 267 int id_info_size, |
| 268 const uint8* buf, int size, int level, |
| 269 WebMParserClient* client); |
| 270 |
| 271 static int ParseElementList(const uint8* buf, int size, |
| 272 int id, int level, |
| 273 WebMParserClient* client) { |
| 274 const ListElementInfo* list_info = FindListInfo(id); |
| 275 |
| 276 if (!list_info) { |
| 277 VLOG(1) << "Failed to find list info for ID " << std::hex << id; |
| 278 return -1; |
| 279 } |
| 280 |
| 281 if (!client->OnListStart(id)) |
| 282 return -1; |
| 283 |
| 284 int res = ParseElements(list_info->id_info_, |
| 285 list_info->id_info_size_, |
| 286 buf, size, |
| 287 level + 1, |
| 288 client); |
| 289 |
| 290 if (res < 0) |
| 291 return -1; |
| 292 |
| 293 if (!client->OnListEnd(id)) |
| 294 return -1; |
| 295 |
| 296 DCHECK_EQ(res, size); |
| 297 return res; |
| 298 } |
| 299 |
| 300 static int ParseUInt(const uint8* buf, int size, int id, |
| 301 WebMParserClient* client) { |
| 302 if ((size <= 0) || (size > 8)) |
| 303 return -1; |
| 304 |
| 305 // Read in the big-endian integer. |
| 306 int64 value = 0; |
| 307 for (int i = 0; i < size; ++i) |
| 308 value = (value << 8) | buf[i]; |
| 309 |
| 310 if (!client->OnUInt(id, value)) |
| 311 return -1; |
| 312 |
| 313 return size; |
| 314 } |
| 315 |
| 316 static int ParseFloat(const uint8* buf, int size, int id, |
| 317 WebMParserClient* client) { |
| 318 |
| 319 if ((size != 4) && (size != 8)) |
| 320 return -1; |
| 321 |
| 322 double value = -1; |
| 323 |
| 324 // Read the bytes from big-endian form into a native endian integer. |
| 325 int64 tmp = 0; |
| 326 for (int i = 0; i < size; ++i) |
| 327 tmp = (tmp << 8) | buf[i]; |
| 328 |
| 329 // Use a union to convert the integer bit pattern into a floating point |
| 330 // number. |
| 331 if (size == 4) { |
| 332 union { |
| 333 int32 src; |
| 334 float dst; |
| 335 } tmp2; |
| 336 tmp2.src = static_cast<int32>(tmp); |
| 337 value = tmp2.dst; |
| 338 } else if (size == 8) { |
| 339 union { |
| 340 int64 src; |
| 341 double dst; |
| 342 } tmp2; |
| 343 tmp2.src = tmp; |
| 344 value = tmp2.dst; |
| 345 } else { |
| 346 return -1; |
| 347 } |
| 348 |
| 349 if (!client->OnFloat(id, value)) |
| 350 return -1; |
| 351 |
| 352 return size; |
| 353 } |
| 354 |
| 355 static int ParseElements(const ElementIdInfo* id_info, |
| 356 int id_info_size, |
| 357 const uint8* buf, int size, int level, |
| 358 WebMParserClient* client) { |
| 359 DCHECK_GE(id_info_size, 0); |
| 360 DCHECK_GE(size, 0); |
| 361 DCHECK_GE(level, 0); |
| 362 |
| 363 const uint8* cur = buf; |
| 364 int cur_size = size; |
| 365 int used = 0; |
| 366 |
| 367 if (level > kMaxLevelDepth) |
| 368 return -1; |
| 369 |
| 370 while (cur_size > 0) { |
| 371 int id; |
| 372 int64 element_size; |
| 373 int res = ParseWebMElementHeader(cur, cur_size, &id, &element_size); |
| 374 |
| 375 if (res < 0) |
| 376 return res; |
| 377 |
| 378 if (res == 0) |
| 379 break; |
| 380 |
| 381 cur += res; |
| 382 cur_size -= res; |
| 383 used += res; |
| 384 |
| 385 // Check to see if the element is larger than the remaining data. |
| 386 if (element_size > cur_size) |
| 387 return -1; |
| 388 |
| 389 const ElementIdInfo* info = FindIdInfo(id, id_info, id_info_size); |
| 390 |
| 391 if (info == NULL) { |
| 392 VLOG(1) << "No info for ID " << std::hex << id; |
| 393 |
| 394 // TODO(acolwell): Change this to return -1 after the API has solidified. |
| 395 // We don't want to allow elements we don't recognize. |
| 396 cur += element_size; |
| 397 cur_size -= element_size; |
| 398 used += element_size; |
| 399 continue; |
| 400 } |
| 401 |
| 402 if (info->level_ != level) { |
| 403 VLOG(1) << "ID " << std::hex << id << std::dec << " at level " |
| 404 << level << " instead of " << info->level_; |
| 405 return -1; |
| 406 } |
| 407 |
| 408 switch(info->type_) { |
| 409 case SBLOCK: |
| 410 if (ParseSimpleBlock(cur, element_size, client) <= 0) |
| 411 return -1; |
| 412 break; |
| 413 case LIST: |
| 414 if (ParseElementList(cur, element_size, id, level, client) < 0) |
| 415 return -1; |
| 416 break; |
| 417 case UINT: |
| 418 if (ParseUInt(cur, element_size, id, client) <= 0) |
| 419 return -1; |
| 420 break; |
| 421 case FLOAT: |
| 422 if (ParseFloat(cur, element_size, id, client) <= 0) |
| 423 return -1; |
| 424 break; |
| 425 case BINARY: |
| 426 if (!client->OnBinary(id, cur, element_size)) |
| 427 return -1; |
| 428 break; |
| 429 case STRING: |
| 430 if (!client->OnString(id, |
| 431 std::string(reinterpret_cast<const char*>(cur), |
| 432 element_size))) |
| 433 return -1; |
| 434 break; |
| 435 case SKIP: |
| 436 // Do nothing. |
| 437 break; |
| 438 default: |
| 439 VLOG(1) << "Unhandled id type " << info->type_; |
| 440 return -1; |
| 441 }; |
| 442 |
| 443 cur += element_size; |
| 444 cur_size -= element_size; |
| 445 used += element_size; |
| 446 } |
| 447 |
| 448 return used; |
| 449 } |
| 450 |
| 451 // Parses a single list element that matches |id|. This method fails if the |
| 452 // buffer points to an element that does not match |id|. |
| 453 int WebMParseListElement(const uint8* buf, int size, int id, |
| 454 int level, WebMParserClient* client) { |
| 455 if (size == 0) |
| 456 return -1; |
| 457 |
| 458 const uint8* cur = buf; |
| 459 int cur_size = size; |
| 460 |
| 461 int element_id = 0; |
| 462 int64 element_size = 0; |
| 463 int res = ParseWebMElementHeader(cur, cur_size, &element_id, &element_size); |
| 464 |
| 465 if (res <= 0) |
| 466 return res; |
| 467 |
| 468 cur += res; |
| 469 cur_size -= res; |
| 470 |
| 471 if (element_id != id || element_size > cur_size) |
| 472 return -1; |
| 473 |
| 474 res = ParseElementList(cur, element_size, element_id, level, client); |
| 475 |
| 476 if (res < 0) |
| 477 return -1; |
| 478 |
| 479 cur += res; |
| 480 cur_size -= res; |
| 481 |
| 482 return size - cur_size; |
| 483 } |
| 484 |
| 485 } // namespace media |
OLD | NEW |