chrome/browser/media_galleries/fileapi/itunes_library_parser.cc - Issue 16231016: Extract track information from iTunes library xml file.

Unified Diff: chrome/browser/media_galleries/fileapi/itunes_library_parser.cc

Issue 16231016: Extract track information from iTunes library xml file. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Created 7 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« chrome/browser/media_galleries/fileapi/itunes_library_parser.h ('K') | « chrome/browser/media_galleries/fileapi/itunes_library_parser.h ('k') | chrome/browser/media_galleries/fileapi/itunes_library_parser_unittest.cc » ('j') | chrome/browser/media_galleries/fileapi/itunes_library_parser_unittest.cc » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: chrome/browser/media_galleries/fileapi/itunes_library_parser.cc

diff --git a/chrome/browser/media_galleries/fileapi/itunes_library_parser.cc b/chrome/browser/media_galleries/fileapi/itunes_library_parser.cc

new file mode 100644

index 0000000000000000000000000000000000000000..788ebc44eae5d23cda219f7c2a8113581fe5ff1a

--- /dev/null

+++ b/chrome/browser/media_galleries/fileapi/itunes_library_parser.cc

@@ -0,0 +1,261 @@

+// Use of this source code is governed by a BSD-style license that can be

+// found in the LICENSE file.

+#include "chrome/browser/media_galleries/fileapi/itunes_library_parser.h"

+#include <string>

+#include "base/logging.h"

+#include "base/string16.h"

+#include "base/strings/string_number_conversions.h"

+#include "base/strings/utf_string_conversions.h"

+#include "googleurl/src/gurl.h"

+#include "googleurl/src/url_canon.h"

+#include "googleurl/src/url_util.h"

+#include "third_party/libxml/chromium/libxml_utils.h"

+namespace itunes {

+namespace {

+struct TrackInfo {  // Fields gathered from one track dict by GetTrackInfoFromDict().

+ uint32_t id;  // Parsed from the "Track ID" integer.

+ base::FilePath location;  // Built from the unescaped path of the "Location" URL.

+ std::string artist;  // Content of the "Album Artist" string.

+ std::string album;  // Content of the "Album" string.

+};

+// Scans |reader| forward for an element named |name| at the depth |reader| has

+// on entry. Returns true with |reader| on that element, false if none is found.

+bool SeekToNodeAtCurrentDepth(XmlReader* reader, const std::string& name) {

+ int depth = reader->Depth();  // Only consulted by the DCHECK below.

+ do {

+ if (!reader->SkipToElement()) {

+ // SkipToElement returns false if the current node is an end element,

+ // try to advance to the next element and then try again.

+ if (!reader->Read() || !reader->SkipToElement())

+ return false;

+ }

+ DCHECK_EQ(depth, reader->Depth());

+ if (reader->NodeName() == name)

+ return true;

+ } while (reader->Next());  // Keep going until Next() reports failure.

+ return false;

+// Scans the dict |reader| is on for a "key" element whose content equals |key|.

+bool SeekInDict(XmlReader* reader, const std::string& key) {

+ DCHECK_EQ("dict", reader->NodeName());

+ int dict_content_depth = reader->Depth() + 1;  // Depth of the dict entries.

+ // Advance past the dict node and into the body of the dictionary.

+ if (!reader->Read())

+ return false;

+ while (reader->Depth() >= dict_content_depth) {

+ if (!SeekToNodeAtCurrentDepth(reader, "key"))

+ return false;

+ std::string found_key;

+ if (!reader->ReadElementContent(&found_key))

+ return false;

+ DCHECK_EQ(dict_content_depth, reader->Depth());

+ if (found_key == key)

+ return true;

+ }

+ return false;  // The loop ended without a matching key.

+// Moves |reader| to an element and, if it is named |node_name|, reads its

+// content into |result|. Returns false on a name mismatch or a failed read.

+bool ReadSimpleValue(XmlReader* reader, const std::string& node_name,

+ std::string* result) {

+ if (!reader->SkipToElement()) {

+ // SkipToElement returns false if the current node is an end element,

+ // try to advance to the next element and then try again.

+ if (!reader->Read() || !reader->SkipToElement())

+ return false;

+ }

+ if (reader->NodeName() != node_name)

+ return false;

+ return reader->ReadElementContent(result);

+// Reads the content of a "string" element into |result|; false on failure.

+bool ReadString(XmlReader* reader, std::string* result) {

+ return ReadSimpleValue(reader, "string", result);

+// Reads an "integer" element and converts its text to an unsigned |result|.

+bool ReadInteger(XmlReader* reader, uint32_t* result) {

+ std::string value;

+ if (!ReadSimpleValue(reader, "integer", &value))

+ return false;

+ return base::StringToUint(value, result);  // Conversion status is the result.

+// Walk through a dictionary filling in |result| with track information. Return

+// true if it was all found, false otherwise. In either case, the curser is

Lei Zhang 2013/05/31 04:34:35 typo - cursor

vandebo (ex-Chrome) 2013/05/31 21:41:12 Done.

+// advanced out of the dictionary.

+bool GetTrackInfoFromDict(XmlReader* reader, TrackInfo* result) {

+ DCHECK(result != NULL);

Lei Zhang 2013/05/31 04:34:35 nit: DCHECK(result);

vandebo (ex-Chrome) 2013/05/31 21:41:12 Done.

+ DCHECK_EQ("dict", reader->NodeName());

Lei Zhang 2013/05/31 04:34:35 Can there be a malformed XML file that fails this

vandebo (ex-Chrome) 2013/05/31 21:41:12 Done.

+ int dict_content_depth = reader->Depth() + 1;  // Depth of the dict entries.

Lei Zhang 2013/05/31 04:34:35 Can someone create a big, malicious XML file that

vandebo (ex-Chrome) 2013/05/31 21:41:12 Probably, but to what end? If they control the XM

Lei Zhang 2013/05/31 22:06:53 With the way it's used, I just want to make sure w

vandebo (ex-Chrome) 2013/05/31 22:21:12 In infinite loop isn't possible because we'll alwa

+ // Advance past the dict node and into the body of the dictionary.

+ if (!reader->Read())

+ return false;

+ bool found_id = false;

+ bool found_location = false;

+ bool found_artist = false;

+ bool found_album = false;

+ while (reader->Depth() >= dict_content_depth &&

+ !(found_id && found_location && found_artist && found_album)) {  // Stop once all four are found.

+ if (!SeekToNodeAtCurrentDepth(reader, "key"))

+ break;

+ std::string found_key;

+ if (!reader->ReadElementContent(&found_key))

+ break;

+ DCHECK_EQ(dict_content_depth, reader->Depth());

+ if (found_key == "Track ID") {

+ if (found_id)  // A repeated key ends the scan of this dict.

+ break;

+ if (!ReadInteger(reader, &result->id))

+ break;

+ found_id = true;

+ } else if (found_key == "Location") {

+ // TODO(vandebo): strip file://localhost/.

+ if (found_location)

+ break;

Lei Zhang 2013/05/31 04:34:35 Are you breaking out of the while loop because it

vandebo (ex-Chrome) 2013/05/31 21:41:12 Yes. We expect one entry for each these fields. If

+ std::string value;

+ if (!ReadString(reader, &value))

+ break;

+ GURL url(value);

+ if (!url.SchemeIsFile() || url.host() != "localhost")  // Only file URLs on localhost are accepted.

+ break;

+ url_canon::RawCanonOutputW<1024> decoded_location;  // Receives the unescaped path.

+ url_util::DecodeURLEscapeSequences(url.path().c_str() + 1, // Strip /.

+ url.path().length() - 1,

+ &decoded_location);

+#if defined(OS_WIN)

+ string16 location(decoded_location.data(), decoded_location.length());

+#else

+ string16 location16(decoded_location.data(), decoded_location.length());

+ std::string location = UTF16ToUTF8(location16);

+#endif

+ result->location = base::FilePath(location);

+ found_location = true;

+ } else if (found_key == "Album Artist") {

+ if (found_artist)

+ break;

+ if (!ReadString(reader, &result->artist))

+ break;

+ found_artist = true;

+ } else if (found_key == "Album") {

+ if (found_album)

+ break;

+ if (!ReadString(reader, &result->album))

+ break;

+ found_album = true;

+ } else {  // Any other key: its value is read and then discarded.

+ if (!reader->SkipToElement()) {

+ // SkipToElement returns false if the current node is an end element,

+ // try to advance to the next element and then try again.

+ if (!reader->Read() || !reader->SkipToElement())

+ break;

+ }

+ std::string value;

Lei Zhang 2013/05/31 04:34:35 Explain why this value is read into and then ignor

vandebo (ex-Chrome) 2013/05/31 21:41:12 Done.

+ if (!reader->ReadElementContent(&value))

+ break;

+ }

+ // Seek to the end of the dictionary

+ while (reader->Depth() >= dict_content_depth) {

+ reader->Next();

+ }

+ return found_id && found_location && found_artist && found_album;  // All four fields are required.

+} // namespace

+ITunesLibraryParser::Track::Track(uint32_t id, const base::FilePath& location)

+ : id(id),

+ location(location) {  // Both members are initialized straight from the arguments.

+bool ITunesLibraryParser::Track::operator<(const Track& other) const {

+ return id < other.id;  // Ordering uses the id only; location is not compared.

+ITunesLibraryParser::ITunesLibraryParser() {}  // No explicit initialization is performed here.

+bool ITunesLibraryParser::Parse(const std::string& library_xml) {  // Fills library_ from |library_xml|.

+ XmlReader reader;

+ if (!reader.Load(library_xml))

+ return false;

+ // Find the plist node and then search within that tag.

+ if (!SeekToNodeAtCurrentDepth(&reader, "plist"))

+ return false;

+ if (!reader.Read())

+ return false;

+ if (!SeekToNodeAtCurrentDepth(&reader, "dict"))

+ return false;

+ if (!SeekInDict(&reader, "Tracks"))

+ return false;

+ // Once inside the Tracks dict, we expect track dictionaries keyed by id. i.e.

+ // <key>Tracks</key>

+ // <dict>

+ // <key>160</key>

+ // <dict>

+ // <key>Track ID</key><integer>160</integer>

+ if (!SeekToNodeAtCurrentDepth(&reader, "dict"))

+ return false;

+ int tracks_dict_depth = reader.Depth() + 1;  // Depth of the per-track entries.

+ if (!reader.Read())

+ return false;

+ // Once parsing has gotten this far, return whatever is found, even if

+ // some of the data is not extracted just right.

+ bool no_errors = true;

+ bool track_found = false;

+ while (reader.Depth() >= tracks_dict_depth) {

+ if (!SeekToNodeAtCurrentDepth(&reader, "key")) {

+ no_errors = false;

Lei Zhang 2013/05/31 04:34:35 why not just return track_found at this point?

vandebo (ex-Chrome) 2013/05/31 21:41:12 Done.

+ break;

+ }

+ std::string key; // Should match track id below.

+ if (!reader.ReadElementContent(&key)) {

+ no_errors = false;

+ break;

+ }

+ uint32_t id;  // NOTE(review): not initialized, and the conversion result below is ignored.

+ base::StringToUint(key, &id);

Lei Zhang 2013/05/31 04:34:35 Check return result.

vandebo (ex-Chrome) 2013/05/31 21:41:12 Done.

+ if (!reader.Read()) {

+ no_errors = false;

+ break;

+ }

+ TrackInfo track_info;

+ if (GetTrackInfoFromDict(&reader, &track_info) && id == track_info.id) {  // Outer key must equal the parsed id.

+ Track track(track_info.id, track_info.location);

+ library_[track_info.artist][track_info.album].insert(track);  // Indexed by artist, then album.

Lei Zhang 2013/05/31 04:34:35 Do you care if there already exists a track with t

vandebo (ex-Chrome) 2013/05/31 21:41:12 ids should be unique. If that's not the case, I'm

+ track_found = true;

+ } else {

+ no_errors = false;

+ }

+ return track_found || no_errors;  // True if any track was stored or no error was recorded.

+} // namespace itunes