Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1149)

Unified Diff: chrome/browser/media_galleries/fileapi/itunes_library_parser.cc

Issue 16231016: Extract track information from iTunes library xml file. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 7 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: chrome/browser/media_galleries/fileapi/itunes_library_parser.cc
diff --git a/chrome/browser/media_galleries/fileapi/itunes_library_parser.cc b/chrome/browser/media_galleries/fileapi/itunes_library_parser.cc
new file mode 100644
index 0000000000000000000000000000000000000000..788ebc44eae5d23cda219f7c2a8113581fe5ff1a
--- /dev/null
+++ b/chrome/browser/media_galleries/fileapi/itunes_library_parser.cc
@@ -0,0 +1,261 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/media_galleries/fileapi/itunes_library_parser.h"
+
+#include <string>
+
+#include "base/logging.h"
+#include "base/string16.h"
+#include "base/strings/string_number_conversions.h"
+#include "base/strings/utf_string_conversions.h"
+#include "googleurl/src/gurl.h"
+#include "googleurl/src/url_canon.h"
+#include "googleurl/src/url_util.h"
+#include "third_party/libxml/chromium/libxml_utils.h"
+
+namespace itunes {
+
+namespace {
+
+// Fields extracted from a single track dictionary of the library XML.
+struct TrackInfo {
+  // Give |id| a defined value so that a TrackInfo which was only partially
+  // filled in (parsing stopped before "Track ID" was seen) never exposes an
+  // indeterminate integer.
+  TrackInfo() : id(0) {}
+
+  uint32_t id;              // Value of the "Track ID" key.
+  base::FilePath location;  // Decoded path taken from the "Location" key.
+  std::string artist;       // Value of the "Album Artist" key.
+  std::string album;        // Value of the "Album" key.
+};
+
+// Traverse |reader| looking for an element named |name| among the nodes at the
+// depth |reader| is at on entry. Returns true with |reader| positioned on the
+// matching element. Returns false if the reader cannot advance, if no such
+// element is found, or if the search would have to leave the current depth
+// for an enclosing scope.
+bool SeekToNodeAtCurrentDepth(XmlReader* reader, const std::string& name) {
+  const int depth = reader->Depth();
+  do {
+    if (!reader->SkipToElement()) {
+      // SkipToElement returns false if the current node is an end element,
+      // try to advance to the next element and then try again.
+      if (!reader->Read() || !reader->SkipToElement())
+        return false;
+    }
+    // Stepping over an end element can leave the reader on an element that
+    // belongs to an enclosing scope. That node is not at the depth being
+    // searched, so give up instead of matching (or walking over) it. The
+    // DCHECK below alone is not enough because it is compiled out of release
+    // builds.
+    if (reader->Depth() < depth)
+      return false;
+    DCHECK_EQ(depth, reader->Depth());
+    if (reader->NodeName() == name)
+      return true;
+  } while (reader->Next());
+
+  return false;
+}
+
+// Looks for |key| among the <key> children of the dict that |reader| is on.
+// Returns true as soon as a <key> whose text equals |key| has been read.
+// Returns false if the dict body ends, or the reader fails, before such a key
+// is seen.
+bool SeekInDict(XmlReader* reader, const std::string& key) {
+  DCHECK_EQ("dict", reader->NodeName());
+
+  // Children of the dict sit one level below the dict element itself.
+  const int body_depth = reader->Depth() + 1;
+  // Step off the dict node and into its body.
+  if (!reader->Read())
+    return false;
+
+  for (;;) {
+    if (reader->Depth() < body_depth)
+      return false;  // Walked out of the dict without a match.
+
+    std::string candidate;
+    const bool have_key = SeekToNodeAtCurrentDepth(reader, "key") &&
+                          reader->ReadElementContent(&candidate);
+    if (!have_key)
+      return false;
+    DCHECK_EQ(body_depth, reader->Depth());
+    if (candidate == key)
+      return true;
+  }
+}
+
+// Reads the text of the next element into |result|, provided that element is
+// named |node_name|. Returns false if no element can be reached, if its name
+// differs from |node_name|, or if its content cannot be read.
+bool ReadSimpleValue(XmlReader* reader, const std::string& node_name,
+                     std::string* result) {
+  // SkipToElement returns false if the current node is an end element; in that
+  // case advance one node and try once more.
+  const bool on_element = reader->SkipToElement() ||
+                          (reader->Read() && reader->SkipToElement());
+  if (!on_element || reader->NodeName() != node_name)
+    return false;
+  return reader->ReadElementContent(result);
+}
+
+// Reads the content of a <string> element into |result|. Returns false if the
+// next element is not a <string> or cannot be read.
+bool ReadString(XmlReader* reader, std::string* result) {
+  const bool read_ok = ReadSimpleValue(reader, "string", result);
+  return read_ok;
+}
+
+// Reads the content of an <integer> element and converts it to an unsigned
+// value stored in |result|. Returns false if the next element is not an
+// <integer>, cannot be read, or does not convert cleanly.
+bool ReadInteger(XmlReader* reader, uint32_t* result) {
+  std::string digits;
+  return ReadSimpleValue(reader, "integer", &digits) &&
+         base::StringToUint(digits, result);
+}
+
+// Walk through a dictionary filling in |result| with track information. Return
+// true if it was all found, false otherwise. In either case, the cursor is
Lei Zhang 2013/05/31 04:34:35 typo - cursor
vandebo (ex-Chrome) 2013/05/31 21:41:12 Done.
+// advanced out of the dictionary. The one exception is when |reader| is not
+// positioned on a "dict" element at all: false is returned and |reader| is
+// left where it was.
+//
+// "All found" means exactly one each of "Track ID", "Location" (a file URL
+// whose host is "localhost"), "Album Artist" and "Album". A repeated key, or a
+// value that cannot be read as the expected type, ends the scan early.
+bool GetTrackInfoFromDict(XmlReader* reader, TrackInfo* result) {
+  DCHECK(result);
Lei Zhang 2013/05/31 04:34:35 nit: DCHECK(result);
vandebo (ex-Chrome) 2013/05/31 21:41:12 Done.
+  // Malformed input can put something other than a dict here, so this has to
+  // be a runtime check and not only a debug assertion.
+  if (reader->NodeName() != "dict")
+    return false;
Lei Zhang 2013/05/31 04:34:35 Can there be a malformed XML file that fails this
vandebo (ex-Chrome) 2013/05/31 21:41:12 Done.
+
+  const int dict_content_depth = reader->Depth() + 1;
Lei Zhang 2013/05/31 04:34:35 Can someone create a big, malicious XML file that
vandebo (ex-Chrome) 2013/05/31 21:41:12 Probably, but to what end? If they control the XM
Lei Zhang 2013/05/31 22:06:53 With the way it's used, I just want to make sure w
vandebo (ex-Chrome) 2013/05/31 22:21:12 In infinite loop isn't possible because we'll alwa
+  // Advance past the dict node and into the body of the dictionary.
+  if (!reader->Read())
+    return false;
+
+  bool found_id = false;
+  bool found_location = false;
+  bool found_artist = false;
+  bool found_album = false;
+  while (reader->Depth() >= dict_content_depth &&
+         !(found_id && found_location && found_artist && found_album)) {
+    if (!SeekToNodeAtCurrentDepth(reader, "key"))
+      break;
+    std::string found_key;
+    if (!reader->ReadElementContent(&found_key))
+      break;
+    DCHECK_EQ(dict_content_depth, reader->Depth());
+
+    if (found_key == "Track ID") {
+      if (found_id)
+        break;
+      if (!ReadInteger(reader, &result->id))
+        break;
+      found_id = true;
+    } else if (found_key == "Location") {
+      // TODO(vandebo): strip file://localhost/.
+      if (found_location)
+        break;
Lei Zhang 2013/05/31 04:34:35 Are you breaking out of the while loop because it
vandebo (ex-Chrome) 2013/05/31 21:41:12 Yes. We expect one entry for each these fields. If
+      std::string value;
+      if (!ReadString(reader, &value))
+        break;
+      const GURL url(value);
+      if (!url.SchemeIsFile() || url.host() != "localhost")
+        break;
+      // Hold the path in a local and make sure there is a leading character
+      // to strip before computing |length() - 1|.
+      const std::string path = url.path();
+      if (path.empty())
+        break;
+      url_canon::RawCanonOutputW<1024> decoded_location;
+      url_util::DecodeURLEscapeSequences(path.c_str() + 1,  // Strip /.
+                                         path.length() - 1,
+                                         &decoded_location);
+#if defined(OS_WIN)
+      string16 location(decoded_location.data(), decoded_location.length());
+#else
+      string16 location16(decoded_location.data(), decoded_location.length());
+      std::string location = UTF16ToUTF8(location16);
+#endif
+      result->location = base::FilePath(location);
+      found_location = true;
+    } else if (found_key == "Album Artist") {
+      if (found_artist)
+        break;
+      if (!ReadString(reader, &result->artist))
+        break;
+      found_artist = true;
+    } else if (found_key == "Album") {
+      if (found_album)
+        break;
+      if (!ReadString(reader, &result->album))
+        break;
+      found_album = true;
+    } else {
+      if (!reader->SkipToElement()) {
+        // SkipToElement returns false if the current node is an end element,
+        // try to advance to the next element and then try again.
+        if (!reader->Read() || !reader->SkipToElement())
+          break;
+      }
+      // This key is not one we track. Its value is read only to move the
+      // reader past it; the content itself is discarded.
+      std::string value;
Lei Zhang 2013/05/31 04:34:35 Explain why this value is read into and then ignor
vandebo (ex-Chrome) 2013/05/31 21:41:12 Done.
+      if (!reader->ReadElementContent(&value))
+        break;
+    }
+  }
+
+  // Seek to the end of the dictionary. Stop if the reader cannot advance (for
+  // example on truncated input); otherwise the depth might never drop and
+  // this loop would not terminate.
+  while (reader->Depth() >= dict_content_depth) {
+    if (!reader->Next())
+      break;
+  }
+
+  return found_id && found_location && found_artist && found_album;
+}
+
+} // namespace
+
+// Stores the given |id| and |location| in the new track record.
+ITunesLibraryParser::Track::Track(uint32_t id, const base::FilePath& location)
+    : id(id), location(location) {}
+
+// Orders tracks by |id| alone; |location| takes no part in the comparison.
+bool ITunesLibraryParser::Track::operator<(const Track& other) const {
+  return other.id > id;
+}
+
+// No explicit setup; members are default-initialized.
+ITunesLibraryParser::ITunesLibraryParser() {
+}
+
+// Parses |library_xml| and records every track that could be fully extracted
+// under library_[artist][album]. Returns false if the XML cannot be loaded or
+// the "Tracks" dictionary inside the top-level plist dict cannot be reached.
+// Once the track list has been reached, returns true if at least one track was
+// recorded or if no error was encountered.
+bool ITunesLibraryParser::Parse(const std::string& library_xml) {
+  XmlReader reader;
+
+  if (!reader.Load(library_xml))
+    return false;
+
+  // Find the plist node and then search within that tag.
+  if (!SeekToNodeAtCurrentDepth(&reader, "plist"))
+    return false;
+  if (!reader.Read())
+    return false;
+
+  if (!SeekToNodeAtCurrentDepth(&reader, "dict"))
+    return false;
+
+  if (!SeekInDict(&reader, "Tracks"))
+    return false;
+
+  // Once inside the Tracks dict, we expect track dictionaries keyed by id. i.e.
+  //   <key>Tracks</key>
+  //   <dict>
+  //     <key>160</key>
+  //     <dict>
+  //       <key>Track ID</key><integer>160</integer>
+  if (!SeekToNodeAtCurrentDepth(&reader, "dict"))
+    return false;
+  const int tracks_dict_depth = reader.Depth() + 1;
+  if (!reader.Read())
+    return false;
+
+  // Once parsing has gotten this far, return whatever is found, even if
+  // some of the data isn't extracted just right.
+  bool no_errors = true;
+  bool track_found = false;
+  while (reader.Depth() >= tracks_dict_depth) {
+    if (!SeekToNodeAtCurrentDepth(&reader, "key")) {
+      no_errors = false;
Lei Zhang 2013/05/31 04:34:35 why not just return track_found at this point?
vandebo (ex-Chrome) 2013/05/31 21:41:12 Done.
+      break;
+    }
+    std::string key;  // Should match track id below.
+    if (!reader.ReadElementContent(&key)) {
+      no_errors = false;
+      break;
+    }
+    // A key that is not a valid unsigned integer can never name a track. Keep
+    // going so the value that follows is still consumed, but remember the
+    // failure so a partially converted number cannot match a real track id.
+    uint32_t id = 0;
+    const bool id_valid = base::StringToUint(key, &id);
Lei Zhang 2013/05/31 04:34:35 Check return result.
vandebo (ex-Chrome) 2013/05/31 21:41:12 Done.
+    // Move onto the element holding this key's value. Whether or not any
+    // whitespace separates </key> from the value must not matter, so skip
+    // forward to the next element instead of blindly reading one node.
+    if (!reader.SkipToElement()) {
+      // SkipToElement returns false if the current node is an end element,
+      // try to advance to the next element and then try again.
+      if (!reader.Read() || !reader.SkipToElement()) {
+        no_errors = false;
+        break;
+      }
+    }
+    if (reader.NodeName() != "dict" || reader.Depth() != tracks_dict_depth) {
+      // The value is not a track dictionary. Note the error and resume the
+      // scan; the loop condition ends it if the reader has left the Tracks
+      // dict.
+      no_errors = false;
+      continue;
+    }
+
+    // GetTrackInfoFromDict is evaluated first so the dictionary is consumed
+    // even when the key turned out to be invalid.
+    TrackInfo track_info;
+    if (GetTrackInfoFromDict(&reader, &track_info) && id_valid &&
+        id == track_info.id) {
+      Track track(track_info.id, track_info.location);
+      library_[track_info.artist][track_info.album].insert(track);
Lei Zhang 2013/05/31 04:34:35 Do you care if there already exists a track with t
vandebo (ex-Chrome) 2013/05/31 21:41:12 ids should be unique. If that's not the case, I'm
+      track_found = true;
+    } else {
+      no_errors = false;
+    }
+  }
+
+  return track_found || no_errors;
+}
+
+} // namespace itunes

Powered by Google App Engine
This is Rietveld 408576698