Index: chrome/browser/mork_reader.cc |
=================================================================== |
--- chrome/browser/mork_reader.cc (revision 2150) |
+++ chrome/browser/mork_reader.cc (working copy) |
@@ -1,581 +0,0 @@ |
-/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
-/* ***** BEGIN LICENSE BLOCK ***** |
- * Version: MPL 1.1/GPL 2.0/LGPL 2.1 |
- * |
- * The contents of this file are subject to the Mozilla Public License Version |
- * 1.1 (the "License"); you may not use this file except in compliance with |
- * the License. You may obtain a copy of the License at |
- * http://www.mozilla.org/MPL/ |
- * |
- * Software distributed under the License is distributed on an "AS IS" basis, |
- * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License |
- * for the specific language governing rights and limitations under the |
- * License. |
- * |
- * The Original Code is the Mork Reader. |
- * |
- * The Initial Developer of the Original Code is |
- * Google Inc. |
- * Portions created by the Initial Developer are Copyright (C) 2006 |
- * the Initial Developer. All Rights Reserved. |
- * |
- * Contributor(s): |
- * Brian Ryner <bryner@brianryner.com> (original author) |
- * |
- * Alternatively, the contents of this file may be used under the terms of |
- * either the GNU General Public License Version 2 or later (the "GPL"), or |
- * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), |
- * in which case the provisions of the GPL or the LGPL are applicable instead |
- * of those above. If you wish to allow use of your version of this file only |
- * under the terms of either the GPL or the LGPL, and not to allow others to |
- * use your version of this file under the terms of the MPL, indicate your |
- * decision by deleting the provisions above and replace them with the notice |
- * and other provisions required by the GPL or the LGPL. If you do not delete |
- * the provisions above, a recipient may use your version of this file under |
- * the terms of any one of the MPL, the GPL or the LGPL. |
- * |
- * ***** END LICENSE BLOCK ***** */ |
- |
-// Source: |
-// http://mxr.mozilla.org/firefox/source/db/morkreader/nsMorkReader.cpp |
-// This file has been converted to google style. |
- |
-#include "chrome/browser/mork_reader.h" |
- |
-#include <algorithm> |
- |
-#include "base/logging.h" |
-#include "base/string_util.h" |
-#include "chrome/browser/firefox_importer_utils.h" |
-#include "chrome/browser/history/history_types.h" |
- |
-namespace { |
- |
-// Convert a hex character (0-9, A-F) to its corresponding byte value. |
-// Returns -1 if the character is invalid. |
-inline int HexCharToInt(char c) { |
- if ('0' <= c && c <= '9') |
- return c - '0'; |
- if ('A' <= c && c <= 'F') |
- return c - 'A' + 10; |
- return -1; |
-} |
- |
-// Unescape a Mork value. Mork uses $xx escaping to encode non-ASCII |
-// characters. Additionally, '$' and '\' are backslash-escaped. |
-// The result of the unescape is in returned. |
-std::string MorkUnescape(const std::string& input) { |
- // We optimize for speed over space here -- size the result buffer to |
- // the size of the source, which is an upper bound on the size of the |
- // unescaped string. |
- std::string result; |
- size_t input_length = input.size(); |
- result.reserve(input_length); |
- |
- for (size_t i = 0; i < input_length; i++) { |
- char c = input[i]; |
- if (c == '\\') { |
- // Escaped literal, slip the backslash, append the next character. |
- i++; |
- if (i < input_length) |
- result.push_back(input[i]); |
- } else if (c == '$') { |
- // Dollar sign denotes a hex character. |
- if (i < input_length - 2) { |
- // Would be nice to use ToInteger() here, but it currently |
- // requires a null-terminated string. |
- int first = HexCharToInt(input[++i]); |
- int second = HexCharToInt(input[++i]); |
- if (first >= 0 && second >= 0) |
- result.push_back((first << 4) | second); |
- } |
- } else { |
- // Regular character, just append. |
- result.push_back(input[i]); |
- } |
- } |
- return result; |
-} |
- |
-} // namespace |
- |
-MorkReader::MorkReader() { |
-} |
- |
-MorkReader::~MorkReader() { |
- // Need to delete all the pointers to vectors we have in the table. |
- for (RowMap::iterator i = table_.begin(); i != table_.end(); ++i) |
- delete i->second; |
-} |
- |
-bool MorkReader::Read(const std::wstring& filename) { |
- stream_.open(filename.c_str()); |
- if (!stream_.is_open()) |
- return false; |
- |
- std::string line; |
- if (!ReadLine(&line) || |
- line.compare("// <!-- <mdb:mork:z v=\"1.4\"/> -->") != 0) |
- return false; // Unexpected file format. |
- |
- IndexMap column_map; |
- while (ReadLine(&line)) { |
- // Trim off leading spaces |
- size_t idx = 0; |
- size_t len = line.size(); |
- while (idx < len && line[idx] == ' ') |
- ++idx; |
- if (idx >= len) |
- continue; |
- |
- // Look at the line to figure out what section type this is |
- if (StartsWithASCII(&line[idx], "< <(a=c)>", true)) { |
- // Column map. We begin by creating a hash of column id to column name. |
- StringMap column_name_map; |
- ParseMap(line, idx, &column_name_map); |
- |
- // Now that we have the list of columns, we put them into a flat array. |
- // Rows will have value arrays of the same size, with indexes that |
- // correspond to the columns array. As we insert each column into the |
- // array, we also make an entry in columnMap so that we can look up the |
- // index given the column id. |
- columns_.reserve(column_name_map.size()); |
- |
- for (StringMap::const_iterator i = column_name_map.begin(); |
- i != column_name_map.end(); ++i) { |
- column_map[i->first] = static_cast<int>(columns_.size()); |
- MorkColumn col(i->first, i->second); |
- columns_.push_back(col); |
- } |
- } else if (StartsWithASCII(&line[idx], "<(", true)) { |
- // Value map. |
- ParseMap(line, idx, &value_map_); |
- } else if (line[idx] == '{' || line[idx] == '[') { |
- // Table / table row. |
- ParseTable(line, idx, &column_map); |
- } else { |
- // Don't know, hopefully don't care. |
- } |
- } |
- return true; |
-} |
- |
-// Parses a key/value map of the form |
-// <(k1=v1)(k2=v2)...> |
-bool MorkReader::ParseMap(const std::string& first_line, |
- size_t start_index, |
- StringMap* map) { |
- // If the first line is the a=c line (column map), just skip over it. |
- std::string line(first_line); |
- if (StartsWithASCII(line, "< <(a=c)>", true)) |
- ReadLine(&line); |
- |
- std::string key; |
- do { |
- size_t idx = start_index; |
- size_t len = line.size(); |
- size_t token_start; |
- |
- while (idx < len) { |
- switch (line[idx++]) { |
- case '(': |
- // Beginning of a key/value pair. |
- if (!key.empty()) { |
- DLOG(WARNING) << "unterminated key/value pair?"; |
- key.clear(); |
- } |
- |
- token_start = idx; |
- while (idx < len && line[idx] != '=') |
- ++idx; |
- key.assign(&line[token_start], idx - token_start); |
- break; |
- |
- case '=': { |
- // Beginning of the value. |
- if (key.empty()) { |
- DLOG(WARNING) << "stray value"; |
- break; |
- } |
- |
- token_start = idx; |
- while (idx < len && line[idx] != ')') { |
- if (line[idx] == '\\') |
- ++idx; // Skip escaped ')' characters. |
- ++idx; |
- } |
- size_t token_end = std::min(idx, len); |
- ++idx; |
- |
- std::string value = MorkUnescape( |
- std::string(&line[token_start], token_end - token_start)); |
- (*map)[key] = value; |
- key.clear(); |
- break; |
- } |
- case '>': |
- // End of the map. |
- DLOG_IF(WARNING, key.empty()) << |
- "map terminates inside of key/value pair"; |
- return true; |
- } |
- } |
- |
- // We should start reading the next line at the beginning. |
- start_index = 0; |
- } while (ReadLine(&line)); |
- |
- // We ran out of lines and the map never terminated. This probably indicates |
- // a parsing error. |
- DLOG(WARNING) << "didn't find end of key/value map"; |
- return false; |
-} |
- |
-// Parses a table row of the form [123(^45^67)..] |
-// (row id 123 has the value with id 67 for the column with id 45). |
-// A '^' prefix for a column or value references an entry in the column or |
-// value map. '=' is used as the separator when the value is a literal. |
-void MorkReader::ParseTable(const std::string& first_line, |
- size_t start_index, |
- const IndexMap* column_map) { |
- std::string line(first_line); |
- |
- // Column index of the cell we're parsing, minus one if invalid. |
- int column_index = -1; |
- |
- // Points to the current row we're parsing inside of the |table_|, will be |
- // NULL if we're not inside a row. |
- ColumnDataList* current_row = NULL; |
- |
- bool in_meta_row = false; |
- |
- do { |
- size_t idx = start_index; |
- size_t len = line.size(); |
- |
- while (idx < len) { |
- switch (line[idx++]) { |
- case '{': |
- // This marks the beginning of a table section. There's a lot of |
- // junk before the first row that looks like cell values but isn't. |
- // Skip to the first '['. |
- while (idx < len && line[idx] != '[') { |
- if (line[idx] == '{') { |
- in_meta_row = true; // The meta row is enclosed in { } |
- } else if (line[idx] == '}') { |
- in_meta_row = false; |
- } |
- ++idx; |
- } |
- break; |
- |
- case '[': { |
- // Start of a new row. Consume the row id, up to the first '('. |
- // Row edits also have a table namespace, separated from the row id |
- // by a colon. We don't make use of the namespace, but we need to |
- // make sure not to consider it part of the row id. |
- if (current_row) { |
- DLOG(WARNING) << "unterminated row?"; |
- current_row = NULL; |
- } |
- |
- // Check for a '-' at the start of the id. This signifies that |
- // if the row already exists, we should delete all columns from it |
- // before adding the new values. |
- bool cut_columns; |
- if (idx < len && line[idx] == '-') { |
- cut_columns = true; |
- ++idx; |
- } else { |
- cut_columns = false; |
- } |
- |
- // Locate the range of the ID. |
- size_t token_start = idx; // Index of the first char of the token. |
- while (idx < len && |
- line[idx] != '(' && |
- line[idx] != ']' && |
- line[idx] != ':') { |
- ++idx; |
- } |
- size_t token_end = idx; // Index of the char following the token. |
- while (idx < len && line[idx] != '(' && line[idx] != ']') { |
- ++idx; |
- } |
- |
- if (in_meta_row) { |
- // Need to create the meta row. |
- meta_row_.resize(columns_.size()); |
- current_row = &meta_row_; |
- } else { |
- // Find or create the regular row for this. |
- IDString row_id(&line[token_start], token_end - token_start); |
- RowMap::iterator found_row = table_.find(row_id); |
- if (found_row == table_.end()) { |
- // We don't already have this row, create a new one for it. |
- current_row = new ColumnDataList(columns_.size()); |
- table_[row_id] = current_row; |
- } else { |
- // The row already exists and we're adding/replacing things. |
- current_row = found_row->second; |
- } |
- } |
- if (cut_columns) { |
- for (size_t i = 0; i < current_row->size(); ++i) |
- (*current_row)[i].clear(); |
- } |
- break; |
- } |
- |
- case ']': |
- // We're done with the row. |
- current_row = NULL; |
- in_meta_row = false; |
- break; |
- |
- case '(': { |
- if (!current_row) { |
- DLOG(WARNING) << "cell value outside of row"; |
- break; |
- } |
- |
- bool column_is_atom; |
- if (line[idx] == '^') { |
- column_is_atom = true; |
- ++idx; // This is not part of the column id, advance past it. |
- } else { |
- column_is_atom = false; |
- } |
- size_t token_start = idx; |
- while (idx < len && line[idx] != '^' && line[idx] != '=') { |
- if (line[idx] == '\\') |
- ++idx; // Skip escaped characters. |
- ++idx; |
- } |
- |
- size_t token_end = std::min(idx, len); |
- |
- IDString column; |
- if (column_is_atom) |
- column.assign(&line[token_start], token_end - token_start); |
- else |
- column = MorkUnescape(line.substr(token_start, |
- token_end - token_start)); |
- |
- IndexMap::const_iterator found_column = column_map->find(column); |
- if (found_column == column_map->end()) { |
- DLOG(WARNING) << "Column not in column map, discarding it"; |
- column_index = -1; |
- } else { |
- column_index = found_column->second; |
- } |
- break; |
- } |
- |
- case '=': |
- case '^': { |
- if (column_index == -1) { |
- DLOG(WARNING) << "stray ^ or = marker"; |
- break; |
- } |
- |
- bool value_is_atom = (line[idx - 1] == '^'); |
- size_t token_start = idx - 1; // Include the '=' or '^' marker. |
- while (idx < len && line[idx] != ')') { |
- if (line[idx] == '\\') |
- ++idx; // Skip escaped characters. |
- ++idx; |
- } |
- size_t token_end = std::min(idx, len); |
- ++idx; |
- |
- if (value_is_atom) { |
- (*current_row)[column_index].assign(&line[token_start], |
- token_end - token_start); |
- } else { |
- (*current_row)[column_index] = |
- MorkUnescape(line.substr(token_start, token_end - token_start)); |
- } |
- column_index = -1; |
- } |
- break; |
- } |
- } |
- |
- // Start parsing the next line at the beginning. |
- start_index = 0; |
- } while (current_row && ReadLine(&line)); |
-} |
- |
-bool MorkReader::ReadLine(std::string* line) { |
- line->resize(256); |
- std::getline(stream_, *line); |
- if (stream_.eof() || stream_.bad()) |
- return false; |
- |
- while (!line->empty() && (*line)[line->size() - 1] == '\\') { |
- // There is a continuation for this line. Read it and append. |
- std::string new_line; |
- std::getline(stream_, new_line); |
- if (stream_.eof()) |
- return false; |
- line->erase(line->size() - 1); |
- line->append(new_line); |
- } |
- |
- return true; |
-} |
- |
-void MorkReader::NormalizeValue(std::string* value) const { |
- if (value->empty()) |
- return; |
- MorkReader::StringMap::const_iterator i; |
- switch (value->at(0)) { |
- case '^': |
- // Hex ID, lookup the name for it in the |value_map_|. |
- i = value_map_.find(value->substr(1)); |
- if (i == value_map_.end()) |
- value->clear(); |
- else |
- *value = i->second; |
- break; |
- case '=': |
- // Just use the literal after the equals sign. |
- value->erase(value->begin()); |
- break; |
- default: |
- // Anything else is invalid. |
- value->clear(); |
- break; |
- } |
-} |
- |
-// Source: |
-// http://mxr.mozilla.org/firefox/source/toolkit/components/places/src/nsMorkHistoryImporter.cpp |
- |
-// Columns for entry (non-meta) history rows |
-enum { |
- kURLColumn, |
- kNameColumn, |
- kVisitCountColumn, |
- kHiddenColumn, |
- kTypedColumn, |
- kLastVisitColumn, |
- kColumnCount // Keep me last. |
-}; |
- |
-static const char * const gColumnNames[] = { |
- "URL", "Name", "VisitCount", "Hidden", "Typed", "LastVisitDate" |
-}; |
- |
-struct TableReadClosure { |
- explicit TableReadClosure(const MorkReader& r) |
- : reader(r), |
- swap_bytes(false), |
- byte_order_column(-1) { |
- for (int i = 0; i < kColumnCount; ++i) |
- column_indexes[i] = -1; |
- } |
- |
- // Backpointers to the reader and history we're operating on. |
- const MorkReader& reader; |
- |
- // Whether we need to swap bytes (file format is other-endian). |
- bool swap_bytes; |
- |
- // Indexes of the columns that we care about. |
- int column_indexes[kColumnCount]; |
- int byte_order_column; |
-}; |
- |
-void AddToHistory(MorkReader::ColumnDataList* column_values, |
- const TableReadClosure& data, |
- std::vector<history::URLRow>* rows) { |
- std::string values[kColumnCount]; |
- |
- for (size_t i = 0; i < kColumnCount; ++i) { |
- if (data.column_indexes[i] != -1) { |
- values[i] = column_values->at(data.column_indexes[i]); |
- data.reader.NormalizeValue(&values[i]); |
- // Do not import hidden records. |
- if (i == kHiddenColumn && values[i] == "1") |
- return; |
- } |
- } |
- |
- GURL url(values[kURLColumn]); |
- |
- if (CanImportURL(url)) { |
- history::URLRow row(url); |
- |
- // title is really a UTF-16 string at this point |
- std::wstring title; |
- if (data.swap_bytes) { |
- CodepageToWide(values[kNameColumn], "UTF-16BE", |
- OnStringUtilConversionError::SKIP, &title); |
- } else { |
- CodepageToWide(values[kNameColumn], "UTF-16LE", |
- OnStringUtilConversionError::SKIP, &title); |
- } |
- row.set_title(title); |
- |
- int count = atoi(values[kVisitCountColumn].c_str()); |
- if (count == 0) |
- count = 1; |
- row.set_visit_count(count); |
- |
- time_t date = StringToInt64(values[kLastVisitColumn]); |
- if (date != 0) |
- row.set_last_visit(Time::FromTimeT(date/1000000)); |
- |
- bool is_typed = (values[kTypedColumn] == "1"); |
- if (is_typed) |
- row.set_typed_count(1); |
- |
- rows->push_back(row); |
- } |
-} |
- |
-// It sets up the file stream and loops over the lines in the file to |
-// parse them, then adds the resulting row set to history. |
-void ImportHistoryFromFirefox2(std::wstring file, MessageLoop* loop, |
- ProfileWriter* writer) { |
- MorkReader reader; |
- reader.Read(file); |
- |
- // Gather up the column ids so we don't need to find them on each row |
- TableReadClosure data(reader); |
- const MorkReader::MorkColumnList& columns = reader.columns(); |
- for (size_t i = 0; i < columns.size(); ++i) { |
- for (int j = 0; j < kColumnCount; ++j) |
- if (columns[i].name == gColumnNames[j]) { |
- data.column_indexes[j] = static_cast<int>(i); |
- break; |
- } |
- if (columns[i].name == "ByteOrder") |
- data.byte_order_column = static_cast<int>(i); |
- } |
- |
- // Determine the byte order from the table's meta-row. |
- const MorkReader::ColumnDataList& meta_row = reader.meta_row(); |
- if (!meta_row.empty() && data.byte_order_column != -1) { |
- std::string byte_order = meta_row[data.byte_order_column]; |
- if (!byte_order.empty()) { |
- // Note whether the file uses a non-native byte ordering. |
- // If it does, we'll have to swap bytes for PRUnichar values. |
- // "BE" and "LE" are the only recognized values, anything |
- // else is garbage and the file will be treated as native-endian |
- // (no swapping). |
- std::string byte_order_value(byte_order); |
- reader.NormalizeValue(&byte_order_value); |
- data.swap_bytes = (byte_order_value == "BE"); |
- } |
- } |
- |
- std::vector<history::URLRow> rows; |
- for (MorkReader::iterator i = reader.begin(); i != reader.end(); ++i) |
- AddToHistory(i->second, data, &rows); |
- if (!rows.empty()) |
- loop->PostTask(FROM_HERE, NewRunnableMethod(writer, |
- &ProfileWriter::AddHistoryPage, rows)); |
-} |