| Index: chrome/browser/mork_reader.cc
|
| ===================================================================
|
| --- chrome/browser/mork_reader.cc (revision 2150)
|
| +++ chrome/browser/mork_reader.cc (working copy)
|
| @@ -1,581 +0,0 @@
|
| -/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
| -/* ***** BEGIN LICENSE BLOCK *****
|
| - * Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
| - *
|
| - * The contents of this file are subject to the Mozilla Public License Version
|
| - * 1.1 (the "License"); you may not use this file except in compliance with
|
| - * the License. You may obtain a copy of the License at
|
| - * http://www.mozilla.org/MPL/
|
| - *
|
| - * Software distributed under the License is distributed on an "AS IS" basis,
|
| - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
| - * for the specific language governing rights and limitations under the
|
| - * License.
|
| - *
|
| - * The Original Code is the Mork Reader.
|
| - *
|
| - * The Initial Developer of the Original Code is
|
| - * Google Inc.
|
| - * Portions created by the Initial Developer are Copyright (C) 2006
|
| - * the Initial Developer. All Rights Reserved.
|
| - *
|
| - * Contributor(s):
|
| - * Brian Ryner <bryner@brianryner.com> (original author)
|
| - *
|
| - * Alternatively, the contents of this file may be used under the terms of
|
| - * either the GNU General Public License Version 2 or later (the "GPL"), or
|
| - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
| - * in which case the provisions of the GPL or the LGPL are applicable instead
|
| - * of those above. If you wish to allow use of your version of this file only
|
| - * under the terms of either the GPL or the LGPL, and not to allow others to
|
| - * use your version of this file under the terms of the MPL, indicate your
|
| - * decision by deleting the provisions above and replace them with the notice
|
| - * and other provisions required by the GPL or the LGPL. If you do not delete
|
| - * the provisions above, a recipient may use your version of this file under
|
| - * the terms of any one of the MPL, the GPL or the LGPL.
|
| - *
|
| - * ***** END LICENSE BLOCK ***** */
|
| -
|
| -// Source:
|
| -// http://mxr.mozilla.org/firefox/source/db/morkreader/nsMorkReader.cpp
|
| -// This file has been converted to google style.
|
| -
|
| -#include "chrome/browser/mork_reader.h"
|
| -
|
| -#include <algorithm>
|
| -
|
| -#include "base/logging.h"
|
| -#include "base/string_util.h"
|
| -#include "chrome/browser/firefox_importer_utils.h"
|
| -#include "chrome/browser/history/history_types.h"
|
| -
|
| -namespace {
|
| -
|
| -// Convert a hex character (0-9, A-F) to its corresponding byte value.
|
| -// Returns -1 if the character is invalid.
|
| -inline int HexCharToInt(char c) {
|
| - if ('0' <= c && c <= '9')
|
| - return c - '0';
|
| - if ('A' <= c && c <= 'F')
|
| - return c - 'A' + 10;
|
| - return -1;
|
| -}
|
| -
|
| -// Unescape a Mork value. Mork uses $xx escaping to encode non-ASCII
|
| -// characters. Additionally, '$' and '\' are backslash-escaped.
|
| -// The result of the unescape is in returned.
|
| -std::string MorkUnescape(const std::string& input) {
|
| - // We optimize for speed over space here -- size the result buffer to
|
| - // the size of the source, which is an upper bound on the size of the
|
| - // unescaped string.
|
| - std::string result;
|
| - size_t input_length = input.size();
|
| - result.reserve(input_length);
|
| -
|
| - for (size_t i = 0; i < input_length; i++) {
|
| - char c = input[i];
|
| - if (c == '\\') {
|
| - // Escaped literal, slip the backslash, append the next character.
|
| - i++;
|
| - if (i < input_length)
|
| - result.push_back(input[i]);
|
| - } else if (c == '$') {
|
| - // Dollar sign denotes a hex character.
|
| - if (i < input_length - 2) {
|
| - // Would be nice to use ToInteger() here, but it currently
|
| - // requires a null-terminated string.
|
| - int first = HexCharToInt(input[++i]);
|
| - int second = HexCharToInt(input[++i]);
|
| - if (first >= 0 && second >= 0)
|
| - result.push_back((first << 4) | second);
|
| - }
|
| - } else {
|
| - // Regular character, just append.
|
| - result.push_back(input[i]);
|
| - }
|
| - }
|
| - return result;
|
| -}
|
| -
|
| -} // namespace
|
| -
|
| -MorkReader::MorkReader() {
|
| -}
|
| -
|
| -MorkReader::~MorkReader() {
|
| - // Need to delete all the pointers to vectors we have in the table.
|
| - for (RowMap::iterator i = table_.begin(); i != table_.end(); ++i)
|
| - delete i->second;
|
| -}
|
| -
|
| -bool MorkReader::Read(const std::wstring& filename) {
|
| - stream_.open(filename.c_str());
|
| - if (!stream_.is_open())
|
| - return false;
|
| -
|
| - std::string line;
|
| - if (!ReadLine(&line) ||
|
| - line.compare("// <!-- <mdb:mork:z v=\"1.4\"/> -->") != 0)
|
| - return false; // Unexpected file format.
|
| -
|
| - IndexMap column_map;
|
| - while (ReadLine(&line)) {
|
| - // Trim off leading spaces
|
| - size_t idx = 0;
|
| - size_t len = line.size();
|
| - while (idx < len && line[idx] == ' ')
|
| - ++idx;
|
| - if (idx >= len)
|
| - continue;
|
| -
|
| - // Look at the line to figure out what section type this is
|
| - if (StartsWithASCII(&line[idx], "< <(a=c)>", true)) {
|
| - // Column map. We begin by creating a hash of column id to column name.
|
| - StringMap column_name_map;
|
| - ParseMap(line, idx, &column_name_map);
|
| -
|
| - // Now that we have the list of columns, we put them into a flat array.
|
| - // Rows will have value arrays of the same size, with indexes that
|
| - // correspond to the columns array. As we insert each column into the
|
| - // array, we also make an entry in columnMap so that we can look up the
|
| - // index given the column id.
|
| - columns_.reserve(column_name_map.size());
|
| -
|
| - for (StringMap::const_iterator i = column_name_map.begin();
|
| - i != column_name_map.end(); ++i) {
|
| - column_map[i->first] = static_cast<int>(columns_.size());
|
| - MorkColumn col(i->first, i->second);
|
| - columns_.push_back(col);
|
| - }
|
| - } else if (StartsWithASCII(&line[idx], "<(", true)) {
|
| - // Value map.
|
| - ParseMap(line, idx, &value_map_);
|
| - } else if (line[idx] == '{' || line[idx] == '[') {
|
| - // Table / table row.
|
| - ParseTable(line, idx, &column_map);
|
| - } else {
|
| - // Don't know, hopefully don't care.
|
| - }
|
| - }
|
| - return true;
|
| -}
|
| -
|
| -// Parses a key/value map of the form
|
| -// <(k1=v1)(k2=v2)...>
|
| -bool MorkReader::ParseMap(const std::string& first_line,
|
| - size_t start_index,
|
| - StringMap* map) {
|
| - // If the first line is the a=c line (column map), just skip over it.
|
| - std::string line(first_line);
|
| - if (StartsWithASCII(line, "< <(a=c)>", true))
|
| - ReadLine(&line);
|
| -
|
| - std::string key;
|
| - do {
|
| - size_t idx = start_index;
|
| - size_t len = line.size();
|
| - size_t token_start;
|
| -
|
| - while (idx < len) {
|
| - switch (line[idx++]) {
|
| - case '(':
|
| - // Beginning of a key/value pair.
|
| - if (!key.empty()) {
|
| - DLOG(WARNING) << "unterminated key/value pair?";
|
| - key.clear();
|
| - }
|
| -
|
| - token_start = idx;
|
| - while (idx < len && line[idx] != '=')
|
| - ++idx;
|
| - key.assign(&line[token_start], idx - token_start);
|
| - break;
|
| -
|
| - case '=': {
|
| - // Beginning of the value.
|
| - if (key.empty()) {
|
| - DLOG(WARNING) << "stray value";
|
| - break;
|
| - }
|
| -
|
| - token_start = idx;
|
| - while (idx < len && line[idx] != ')') {
|
| - if (line[idx] == '\\')
|
| - ++idx; // Skip escaped ')' characters.
|
| - ++idx;
|
| - }
|
| - size_t token_end = std::min(idx, len);
|
| - ++idx;
|
| -
|
| - std::string value = MorkUnescape(
|
| - std::string(&line[token_start], token_end - token_start));
|
| - (*map)[key] = value;
|
| - key.clear();
|
| - break;
|
| - }
|
| - case '>':
|
| - // End of the map.
|
| - DLOG_IF(WARNING, key.empty()) <<
|
| - "map terminates inside of key/value pair";
|
| - return true;
|
| - }
|
| - }
|
| -
|
| - // We should start reading the next line at the beginning.
|
| - start_index = 0;
|
| - } while (ReadLine(&line));
|
| -
|
| - // We ran out of lines and the map never terminated. This probably indicates
|
| - // a parsing error.
|
| - DLOG(WARNING) << "didn't find end of key/value map";
|
| - return false;
|
| -}
|
| -
|
| -// Parses a table row of the form [123(^45^67)..]
|
| -// (row id 123 has the value with id 67 for the column with id 45).
|
| -// A '^' prefix for a column or value references an entry in the column or
|
| -// value map. '=' is used as the separator when the value is a literal.
|
| -void MorkReader::ParseTable(const std::string& first_line,
|
| - size_t start_index,
|
| - const IndexMap* column_map) {
|
| - std::string line(first_line);
|
| -
|
| - // Column index of the cell we're parsing, minus one if invalid.
|
| - int column_index = -1;
|
| -
|
| - // Points to the current row we're parsing inside of the |table_|, will be
|
| - // NULL if we're not inside a row.
|
| - ColumnDataList* current_row = NULL;
|
| -
|
| - bool in_meta_row = false;
|
| -
|
| - do {
|
| - size_t idx = start_index;
|
| - size_t len = line.size();
|
| -
|
| - while (idx < len) {
|
| - switch (line[idx++]) {
|
| - case '{':
|
| - // This marks the beginning of a table section. There's a lot of
|
| - // junk before the first row that looks like cell values but isn't.
|
| - // Skip to the first '['.
|
| - while (idx < len && line[idx] != '[') {
|
| - if (line[idx] == '{') {
|
| - in_meta_row = true; // The meta row is enclosed in { }
|
| - } else if (line[idx] == '}') {
|
| - in_meta_row = false;
|
| - }
|
| - ++idx;
|
| - }
|
| - break;
|
| -
|
| - case '[': {
|
| - // Start of a new row. Consume the row id, up to the first '('.
|
| - // Row edits also have a table namespace, separated from the row id
|
| - // by a colon. We don't make use of the namespace, but we need to
|
| - // make sure not to consider it part of the row id.
|
| - if (current_row) {
|
| - DLOG(WARNING) << "unterminated row?";
|
| - current_row = NULL;
|
| - }
|
| -
|
| - // Check for a '-' at the start of the id. This signifies that
|
| - // if the row already exists, we should delete all columns from it
|
| - // before adding the new values.
|
| - bool cut_columns;
|
| - if (idx < len && line[idx] == '-') {
|
| - cut_columns = true;
|
| - ++idx;
|
| - } else {
|
| - cut_columns = false;
|
| - }
|
| -
|
| - // Locate the range of the ID.
|
| - size_t token_start = idx; // Index of the first char of the token.
|
| - while (idx < len &&
|
| - line[idx] != '(' &&
|
| - line[idx] != ']' &&
|
| - line[idx] != ':') {
|
| - ++idx;
|
| - }
|
| - size_t token_end = idx; // Index of the char following the token.
|
| - while (idx < len && line[idx] != '(' && line[idx] != ']') {
|
| - ++idx;
|
| - }
|
| -
|
| - if (in_meta_row) {
|
| - // Need to create the meta row.
|
| - meta_row_.resize(columns_.size());
|
| - current_row = &meta_row_;
|
| - } else {
|
| - // Find or create the regular row for this.
|
| - IDString row_id(&line[token_start], token_end - token_start);
|
| - RowMap::iterator found_row = table_.find(row_id);
|
| - if (found_row == table_.end()) {
|
| - // We don't already have this row, create a new one for it.
|
| - current_row = new ColumnDataList(columns_.size());
|
| - table_[row_id] = current_row;
|
| - } else {
|
| - // The row already exists and we're adding/replacing things.
|
| - current_row = found_row->second;
|
| - }
|
| - }
|
| - if (cut_columns) {
|
| - for (size_t i = 0; i < current_row->size(); ++i)
|
| - (*current_row)[i].clear();
|
| - }
|
| - break;
|
| - }
|
| -
|
| - case ']':
|
| - // We're done with the row.
|
| - current_row = NULL;
|
| - in_meta_row = false;
|
| - break;
|
| -
|
| - case '(': {
|
| - if (!current_row) {
|
| - DLOG(WARNING) << "cell value outside of row";
|
| - break;
|
| - }
|
| -
|
| - bool column_is_atom;
|
| - if (line[idx] == '^') {
|
| - column_is_atom = true;
|
| - ++idx; // This is not part of the column id, advance past it.
|
| - } else {
|
| - column_is_atom = false;
|
| - }
|
| - size_t token_start = idx;
|
| - while (idx < len && line[idx] != '^' && line[idx] != '=') {
|
| - if (line[idx] == '\\')
|
| - ++idx; // Skip escaped characters.
|
| - ++idx;
|
| - }
|
| -
|
| - size_t token_end = std::min(idx, len);
|
| -
|
| - IDString column;
|
| - if (column_is_atom)
|
| - column.assign(&line[token_start], token_end - token_start);
|
| - else
|
| - column = MorkUnescape(line.substr(token_start,
|
| - token_end - token_start));
|
| -
|
| - IndexMap::const_iterator found_column = column_map->find(column);
|
| - if (found_column == column_map->end()) {
|
| - DLOG(WARNING) << "Column not in column map, discarding it";
|
| - column_index = -1;
|
| - } else {
|
| - column_index = found_column->second;
|
| - }
|
| - break;
|
| - }
|
| -
|
| - case '=':
|
| - case '^': {
|
| - if (column_index == -1) {
|
| - DLOG(WARNING) << "stray ^ or = marker";
|
| - break;
|
| - }
|
| -
|
| - bool value_is_atom = (line[idx - 1] == '^');
|
| - size_t token_start = idx - 1; // Include the '=' or '^' marker.
|
| - while (idx < len && line[idx] != ')') {
|
| - if (line[idx] == '\\')
|
| - ++idx; // Skip escaped characters.
|
| - ++idx;
|
| - }
|
| - size_t token_end = std::min(idx, len);
|
| - ++idx;
|
| -
|
| - if (value_is_atom) {
|
| - (*current_row)[column_index].assign(&line[token_start],
|
| - token_end - token_start);
|
| - } else {
|
| - (*current_row)[column_index] =
|
| - MorkUnescape(line.substr(token_start, token_end - token_start));
|
| - }
|
| - column_index = -1;
|
| - }
|
| - break;
|
| - }
|
| - }
|
| -
|
| - // Start parsing the next line at the beginning.
|
| - start_index = 0;
|
| - } while (current_row && ReadLine(&line));
|
| -}
|
| -
|
| -bool MorkReader::ReadLine(std::string* line) {
|
| - line->resize(256);
|
| - std::getline(stream_, *line);
|
| - if (stream_.eof() || stream_.bad())
|
| - return false;
|
| -
|
| - while (!line->empty() && (*line)[line->size() - 1] == '\\') {
|
| - // There is a continuation for this line. Read it and append.
|
| - std::string new_line;
|
| - std::getline(stream_, new_line);
|
| - if (stream_.eof())
|
| - return false;
|
| - line->erase(line->size() - 1);
|
| - line->append(new_line);
|
| - }
|
| -
|
| - return true;
|
| -}
|
| -
|
| -void MorkReader::NormalizeValue(std::string* value) const {
|
| - if (value->empty())
|
| - return;
|
| - MorkReader::StringMap::const_iterator i;
|
| - switch (value->at(0)) {
|
| - case '^':
|
| - // Hex ID, lookup the name for it in the |value_map_|.
|
| - i = value_map_.find(value->substr(1));
|
| - if (i == value_map_.end())
|
| - value->clear();
|
| - else
|
| - *value = i->second;
|
| - break;
|
| - case '=':
|
| - // Just use the literal after the equals sign.
|
| - value->erase(value->begin());
|
| - break;
|
| - default:
|
| - // Anything else is invalid.
|
| - value->clear();
|
| - break;
|
| - }
|
| -}
|
| -
|
| -// Source:
|
| -// http://mxr.mozilla.org/firefox/source/toolkit/components/places/src/nsMorkHistoryImporter.cpp
|
| -
|
| -// Columns for entry (non-meta) history rows
|
| -enum {
|
| - kURLColumn,
|
| - kNameColumn,
|
| - kVisitCountColumn,
|
| - kHiddenColumn,
|
| - kTypedColumn,
|
| - kLastVisitColumn,
|
| - kColumnCount // Keep me last.
|
| -};
|
| -
|
| -static const char * const gColumnNames[] = {
|
| - "URL", "Name", "VisitCount", "Hidden", "Typed", "LastVisitDate"
|
| -};
|
| -
|
| -struct TableReadClosure {
|
| - explicit TableReadClosure(const MorkReader& r)
|
| - : reader(r),
|
| - swap_bytes(false),
|
| - byte_order_column(-1) {
|
| - for (int i = 0; i < kColumnCount; ++i)
|
| - column_indexes[i] = -1;
|
| - }
|
| -
|
| - // Backpointers to the reader and history we're operating on.
|
| - const MorkReader& reader;
|
| -
|
| - // Whether we need to swap bytes (file format is other-endian).
|
| - bool swap_bytes;
|
| -
|
| - // Indexes of the columns that we care about.
|
| - int column_indexes[kColumnCount];
|
| - int byte_order_column;
|
| -};
|
| -
|
| -void AddToHistory(MorkReader::ColumnDataList* column_values,
|
| - const TableReadClosure& data,
|
| - std::vector<history::URLRow>* rows) {
|
| - std::string values[kColumnCount];
|
| -
|
| - for (size_t i = 0; i < kColumnCount; ++i) {
|
| - if (data.column_indexes[i] != -1) {
|
| - values[i] = column_values->at(data.column_indexes[i]);
|
| - data.reader.NormalizeValue(&values[i]);
|
| - // Do not import hidden records.
|
| - if (i == kHiddenColumn && values[i] == "1")
|
| - return;
|
| - }
|
| - }
|
| -
|
| - GURL url(values[kURLColumn]);
|
| -
|
| - if (CanImportURL(url)) {
|
| - history::URLRow row(url);
|
| -
|
| - // title is really a UTF-16 string at this point
|
| - std::wstring title;
|
| - if (data.swap_bytes) {
|
| - CodepageToWide(values[kNameColumn], "UTF-16BE",
|
| - OnStringUtilConversionError::SKIP, &title);
|
| - } else {
|
| - CodepageToWide(values[kNameColumn], "UTF-16LE",
|
| - OnStringUtilConversionError::SKIP, &title);
|
| - }
|
| - row.set_title(title);
|
| -
|
| - int count = atoi(values[kVisitCountColumn].c_str());
|
| - if (count == 0)
|
| - count = 1;
|
| - row.set_visit_count(count);
|
| -
|
| - time_t date = StringToInt64(values[kLastVisitColumn]);
|
| - if (date != 0)
|
| - row.set_last_visit(Time::FromTimeT(date/1000000));
|
| -
|
| - bool is_typed = (values[kTypedColumn] == "1");
|
| - if (is_typed)
|
| - row.set_typed_count(1);
|
| -
|
| - rows->push_back(row);
|
| - }
|
| -}
|
| -
|
| -// It sets up the file stream and loops over the lines in the file to
|
| -// parse them, then adds the resulting row set to history.
|
| -void ImportHistoryFromFirefox2(std::wstring file, MessageLoop* loop,
|
| - ProfileWriter* writer) {
|
| - MorkReader reader;
|
| - reader.Read(file);
|
| -
|
| - // Gather up the column ids so we don't need to find them on each row
|
| - TableReadClosure data(reader);
|
| - const MorkReader::MorkColumnList& columns = reader.columns();
|
| - for (size_t i = 0; i < columns.size(); ++i) {
|
| - for (int j = 0; j < kColumnCount; ++j)
|
| - if (columns[i].name == gColumnNames[j]) {
|
| - data.column_indexes[j] = static_cast<int>(i);
|
| - break;
|
| - }
|
| - if (columns[i].name == "ByteOrder")
|
| - data.byte_order_column = static_cast<int>(i);
|
| - }
|
| -
|
| - // Determine the byte order from the table's meta-row.
|
| - const MorkReader::ColumnDataList& meta_row = reader.meta_row();
|
| - if (!meta_row.empty() && data.byte_order_column != -1) {
|
| - std::string byte_order = meta_row[data.byte_order_column];
|
| - if (!byte_order.empty()) {
|
| - // Note whether the file uses a non-native byte ordering.
|
| - // If it does, we'll have to swap bytes for PRUnichar values.
|
| - // "BE" and "LE" are the only recognized values, anything
|
| - // else is garbage and the file will be treated as native-endian
|
| - // (no swapping).
|
| - std::string byte_order_value(byte_order);
|
| - reader.NormalizeValue(&byte_order_value);
|
| - data.swap_bytes = (byte_order_value == "BE");
|
| - }
|
| - }
|
| -
|
| - std::vector<history::URLRow> rows;
|
| - for (MorkReader::iterator i = reader.begin(); i != reader.end(); ++i)
|
| - AddToHistory(i->second, data, &rows);
|
| - if (!rows.empty())
|
| - loop->PostTask(FROM_HERE, NewRunnableMethod(writer,
|
| - &ProfileWriter::AddHistoryPage, rows));
|
| -}
|
|
|