Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(401)

Side by Side Diff: chrome/browser/mork_reader.cc

Issue 3035: Move importer files into an importer subdirectory. (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 12 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « chrome/browser/mork_reader.h ('k') | chrome/browser/title_chomper.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 *
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
9 *
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
13 * License.
14 *
15 * The Original Code is the Mork Reader.
16 *
17 * The Initial Developer of the Original Code is
18 * Google Inc.
19 * Portions created by the Initial Developer are Copyright (C) 2006
20 * the Initial Developer. All Rights Reserved.
21 *
22 * Contributor(s):
23 * Brian Ryner <bryner@brianryner.com> (original author)
24 *
25 * Alternatively, the contents of this file may be used under the terms of
26 * either the GNU General Public License Version 2 or later (the "GPL"), or
27 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 * in which case the provisions of the GPL or the LGPL are applicable instead
29 * of those above. If you wish to allow use of your version of this file only
30 * under the terms of either the GPL or the LGPL, and not to allow others to
31 * use your version of this file under the terms of the MPL, indicate your
32 * decision by deleting the provisions above and replace them with the notice
33 * and other provisions required by the GPL or the LGPL. If you do not delete
34 * the provisions above, a recipient may use your version of this file under
35 * the terms of any one of the MPL, the GPL or the LGPL.
36 *
37 * ***** END LICENSE BLOCK ***** */
38
39 // Source:
40 // http://mxr.mozilla.org/firefox/source/db/morkreader/nsMorkReader.cpp
41 // This file has been converted to google style.
42
43 #include "chrome/browser/mork_reader.h"
44
45 #include <algorithm>
46
47 #include "base/logging.h"
48 #include "base/string_util.h"
49 #include "chrome/browser/firefox_importer_utils.h"
50 #include "chrome/browser/history/history_types.h"
51
namespace {

// Converts a single hexadecimal digit to its numeric value.
// Only '0'-'9' and upper-case 'A'-'F' are accepted; any other character
// (including lower-case 'a'-'f') yields -1.
inline int HexCharToInt(char c) {
  if ('0' <= c && c <= '9')
    return c - '0';
  if ('A' <= c && c <= 'F')
    return c - 'A' + 10;
  return -1;
}

// Unescapes a Mork value and returns the result.
//
// Two escape forms are recognized:
//   \c   - the character |c| is copied literally (used for '$', '\', ')').
//          A lone trailing backslash is dropped.
//   $XX  - XX are two upper-case hex digits encoding one byte. If either
//          digit is invalid the whole three-character sequence is dropped.
//          If fewer than two characters follow the '$', only the '$' itself
//          is dropped and the remaining characters are copied as usual.
std::string MorkUnescape(const std::string& input) {
  // We optimize for speed over space here -- size the result buffer to
  // the size of the source, which is an upper bound on the size of the
  // unescaped string.
  std::string result;
  const size_t input_length = input.size();
  result.reserve(input_length);

  for (size_t i = 0; i < input_length; i++) {
    const char c = input[i];
    if (c == '\\') {
      // Escaped literal: skip the backslash, append the next character.
      i++;
      if (i < input_length)
        result.push_back(input[i]);
    } else if (c == '$') {
      // Dollar sign denotes a hex-encoded byte. The bound is written as
      // |i + 2 < input_length| rather than |i < input_length - 2| because the
      // latter wraps around for inputs shorter than two characters and would
      // then read past the end of |input|.
      if (i + 2 < input_length) {
        const int first = HexCharToInt(input[++i]);
        const int second = HexCharToInt(input[++i]);
        if (first >= 0 && second >= 0)
          result.push_back(static_cast<char>((first << 4) | second));
      }
    } else {
      // Regular character, just append.
      result.push_back(c);
    }
  }
  return result;
}

}  // namespace
101
102 MorkReader::MorkReader() {
103 }
104
105 MorkReader::~MorkReader() {
106 // Need to delete all the pointers to vectors we have in the table.
107 for (RowMap::iterator i = table_.begin(); i != table_.end(); ++i)
108 delete i->second;
109 }
110
111 bool MorkReader::Read(const std::wstring& filename) {
112 stream_.open(filename.c_str());
113 if (!stream_.is_open())
114 return false;
115
116 std::string line;
117 if (!ReadLine(&line) ||
118 line.compare("// <!-- <mdb:mork:z v=\"1.4\"/> -->") != 0)
119 return false; // Unexpected file format.
120
121 IndexMap column_map;
122 while (ReadLine(&line)) {
123 // Trim off leading spaces
124 size_t idx = 0;
125 size_t len = line.size();
126 while (idx < len && line[idx] == ' ')
127 ++idx;
128 if (idx >= len)
129 continue;
130
131 // Look at the line to figure out what section type this is
132 if (StartsWithASCII(&line[idx], "< <(a=c)>", true)) {
133 // Column map. We begin by creating a hash of column id to column name.
134 StringMap column_name_map;
135 ParseMap(line, idx, &column_name_map);
136
137 // Now that we have the list of columns, we put them into a flat array.
138 // Rows will have value arrays of the same size, with indexes that
139 // correspond to the columns array. As we insert each column into the
140 // array, we also make an entry in columnMap so that we can look up the
141 // index given the column id.
142 columns_.reserve(column_name_map.size());
143
144 for (StringMap::const_iterator i = column_name_map.begin();
145 i != column_name_map.end(); ++i) {
146 column_map[i->first] = static_cast<int>(columns_.size());
147 MorkColumn col(i->first, i->second);
148 columns_.push_back(col);
149 }
150 } else if (StartsWithASCII(&line[idx], "<(", true)) {
151 // Value map.
152 ParseMap(line, idx, &value_map_);
153 } else if (line[idx] == '{' || line[idx] == '[') {
154 // Table / table row.
155 ParseTable(line, idx, &column_map);
156 } else {
157 // Don't know, hopefully don't care.
158 }
159 }
160 return true;
161 }
162
163 // Parses a key/value map of the form
164 // <(k1=v1)(k2=v2)...>
165 bool MorkReader::ParseMap(const std::string& first_line,
166 size_t start_index,
167 StringMap* map) {
168 // If the first line is the a=c line (column map), just skip over it.
169 std::string line(first_line);
170 if (StartsWithASCII(line, "< <(a=c)>", true))
171 ReadLine(&line);
172
173 std::string key;
174 do {
175 size_t idx = start_index;
176 size_t len = line.size();
177 size_t token_start;
178
179 while (idx < len) {
180 switch (line[idx++]) {
181 case '(':
182 // Beginning of a key/value pair.
183 if (!key.empty()) {
184 DLOG(WARNING) << "unterminated key/value pair?";
185 key.clear();
186 }
187
188 token_start = idx;
189 while (idx < len && line[idx] != '=')
190 ++idx;
191 key.assign(&line[token_start], idx - token_start);
192 break;
193
194 case '=': {
195 // Beginning of the value.
196 if (key.empty()) {
197 DLOG(WARNING) << "stray value";
198 break;
199 }
200
201 token_start = idx;
202 while (idx < len && line[idx] != ')') {
203 if (line[idx] == '\\')
204 ++idx; // Skip escaped ')' characters.
205 ++idx;
206 }
207 size_t token_end = std::min(idx, len);
208 ++idx;
209
210 std::string value = MorkUnescape(
211 std::string(&line[token_start], token_end - token_start));
212 (*map)[key] = value;
213 key.clear();
214 break;
215 }
216 case '>':
217 // End of the map.
218 DLOG_IF(WARNING, key.empty()) <<
219 "map terminates inside of key/value pair";
220 return true;
221 }
222 }
223
224 // We should start reading the next line at the beginning.
225 start_index = 0;
226 } while (ReadLine(&line));
227
228 // We ran out of lines and the map never terminated. This probably indicates
229 // a parsing error.
230 DLOG(WARNING) << "didn't find end of key/value map";
231 return false;
232 }
233
234 // Parses a table row of the form [123(^45^67)..]
235 // (row id 123 has the value with id 67 for the column with id 45).
236 // A '^' prefix for a column or value references an entry in the column or
237 // value map. '=' is used as the separator when the value is a literal.
238 void MorkReader::ParseTable(const std::string& first_line,
239 size_t start_index,
240 const IndexMap* column_map) {
241 std::string line(first_line);
242
243 // Column index of the cell we're parsing, minus one if invalid.
244 int column_index = -1;
245
246 // Points to the current row we're parsing inside of the |table_|, will be
247 // NULL if we're not inside a row.
248 ColumnDataList* current_row = NULL;
249
250 bool in_meta_row = false;
251
252 do {
253 size_t idx = start_index;
254 size_t len = line.size();
255
256 while (idx < len) {
257 switch (line[idx++]) {
258 case '{':
259 // This marks the beginning of a table section. There's a lot of
260 // junk before the first row that looks like cell values but isn't.
261 // Skip to the first '['.
262 while (idx < len && line[idx] != '[') {
263 if (line[idx] == '{') {
264 in_meta_row = true; // The meta row is enclosed in { }
265 } else if (line[idx] == '}') {
266 in_meta_row = false;
267 }
268 ++idx;
269 }
270 break;
271
272 case '[': {
273 // Start of a new row. Consume the row id, up to the first '('.
274 // Row edits also have a table namespace, separated from the row id
275 // by a colon. We don't make use of the namespace, but we need to
276 // make sure not to consider it part of the row id.
277 if (current_row) {
278 DLOG(WARNING) << "unterminated row?";
279 current_row = NULL;
280 }
281
282 // Check for a '-' at the start of the id. This signifies that
283 // if the row already exists, we should delete all columns from it
284 // before adding the new values.
285 bool cut_columns;
286 if (idx < len && line[idx] == '-') {
287 cut_columns = true;
288 ++idx;
289 } else {
290 cut_columns = false;
291 }
292
293 // Locate the range of the ID.
294 size_t token_start = idx; // Index of the first char of the token.
295 while (idx < len &&
296 line[idx] != '(' &&
297 line[idx] != ']' &&
298 line[idx] != ':') {
299 ++idx;
300 }
301 size_t token_end = idx; // Index of the char following the token.
302 while (idx < len && line[idx] != '(' && line[idx] != ']') {
303 ++idx;
304 }
305
306 if (in_meta_row) {
307 // Need to create the meta row.
308 meta_row_.resize(columns_.size());
309 current_row = &meta_row_;
310 } else {
311 // Find or create the regular row for this.
312 IDString row_id(&line[token_start], token_end - token_start);
313 RowMap::iterator found_row = table_.find(row_id);
314 if (found_row == table_.end()) {
315 // We don't already have this row, create a new one for it.
316 current_row = new ColumnDataList(columns_.size());
317 table_[row_id] = current_row;
318 } else {
319 // The row already exists and we're adding/replacing things.
320 current_row = found_row->second;
321 }
322 }
323 if (cut_columns) {
324 for (size_t i = 0; i < current_row->size(); ++i)
325 (*current_row)[i].clear();
326 }
327 break;
328 }
329
330 case ']':
331 // We're done with the row.
332 current_row = NULL;
333 in_meta_row = false;
334 break;
335
336 case '(': {
337 if (!current_row) {
338 DLOG(WARNING) << "cell value outside of row";
339 break;
340 }
341
342 bool column_is_atom;
343 if (line[idx] == '^') {
344 column_is_atom = true;
345 ++idx; // This is not part of the column id, advance past it.
346 } else {
347 column_is_atom = false;
348 }
349 size_t token_start = idx;
350 while (idx < len && line[idx] != '^' && line[idx] != '=') {
351 if (line[idx] == '\\')
352 ++idx; // Skip escaped characters.
353 ++idx;
354 }
355
356 size_t token_end = std::min(idx, len);
357
358 IDString column;
359 if (column_is_atom)
360 column.assign(&line[token_start], token_end - token_start);
361 else
362 column = MorkUnescape(line.substr(token_start,
363 token_end - token_start));
364
365 IndexMap::const_iterator found_column = column_map->find(column);
366 if (found_column == column_map->end()) {
367 DLOG(WARNING) << "Column not in column map, discarding it";
368 column_index = -1;
369 } else {
370 column_index = found_column->second;
371 }
372 break;
373 }
374
375 case '=':
376 case '^': {
377 if (column_index == -1) {
378 DLOG(WARNING) << "stray ^ or = marker";
379 break;
380 }
381
382 bool value_is_atom = (line[idx - 1] == '^');
383 size_t token_start = idx - 1; // Include the '=' or '^' marker.
384 while (idx < len && line[idx] != ')') {
385 if (line[idx] == '\\')
386 ++idx; // Skip escaped characters.
387 ++idx;
388 }
389 size_t token_end = std::min(idx, len);
390 ++idx;
391
392 if (value_is_atom) {
393 (*current_row)[column_index].assign(&line[token_start],
394 token_end - token_start);
395 } else {
396 (*current_row)[column_index] =
397 MorkUnescape(line.substr(token_start, token_end - token_start));
398 }
399 column_index = -1;
400 }
401 break;
402 }
403 }
404
405 // Start parsing the next line at the beginning.
406 start_index = 0;
407 } while (current_row && ReadLine(&line));
408 }
409
410 bool MorkReader::ReadLine(std::string* line) {
411 line->resize(256);
412 std::getline(stream_, *line);
413 if (stream_.eof() || stream_.bad())
414 return false;
415
416 while (!line->empty() && (*line)[line->size() - 1] == '\\') {
417 // There is a continuation for this line. Read it and append.
418 std::string new_line;
419 std::getline(stream_, new_line);
420 if (stream_.eof())
421 return false;
422 line->erase(line->size() - 1);
423 line->append(new_line);
424 }
425
426 return true;
427 }
428
429 void MorkReader::NormalizeValue(std::string* value) const {
430 if (value->empty())
431 return;
432 MorkReader::StringMap::const_iterator i;
433 switch (value->at(0)) {
434 case '^':
435 // Hex ID, lookup the name for it in the |value_map_|.
436 i = value_map_.find(value->substr(1));
437 if (i == value_map_.end())
438 value->clear();
439 else
440 *value = i->second;
441 break;
442 case '=':
443 // Just use the literal after the equals sign.
444 value->erase(value->begin());
445 break;
446 default:
447 // Anything else is invalid.
448 value->clear();
449 break;
450 }
451 }
452
453 // Source:
454 // http://mxr.mozilla.org/firefox/source/toolkit/components/places/src/nsMorkHistoryImporter.cpp
455
// Fields of an entry (non-meta) history row that the importer looks at.
// The values double as array indexes, so they must stay dense and start at 0.
enum {
  kURLColumn = 0,
  kNameColumn = 1,
  kVisitCountColumn = 2,
  kHiddenColumn = 3,
  kTypedColumn = 4,
  kLastVisitColumn = 5,
  kColumnCount = 6  // Number of fields above. Keep me last.
};
466
// Column names as they appear in the Mork file, in the same order as the
// column enum in this file.
static const char* const gColumnNames[] = {
  "URL",
  "Name",
  "VisitCount",
  "Hidden",
  "Typed",
  "LastVisitDate",
};
470
471 struct TableReadClosure {
472 explicit TableReadClosure(const MorkReader& r)
473 : reader(r),
474 swap_bytes(false),
475 byte_order_column(-1) {
476 for (int i = 0; i < kColumnCount; ++i)
477 column_indexes[i] = -1;
478 }
479
480 // Backpointers to the reader and history we're operating on.
481 const MorkReader& reader;
482
483 // Whether we need to swap bytes (file format is other-endian).
484 bool swap_bytes;
485
486 // Indexes of the columns that we care about.
487 int column_indexes[kColumnCount];
488 int byte_order_column;
489 };
490
491 void AddToHistory(MorkReader::ColumnDataList* column_values,
492 const TableReadClosure& data,
493 std::vector<history::URLRow>* rows) {
494 std::string values[kColumnCount];
495
496 for (size_t i = 0; i < kColumnCount; ++i) {
497 if (data.column_indexes[i] != -1) {
498 values[i] = column_values->at(data.column_indexes[i]);
499 data.reader.NormalizeValue(&values[i]);
500 // Do not import hidden records.
501 if (i == kHiddenColumn && values[i] == "1")
502 return;
503 }
504 }
505
506 GURL url(values[kURLColumn]);
507
508 if (CanImportURL(url)) {
509 history::URLRow row(url);
510
511 // title is really a UTF-16 string at this point
512 std::wstring title;
513 if (data.swap_bytes) {
514 CodepageToWide(values[kNameColumn], "UTF-16BE",
515 OnStringUtilConversionError::SKIP, &title);
516 } else {
517 CodepageToWide(values[kNameColumn], "UTF-16LE",
518 OnStringUtilConversionError::SKIP, &title);
519 }
520 row.set_title(title);
521
522 int count = atoi(values[kVisitCountColumn].c_str());
523 if (count == 0)
524 count = 1;
525 row.set_visit_count(count);
526
527 time_t date = StringToInt64(values[kLastVisitColumn]);
528 if (date != 0)
529 row.set_last_visit(Time::FromTimeT(date/1000000));
530
531 bool is_typed = (values[kTypedColumn] == "1");
532 if (is_typed)
533 row.set_typed_count(1);
534
535 rows->push_back(row);
536 }
537 }
538
539 // It sets up the file stream and loops over the lines in the file to
540 // parse them, then adds the resulting row set to history.
541 void ImportHistoryFromFirefox2(std::wstring file, MessageLoop* loop,
542 ProfileWriter* writer) {
543 MorkReader reader;
544 reader.Read(file);
545
546 // Gather up the column ids so we don't need to find them on each row
547 TableReadClosure data(reader);
548 const MorkReader::MorkColumnList& columns = reader.columns();
549 for (size_t i = 0; i < columns.size(); ++i) {
550 for (int j = 0; j < kColumnCount; ++j)
551 if (columns[i].name == gColumnNames[j]) {
552 data.column_indexes[j] = static_cast<int>(i);
553 break;
554 }
555 if (columns[i].name == "ByteOrder")
556 data.byte_order_column = static_cast<int>(i);
557 }
558
559 // Determine the byte order from the table's meta-row.
560 const MorkReader::ColumnDataList& meta_row = reader.meta_row();
561 if (!meta_row.empty() && data.byte_order_column != -1) {
562 std::string byte_order = meta_row[data.byte_order_column];
563 if (!byte_order.empty()) {
564 // Note whether the file uses a non-native byte ordering.
565 // If it does, we'll have to swap bytes for PRUnichar values.
566 // "BE" and "LE" are the only recognized values, anything
567 // else is garbage and the file will be treated as native-endian
568 // (no swapping).
569 std::string byte_order_value(byte_order);
570 reader.NormalizeValue(&byte_order_value);
571 data.swap_bytes = (byte_order_value == "BE");
572 }
573 }
574
575 std::vector<history::URLRow> rows;
576 for (MorkReader::iterator i = reader.begin(); i != reader.end(); ++i)
577 AddToHistory(i->second, data, &rows);
578 if (!rows.empty())
579 loop->PostTask(FROM_HERE, NewRunnableMethod(writer,
580 &ProfileWriter::AddHistoryPage, rows));
581 }
OLDNEW
« no previous file with comments | « chrome/browser/mork_reader.h ('k') | chrome/browser/title_chomper.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698