Chromium Code Reviews| Index: components/password_manager/core/browser/import/csv_reader.cc |
| diff --git a/components/password_manager/core/browser/import/csv_reader.cc b/components/password_manager/core/browser/import/csv_reader.cc |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..d0cb2be1e91040053676212ba0bdafbe5e4d246e |
| --- /dev/null |
| +++ b/components/password_manager/core/browser/import/csv_reader.cc |
| @@ -0,0 +1,126 @@ |
| +// Copyright 2014 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +#include "components/password_manager/core/browser/import/csv_reader.h" |
| + |
| +#include "base/logging.h" |
| +#include "base/strings/string_util.h" |
| +#include "third_party/re2/re2/re2.h" |
| + |
| +namespace { |
| + |
| +// Regular expression that matches and captures the first row in CSV formatted |
| +// data (i.e., until the first newline that is not enclosed in double quotes). |
| +// Will throw away the potential trailing EOL character. |
|
vabr (Chromium)
2014/09/25 14:44:27
Please explain that this RE only understands \
engedy
2014/11/06 14:55:39
Done.
|
| +const char kFirstRowRE[] = |
| + // Capture the row: any sequence of 1.) quoted sections, i.e. runs of |
| + // non-quote characters (EOL included) between a pair of double quotes, or |
| + // 2.) unquoted characters other than the double quote and LF. A literal |
| + // double quote is escaped as two double quotes inside a quoted value, so it |
| + // simply reads as two adjacent quoted sections and needs no special care. |
| + "^((?:\"[^\"]*\"|[^\"\\n])*)" |
| + // Match and throw away EOL (LF only, CR is not recognized), or end-of-string. |
| + "(?:\n|$)"; |
| + |
| +// Regular expression that matches and captures the value of the first field in |
| +// a CSV formatted row of data. The trailing field separator is consumed but not |
| +// captured; the double quotes enclosing a quoted value stay in the capture. |
| +const char kFirstFieldRE[] = |
| + // Capture the field: any sequence of 1.) quoted sections, i.e. runs of |
| + // non-quote characters (commas included) between a pair of double quotes, or |
| + // 2.) unquoted characters other than the double quote and the comma (,). |
| + "^((?:\"[^\"]*\"|[^\",])*)" |
| + // Match and throw away the field separator, or match end-of-string. |
| + "(?:,|$)"; |
| + |
| +// Encapsulates the pre-compiled regular expressions and provides the logic to |
| +// parse fields from a CSV file row by row. |
| +class CSVParser { |
| + public: |
| +  // Creates a parser that reads rows from |csv| and pre-compiles the row and |
| +  // field regular expressions. Only a view (pointer and length) of |csv| is |
| +  // stored, so the underlying buffer must stay alive while the parser is used. |
| +  // Marked explicit so that a StringPiece never converts into a parser |
| +  // implicitly. |
| +  explicit CSVParser(base::StringPiece csv) |
| +      : remaining_csv_piece_(csv.data(), csv.size()), |
| +        first_row_regex_(kFirstRowRE), |
| +        first_field_regex_(kFirstFieldRE) {} |
| + |
| + bool ParseNextCSVRow(std::vector<std::string>* fields) { |
|
vabr (Chromium)
2014/09/25 14:44:27
Please try not to inline this non-trivial method h
vabr (Chromium)
2014/09/25 14:44:27
Please comment on the return value and the argumen
engedy
2014/11/06 14:55:39
Done.
engedy
2014/11/06 14:55:39
My reasoning was the following:
* We are in an an
vabr (Chromium)
2014/11/06 16:16:02
My concrete reason is boring: The style guide, per
engedy
2014/11/07 16:26:11
Roger that. I have refactored it.
|
| + fields->clear(); |
| + |
| + re2::StringPiece row; |
| + if (!RE2::Consume(&remaining_csv_piece_, first_row_regex_, &row)) |
| + return false; |
| + |
| + re2::StringPiece remaining_row_piece(row); |
| + do { |
| + re2::StringPiece field; |
| + if (!RE2::Consume(&remaining_row_piece, first_field_regex_, &field)) |
| + return false; |
| + if (field.starts_with("\"") && field.ends_with("\"")) { |
|
vabr (Chromium)
2014/09/25 14:44:27
Should you rather do
if (field.starts_with("\""))
engedy
2014/11/06 14:55:39
Done. Yes, furthermore, the regex will not fit in
|
| + CHECK_GE(field.size(), 2); |
| + field.remove_prefix(1); |
| + field.remove_suffix(1); |
| + } |
| + std::string field_copy(field.as_string()); |
| + ReplaceSubstringsAfterOffset(&field_copy, 0, "\"\"", "\""); |
| + fields->push_back(std::string()); |
| + fields->back().swap(field_copy); |
|
vabr (Chromium)
2014/09/25 14:44:27
IMO, the importance of performance benefits of thi
engedy
2014/11/06 14:55:39
You are right. I have removed the premature optimi
|
| + } while (!remaining_row_piece.empty()); |
| + |
| + if (row.ends_with(",")) |
| + fields->push_back(std::string()); |
| + |
| + return true; |
| + } |
| + |
| +  // Returns true as long as some of the CSV input has not been consumed yet. |
| +  bool HasMoreRows() const { |
| +    const bool input_exhausted = remaining_csv_piece_.empty(); |
| +    return !input_exhausted; |
| +  } |
| + |
| + private: |
| + re2::StringPiece remaining_csv_piece_; |
|
vabr (Chromium)
2014/09/25 14:44:27
Please comment on the data members.
engedy
2014/11/06 14:55:39
Can you think of anything that would add value? I
vabr (Chromium)
2014/11/06 16:16:02
You are right, the names make it clear enough.
No
|
| + |
| + RE2 first_row_regex_; |
|
vabr (Chromium)
2014/09/25 14:44:27
Could those be const?
engedy
2014/11/06 14:55:39
Yeah, done.
|
| + RE2 first_field_regex_; |
| + |
| + DISALLOW_COPY_AND_ASSIGN(CSVParser); |
| +}; |
| + |
| +} // namespace |
| + |
| +namespace password_manager { |
| + |
| +// static |
| +bool CSVReader::Read(base::StringPiece csv, |
| +                     std::vector<std::string>* column_names, |
| +                     std::vector<ColumnNameToValueMap>* records) { |
| +  DCHECK(column_names); |
| +  DCHECK(records); |
| + |
| +  column_names->clear(); |
| +  records->clear(); |
| + |
| +  // The parser only treats LF as a row terminator, so fold every CRLF sequence |
| +  // into a single LF before handing the data over. |
| +  std::string lf_only_csv(csv.as_string()); |
| +  ReplaceSubstringsAfterOffset(&lf_only_csv, 0, "\r\n", "\n"); |
| +  CSVParser parser(lf_only_csv); |
| + |
| +  // The first row is the header; its fields become the column names. |
| +  if (!parser.ParseNextCSVRow(column_names)) |
| +    return false; |
| + |
| +  // Each remaining row is one data record. Values are paired with column names |
| +  // by position; whatever has no counterpart on the other side is dropped. |
| +  std::vector<std::string> row_values; |
| +  while (parser.HasMoreRows()) { |
| +    if (!parser.ParseNextCSVRow(&row_values)) |
| +      return false; |
| + |
| +    const size_t paired_count = column_names->size() < row_values.size() |
| +                                    ? column_names->size() |
| +                                    : row_values.size(); |
| +    records->push_back(ColumnNameToValueMap()); |
| +    ColumnNameToValueMap& record = records->back(); |
| +    for (size_t col = 0; col < paired_count; ++col) |
| +      record[(*column_names)[col]].swap(row_values[col]); |
| +  } |
| + |
| +  return true; |
| +} |
| + |
| +} // namespace password_manager |