components/password_manager/core/browser/import/csv_reader.cc - Issue 447763002: Implement CSVReader and CSVWriter to be used for password import and export.

Unified Diff: components/password_manager/core/browser/import/csv_reader.cc

Issue 447763002: Implement CSVReader and CSVWriter to be used for password import and export. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Add build config header. Created 5 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« no previous file with comments | « components/password_manager/core/browser/import/csv_reader.h ('k') | components/password_manager/core/browser/import/csv_reader_unittest.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: components/password_manager/core/browser/import/csv_reader.cc

diff --git a/components/password_manager/core/browser/import/csv_reader.cc b/components/password_manager/core/browser/import/csv_reader.cc

new file mode 100644

index 0000000000000000000000000000000000000000..a32ef507e2685c0b838deedc28a9525378e0d549

--- /dev/null

+++ b/components/password_manager/core/browser/import/csv_reader.cc

@@ -0,0 +1,132 @@

+// Use of this source code is governed by a BSD-style license that can be

+// found in the LICENSE file.

+#include "components/password_manager/core/browser/import/csv_reader.h"

+#include "base/logging.h"

+#include "base/strings/string_util.h"

+#include "third_party/re2/re2/re2.h"

+namespace {

+// RE2 pattern that matches and captures the first row of CSV formatted data,

+// i.e., everything up to the first newline not enclosed in double quotes.

+// The row's trailing EOL, if present, is matched but kept out of the capture

+// (EOLs are expected to have already been normalized to a single '\n').

+const char kFirstRowRE[] =

+ // Capture group: any sequence of 1.) arbitrary characters, EOL included,

+ // inside correctly matched double-quotes, or 2.) characters other than the

+ // double quote and EOL. Because literal double-quotes are escaped as two

+ // double quotes and are always enclosed in double quotes, they do not need

+ // special treatment as far as splitting on EOL is concerned. Note that this

+ // RE still accepts malformed rows such as: "a"b"c"\n.

+ "^((?:\"[^\"]*\"|[^\"\\n])*)"

+ // Match and discard the row's EOL, or match end-of-string for the last row.

+ "(?:\n|$)";

+// RE2 pattern that matches and captures the value of the first field in one

+// row of CSV formatted data. The trailing comma, if present, is matched but

+// not captured; the enclosing double quotes of a quoted value are captured.

+const char kFirstFieldRE[] =

+ // Capture group: either 1.) zero or more back-to-back double-quoted runs, so

+ // an escaped quote ("") inside a quoted value stays within the match, or

+ // 2.) characters other than the double quote and the field separator (,).

+ // Mixing both kinds is not allowed, which rejects inputs like: "a"b"c".

+ "^((?:\"[^\"]*\")*|[^\",]*)"

+ // Match and discard the field separator, or match end-of-string.

+ "(?:,|$)";

+// Encapsulates the pre-compiled regular expressions (kFirstRowRE and
+// kFirstFieldRE) and provides the logic to parse fields from CSV data row by
+// row.
+//
+// The parser does not copy its input: it keeps a re2::StringPiece over the
+// characters handed to the constructor, so that buffer must stay alive and
+// unmodified for as long as the parser is used.
+class CSVParser {
+ public:
+  // |csv| is the complete CSV text to parse. The constructor is explicit so
+  // that a string is never silently converted into a parser.
+  explicit CSVParser(base::StringPiece csv)
+      : remaining_csv_piece_(csv.data(), csv.size()),
+        first_row_regex_(kFirstRowRE),
+        first_field_regex_(kFirstFieldRE) {}
+
+  // Reads and unescapes values from the next row, and writes them to |fields|.
+  // Consumes the EOL terminator. Returns false on syntax error.
+  bool ParseNextCSVRow(std::vector<std::string>* fields);
+
+  // Returns true as long as some of the input has not been consumed yet.
+  bool HasMoreRows() const { return !remaining_csv_piece_.empty(); }
+
+ private:
+  // The part of the input that has not been parsed yet; shrinks from the
+  // front with every successfully parsed row.
+  re2::StringPiece remaining_csv_piece_;
+
+  const RE2 first_row_regex_;
+  const RE2 first_field_regex_;
+
+  DISALLOW_COPY_AND_ASSIGN(CSVParser);
+};

+// Parses the next row of the remaining input into |fields|, which is cleared
+// first. Quoted values lose their enclosing double quotes and every escaped
+// quote ("") is turned back into a single double quote.
+//
+// Returns false if the remaining input does not start with a well-formed row
+// or if one of the row's fields is malformed; in the latter case |fields| may
+// still hold the values parsed before the error.
+bool CSVParser::ParseNextCSVRow(std::vector<std::string>* fields) {
+  fields->clear();
+
+  // Split the next row off the input; this also consumes the row's EOL.
+  re2::StringPiece row;
+  if (!RE2::Consume(&remaining_csv_piece_, first_row_regex_, &row))
+    return false;
+
+  re2::StringPiece remaining_row_piece(row);
+  // The body runs at least once, so even an empty row yields one empty field.
+  do {
+    re2::StringPiece field;
+    if (!RE2::Consume(&remaining_row_piece, first_field_regex_, &field))
+      return false;
+    if (field.starts_with("\"")) {
+      // kFirstFieldRE only captures a leading quote as part of a correctly
+      // closed quoted run, so these conditions are invariants, not input
+      // validation.
+      CHECK(field.ends_with("\""));
+      CHECK_GE(field.size(), 2);
+      field.remove_prefix(1);
+      field.remove_suffix(1);
+    }
+    // Store the value first and unescape it in place to avoid a second copy.
+    fields->push_back(field.as_string());
+    ReplaceSubstringsAfterOffset(&fields->back(), 0, "\"\"", "\"");
+  } while (!remaining_row_piece.empty());
+
+  // After a trailing comma nothing is left to consume, so the loop above ends
+  // without emitting the final, empty field; add it here.
+  if (row.ends_with(","))
+    fields->push_back(std::string());
+
+  return true;
+}

+} // namespace

+namespace password_manager {

+// Parses |csv| into |column_names| (the values of the first row) and
+// |records| (one map from column name to value per subsequent row). Both
+// outputs are cleared first.
+//
+// A row with fewer fields than there are columns simply lacks the missing
+// keys; fields beyond the last column are dropped. Returns false on the first
+// syntax error, in which case |records| may still hold the rows parsed before
+// the error.
+bool ReadCSV(base::StringPiece csv,
+             std::vector<std::string>* column_names,
+             std::vector<std::map<std::string, std::string>>* records) {
+  DCHECK(column_names);
+  DCHECK(records);
+
+  column_names->clear();
+  records->clear();
+
+  // Normalize EOL sequences so that we uniformly use a single LF character.
+  // The parser only keeps a view of this string, so it has to outlive |parser|.
+  std::string normalized_csv(csv.as_string());
+  ReplaceSubstringsAfterOffset(&normalized_csv, 0, "\r\n", "\n");
+
+  // Read header row.
+  CSVParser parser(normalized_csv);
+  if (!parser.ParseNextCSVRow(column_names))
+    return false;
+
+  // Read data record rows.
+  std::vector<std::string> fields;
+  while (parser.HasMoreRows()) {
+    if (!parser.ParseNextCSVRow(&fields))
+      return false;
+
+    records->resize(records->size() + 1);
+    for (size_t i = 0; i < column_names->size() && i < fields.size(); ++i) {
+      // |fields| is overwritten by the next row anyway, so swap the value into
+      // the record instead of copying it.
+      records->back()[(*column_names)[i]].swap(fields[i]);
+    }
+  }
+
+  return true;
+}

+} // namespace password_manager