Chromium Code Reviews| Index: components/password_manager/core/browser/import/csv_reader.cc |
| diff --git a/components/password_manager/core/browser/import/csv_reader.cc b/components/password_manager/core/browser/import/csv_reader.cc |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..8e6ebbdfc1cf541d2cf5bc9cb7bc163e07fd0b3a |
| --- /dev/null |
| +++ b/components/password_manager/core/browser/import/csv_reader.cc |
| @@ -0,0 +1,106 @@ |
| +// Copyright 2014 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +#include "components/password_manager/core/browser/import/csv_reader.h" |
| + |
| +#include "base/logging.h" |
| +#include "base/strings/string_util.h" |
| +#include "third_party/re2/re2/re2.h" |
| + |
| +namespace { |
| + |
| +// Regular expression that matches and captures the first row in CSV formatted |
| +// data. The trailing LF, if present, is consumed but not captured. |
| +const char kFirstRowRE[] = |
| + // Non-greedily match and capture a sequence of 1.) double-quoted runs, which |
| + // may hold any character except the double-quote, and 2.) characters other |
| + // than the double-quote and LF. A literal double-quote escaped as two |
| + // double-quotes simply reads as one quoted run ending and the next starting. |

vasilii
2014/08/06 17:42:17
Though the comment matches the regexp, it's not cl
engedy
2014/08/08 17:06:13
PTAL.
|
| + "^((?:\"[^\"]*\"|[^\"\\n])*?)" |
| + // Match and throw away EOL, or match end-of-string. |
| + "(?:\n|$)"; |
| + |
| +// Regular expression that matches and captures the value of the first field in |
| +// a CSV formatted row of data. The trailing comma, if present, is consumed but |
| +// not captured; enclosing double-quotes, if any, are kept in the capture. |
| +const char kFirstFieldRE[] = |
| + // Non-greedily match and capture a sequence of 1.) double-quoted runs, which |
| + // may themselves contain commas, and 2.) characters other than the |
| + // double-quote and the field separator comma (,). |
| + "^((?:\"[^\"]*\"|[^\",])*?)" |
| + // Match and throw away the field separator, or match end-of-string. |
| + "(?:,|$)"; |
| + |
| +// Consumes the next row of |remaining_csv_piece| and stores its fields, with |
| +// quoting removed, in |fields|. Returns false if a regex fails to match. |
| +bool ParseNextCSVRow(re2::StringPiece* remaining_csv_piece, |
| + std::vector<std::string>* fields) { |
| + static RE2 first_row_regex(kFirstRowRE); |
| + static RE2 first_field_regex(kFirstFieldRE); |

vasilii
2014/08/06 17:42:17
This is thread unsafe.
I'm also pretty sure that w
engedy
2014/08/08 17:06:13
I have moved this as non-static local variables to
vasilii
2014/08/11 11:56:38
Do you really need it? If yes, there is LazyInstan
engedy
2014/08/12 11:27:11
I think LazyInstance<> cannot be used with classes
vasilii
2014/08/12 12:20:01
You could hardcode the const char* constant into t
engedy
2014/08/12 13:02:53
Agreed. The current solution with CSVParse seems f
|
| + |
| + fields->clear(); |
| + |
| + std::string row; |
| + if (!RE2::Consume(remaining_csv_piece, first_row_regex, &row)) |
| + return false; |
| + |
| + re2::StringPiece field; |
| + re2::StringPiece remaining_row_piece(row); |
| + while (!remaining_row_piece.empty()) { |
| + if (!RE2::Consume(&remaining_row_piece, first_field_regex, &field)) |
| + return false; |
| + if (field.starts_with("\"") && field.ends_with("\"")) { |
| + CHECK_GE(field.size(), 2); |
| + field.remove_prefix(1); |
| + field.remove_suffix(1); |
| + } |
| + std::string field_copy(field.as_string()); |
| + ReplaceSubstringsAfterOffset(&field_copy, 0, "\"\"", "\""); |
| + fields->push_back(std::string()); |
| + fields->back().swap(field_copy); |
| + } |
| + return true; |
| +} |
| + |
| +} // namespace |
| + |
| +namespace password_manager { |
| + |
| +// static: parses |csv| into |column_names| (first row) and |records| (rest). |
| +bool CSVReader::Read(const base::StringPiece& csv, |
| + std::vector<std::string>* column_names, |
| + std::vector<ColumnNameToValueMap>* records) { |
| + DCHECK(column_names); |
| + DCHECK(records); |
| + |
| + column_names->clear(); |
| + records->clear(); |
| + |
| + // Normalize CRLF sequences to a single LF, the only EOL the row regex splits on. |
| + std::string normalized_csv(csv.as_string()); |
| + ReplaceSubstringsAfterOffset(&normalized_csv, 0, "\r\n", "\n"); |

vasilii
2014/08/06 17:42:17
What if the field contain \r\n. It's valid.
engedy
2014/08/08 17:06:13
Hmm, do you have a particular case in mind where t
vasilii
2014/08/11 11:56:38
Not really. Do you want to support the case when w
engedy
2014/08/12 11:27:11
I do not think that this would be a particularly f
|
| + |
| + // Read the header row; fail if it cannot be parsed. |
| + re2::StringPiece remaining_csv_piece(normalized_csv); |
| + if (!ParseNextCSVRow(&remaining_csv_piece, column_names)) |
| + return false; |
| + |
| + // Read the data rows; each one becomes a column-name-to-value map. |
| + std::vector<std::string> fields; |
| + while (!remaining_csv_piece.empty()) { |
| + if (!ParseNextCSVRow(&remaining_csv_piece, &fields)) |
| + return false; |

vasilii
2014/08/06 17:42:17
indent
engedy
2014/08/08 17:06:13
Done.
|
| + |
| + records->push_back(ColumnNameToValueMap()); |
| + for (size_t i = 0; i < column_names->size(); ++i) { |
| + std::string& target = records->back()[(*column_names)[i]]; |

vasilii
2014/08/06 17:42:16
column_names->at(i) looks nicer IMO
tfarina
2014/08/06 18:00:52
might be nicer, but it throws an exception. I'd sa
Peter Kasting
2014/08/07 03:59:50
Well, it doesn't actually throw, since we've disab
engedy
2014/08/08 17:06:13
Acknowledged.
|
| + if (i < fields.size()) |
| + target.swap(fields[i]); |

vasilii
2014/08/06 17:42:17
What if there are more fields than columns?
engedy
2014/08/08 17:06:13
I have changed to behavior, PTAL: Extra fields are
|
| + } |
| + } |
| + |
| + return true; |
| +} |
| + |
| +} // namespace password_manager |