Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(647)

Side by Side Diff: components/feedback/anonymizer_tool.cc

Issue 1543633003: Added anonymization patterns for URLs and email addresses (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@bug-567870-introduce-anonymizer
Patch Set: Fixed renaming of one variable and resulting compiler error Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « components/feedback/anonymizer_tool.h ('k') | components/feedback/anonymizer_tool_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2015 The Chromium Authors. All rights reserved. 1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/feedback/anonymizer_tool.h" 5 #include "components/feedback/anonymizer_tool.h"
6 6
7 #include <base/strings/string_number_conversions.h> 7 #include <utility>
8 #include <base/strings/string_util.h> 8
9 #include <base/strings/stringprintf.h> 9 #include "base/strings/string_number_conversions.h"
10 #include "base/strings/string_util.h"
11 #include "base/strings/stringprintf.h"
10 12
11 #include "third_party/re2/src/re2/re2.h" 13 #include "third_party/re2/src/re2/re2.h"
12 14
13 using re2::RE2; 15 using re2::RE2;
14 16
15 namespace feedback { 17 namespace feedback {
16 18
17 namespace { 19 namespace {
18 20
19 // The |kCustomPatterns| array defines patterns to match and anonymize. Each 21 // The |kCustomPatternsWithContext| array defines patterns to match and
20 // pattern needs to define three capturing parentheses groups: 22 // anonymize. Each pattern needs to define three capturing parentheses groups:
21 // 23 //
22 // - a group for the pattern before the identifier to be anonymized; 24 // - a group for the pattern before the identifier to be anonymized;
23 // - a group for the identifier to be anonymized; 25 // - a group for the identifier to be anonymized;
24 // - a group for the pattern after the identifier to be anonymized. 26 // - a group for the pattern after the identifier to be anonymized.
25 // 27 //
28 // The first and the last capture group are the origin of the "WithContext"
29 // suffix in the name of this constant.
30 //
26 // Every matched identifier (in the context of the whole pattern) is anonymized 31 // Every matched identifier (in the context of the whole pattern) is anonymized
27 // by replacing it with an incremental instance identifier. Every different 32 // by replacing it with an incremental instance identifier. Every different
28 // pattern defines a separate instance identifier space. See the unit test for 33 // pattern defines a separate instance identifier space. See the unit test for
29 // AnonymizerTool::AnonymizeCustomPattern for pattern anonymization examples. 34 // AnonymizerTool::AnonymizeCustomPattern for pattern anonymization examples.
30 // 35 //
31 // Useful regular expression syntax: 36 // Useful regular expression syntax:
32 // 37 //
33 // +? is a non-greedy (lazy) +. 38 // +? is a non-greedy (lazy) +.
34 // \b matches a word boundary. 39 // \b matches a word boundary.
35 // (?i) turns on case insensitivity for the remainder of the regex. 40 // (?i) turns on case insensitivity for the remainder of the regex.
36 // (?-s) turns off "dot matches newline" for the remainder of the regex. 41 // (?-s) turns off "dot matches newline" for the remainder of the regex.
37 // (?:regex) denotes non-capturing parentheses group. 42 // (?:regex) denotes non-capturing parentheses group.
38 const char* kCustomPatterns[] = { 43 const char* kCustomPatternsWithContext[] = {
39 "(\\bCell ID: ')([0-9a-fA-F]+)(')", // ModemManager 44 "(\\bCell ID: ')([0-9a-fA-F]+)(')", // ModemManager
40 "(\\bLocation area code: ')([0-9a-fA-F]+)(')", // ModemManager 45 "(\\bLocation area code: ')([0-9a-fA-F]+)(')", // ModemManager
41 "(?i-s)(\\bssid[= ]')(.+)(')", // wpa_supplicant 46 "(?i-s)(\\bssid[= ]')(.+)(')", // wpa_supplicant
42 "(?-s)(\\bSSID - hexdump\\(len=[0-9]+\\): )(.+)()", // wpa_supplicant 47 "(?-s)(\\bSSID - hexdump\\(len=[0-9]+\\): )(.+)()", // wpa_supplicant
43 "(?-s)(\\[SSID=)(.+?)(\\])", // shill 48 "(?-s)(\\[SSID=)(.+?)(\\])", // shill
44 }; 49 };
45 50
51 // Helper macro: Non capturing group
52 #define NCG(x) "(?:" x ")"
53 // Helper macro: Optional non capturing group
54 #define OPT_NCG(x) NCG(x) "?"
55
56 //////////////////////////////////////////////////////////////////////////
57 // Patterns for URLs, or better IRIs, based on RFC 3987 with an artificial
58 // limitation on the scheme to increase precision. Otherwise anything
59 // like "ID:" would be considered an IRI.
60
61 #define UNRESERVED "[-a-z0-9._~]"
62 #define RESERVED NGC(GEN_DELIMS "|" SUB_DELIMS)
63 #define SUB_DELIMS "[!$&'()*+,;=]"
64 #define GEN_DELIMS "[:/?#[\\]@]"
65
66 #define DIGIT "[0-9]"
67 #define HEXDIG "[0-9a-f]"
68
69 #define PCT_ENCODED "%" HEXDIG HEXDIG
70
71 #define DEC_OCTET NCG("[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-9]")
72
73 #define IPV4ADDRESS DEC_OCTET "\\." DEC_OCTET "\\." DEC_OCTET "\\." DEC_OCTET
74
75 #define H16 NCG(HEXDIG) "{1,4}"
76 #define LS32 NCG(H16 ":" H16 "|" IPV4ADDRESS)
77
78 #define IPV6ADDRESS NCG( \
79 NCG(H16 ":") "{6}" LS32 "|" \
80 "::" NCG(H16 ":") "{5}" LS32 "|" \
81 OPT_NCG( H16) "::" NCG(H16 ":") "{4}" LS32 "|" \
82 OPT_NCG( NCG(H16 ":") "{0,1}" H16) "::" NCG(H16 ":") "{3}" LS32 "|" \
83 OPT_NCG( NCG(H16 ":") "{0,2}" H16) "::" NCG(H16 ":") "{2}" LS32 "|" \
84 OPT_NCG( NCG(H16 ":") "{0,3}" H16) "::" NCG(H16 ":") LS32 "|" \
85 OPT_NCG( NCG(H16 ":") "{0,4}" H16) "::" LS32 "|" \
86 OPT_NCG( NCG(H16 ":") "{0,5}" H16) "::" H16 "|" \
87 OPT_NCG( NCG(H16 ":") "{0,6}" H16) "::")
88
89 #define IPVFUTURE \
90 "v" HEXDIG \
91 "+" \
92 "\\." NCG(UNRESERVED "|" SUB_DELIMS \
93 "|" \
94 ":") "+"
95
96 #define IP_LITERAL "\\[" NCG(IPV6ADDRESS "|" IPVFUTURE) "\\]"
97
98 #define PORT DIGIT "*"
99
100 // This is a deviation from RFC 3987
101 #define SCHEME NCG("http|https|ftp|chrome|chrome-extension|android")
102
103 #define IPRIVATE \
104 "[" \
105 "\\x{E000}-\\x{F8FF}" \
106 "\\x{F0000}-\\x{FFFFD}" \
107 "\\x{100000}-\\x{10FFFD}" \
108 "]"
109
110 #define UCSCHAR \
111 "[" "\\x{A0}-\\x{D7FF}" "\\x{F900}-\\x{FDCF}" "\\x{FDF0}-\\x{FFEF}" \
112 "\\x{10000}-\\x{1FFFD}" "\\x{20000}-\\x{2FFFD}" "\\x{30000}-\\x{3FFFD}" \
113 "\\x{40000}-\\x{4FFFD}" "\\x{50000}-\\x{5FFFD}" "\\x{60000}-\\x{6FFFD}" \
114 "\\x{70000}-\\x{7FFFD}" "\\x{80000}-\\x{8FFFD}" "\\x{90000}-\\x{9FFFD}" \
115 "\\x{A0000}-\\x{AFFFD}" "\\x{B0000}-\\x{BFFFD}" "\\x{C0000}-\\x{CFFFD}" \
116 "\\x{D0000}-\\x{DFFFD}" "\\x{E1000}-\\x{EFFFD}" "]"
117
118 #define IUNRESERVED NCG("[-a-z0-9._~]" "|" UCSCHAR)
119
120 #define IPCHAR NCG(IUNRESERVED "|" PCT_ENCODED "|" SUB_DELIMS "|" "[:@]")
121 #define IFRAGMENT NCG(IPCHAR "|" "[/?]") "*"
122 #define IQUERY NCG(IPCHAR "|" IPRIVATE "|" "[/?]") "*"
123
124 #define ISEGMENT IPCHAR "*"
125 #define ISEGMENT_NZ IPCHAR "+"
126 #define ISEGMENT_NZ_NC \
127 NCG(IUNRESERVED "|" PCT_ENCODED "|" SUB_DELIMS \
128 "|" "@") "+"
129
130 #define IPATH_EMPTY ""
131 #define IPATH_ROOTLESS ISEGMENT_NZ NCG("/" ISEGMENT) "*"
132 #define IPATH_NOSCHEME ISEGMENT_NZ_NC NCG("/" ISEGMENT) "*"
133 #define IPATH_ABSOLUTE "/" OPT_NCG(ISEGMENT_NZ NCG("/" ISEGMENT) "*")
134 #define IPATH_ABEMPTY NCG("/" ISEGMENT) "*"
135
136 #define IPATH NCG(IPATH_ABEMPTY "|" IPATH_ABSOLUTE "|" IPATH_NOSCHEME "|" \
137 IPATH_ROOTLESS "|" IPATH_EMPTY)
138
139 #define IREG_NAME NCG(IUNRESERVED "|" PCT_ENCODED "|" SUB_DELIMS) "*"
140
141 #define IHOST NCG(IP_LITERAL "|" IPV4ADDRESS "|" IREG_NAME)
142 #define IUSERINFO NCG(IUNRESERVED "|" PCT_ENCODED "|" SUB_DELIMS "|" ":") "*"
143 #define IAUTHORITY OPT_NCG(IUSERINFO "@") IHOST OPT_NCG(":" PORT)
144
145 #define IRELATIVE_PART NCG("//" IAUTHORITY IPATH_ABEMPTY "|" IPATH_ABSOLUTE \
146 "|" IPATH_NOSCHEME "|" IPATH_EMPTY)
147
148 #define IRELATIVE_REF IRELATIVE_PART OPT_NCG("?" IQUERY) OPT_NCG("#" IFRAGMENT)
149
150 // RFC 3987 requires IPATH_EMPTY here but it is omitted so that statements
151 // that end with "Android:" for example are not considered a URL.
152 #define IHIER_PART NCG("//" IAUTHORITY IPATH_ABEMPTY "|" IPATH_ABSOLUTE \
153 "|" IPATH_ROOTLESS)
154
155 #define ABSOLUTE_IRI SCHEME ":" IHIER_PART OPT_NCG("?" IQUERY)
156
157 #define IRI SCHEME ":" IHIER_PART OPT_NCG("\\?" IQUERY) OPT_NCG("#" IFRAGMENT)
158
159 #define IRI_REFERENCE NCG(IRI "|" IRELATIVE_REF)
160
161 // TODO(battre): Use http://tools.ietf.org/html/rfc5322 to represent email
162 // addresses. Capture names as well ("First Lastname" <foo@bar.com>).
163
164 // The |kCustomPatternsWithoutContext| array defines further patterns to match
165 // and anonymize. Each pattern consists of a single capturing group.
166 CustomPatternWithoutContext kCustomPatternsWithoutContext[] = {
167 {"URL", "(?i)(" IRI ")"},
168 // Email Addresses need to come after URLs because they can be part
169 // of a query parameter.
170 {"email", "(?i)([0-9a-z._%+-]+@[a-z0-9.-]+\\.[a-z]{2,6})"},
171 // IP filter rules need to come after URLs so that they don't disturb the
172 // URL pattern in case the IP address is part of a URL.
173 {"IPv4", "(?i)(" IPV4ADDRESS ")"},
174 {"IPv6", "(?i)(" IPV6ADDRESS ")"},
175 };
176
177 // Functor template that allows calling a function with a set of parameters
178 // that get wrapped into an array. This is a variation of RE2's
179 // VariadicFunction2 with an extra parameter.
180 template <typename Result, typename Param0, typename Param1, typename Param2,
181 typename Arg,
182 Result (*Func)(Param0, Param1, Param2, Arg*[], int count)>
183 class VariadicFunction3 {
184 public:
185 Result operator()(Param0 p0, Param1 p1, Param2 p2) const {
186 return Func(p0, p1, p2, nullptr, 0);
187 }
188
189 Result operator()(Param0 p0, Param1 p1, Param2 p2, Arg* a0) const {
190 Arg* args[] = {a0};
191 return Func(p0, p1, p2, args, 1);
192 }
193
194 Result operator()(Param0 p0, Param1 p1, Param2 p2, Arg* a0, Arg* a1) const {
195 Arg* args[] = {a0, a1};
196 return Func(p0, p1, p2, args, 2);
197 }
198
199 Result operator()(Param0 p0, Param1 p1, Param2 p2, Arg* a0, Arg* a1, Arg* a2)
200 const {
201 Arg* args[] = {a0, a1, a2};
202 return Func(p0, p1, p2, args, 3);
vasilii 2016/01/08 17:02:26 I think you can collapse these definition using a
battre 2016/01/11 09:02:20 I don't even need this anymore once I use a variad
203 }
204 };
205
206 // Like RE2's FindAndConsume, searches for the first occurrence of |pattern| in
207 // |input| and consumes the bytes until the end of the pattern matching. Unlike
208 // FindAndConsume, the bytes skipped before the match of |pattern| are stored
209 // in |skipped_input|.
210 bool FindAndConsumeAndGetSkippedN(re2::StringPiece* input,
211 const re2::RE2& pattern,
212 re2::StringPiece* skipped_input,
213 re2::StringPiece* args[],
214 int argc) {
215 re2::StringPiece old_input = *input;
216
217 re2::RE2::Arg a0(argc > 0 ? args[0] : nullptr);
218 re2::RE2::Arg a1(argc > 1 ? args[1] : nullptr);
219 re2::RE2::Arg a2(argc > 2 ? args[2] : nullptr);
220 const re2::RE2::Arg* const wrapped_args[] = {&a0, &a1, &a2};
221 CHECK_LE(argc, 3);
222
223 bool result = re2::RE2::FindAndConsumeN(input, pattern, wrapped_args, argc);
224
225 if (skipped_input && result && argc > 0) {
226 size_t bytes_skipped = args[0]->data() - old_input.data();
227 *skipped_input = re2::StringPiece(old_input.data(), bytes_skipped);
228 }
229 return result;
230 }
231
232 const VariadicFunction3<bool,
233 re2::StringPiece*,
234 const re2::RE2&,
235 re2::StringPiece*,
236 re2::StringPiece,
237 &FindAndConsumeAndGetSkippedN>
238 FindAndConsumeAndGetSkipped = {};
239
46 } // namespace 240 } // namespace
47 241
48 AnonymizerTool::AnonymizerTool() 242 AnonymizerTool::AnonymizerTool()
49 : custom_patterns_(arraysize(kCustomPatterns)) {} 243 : custom_patterns_with_context_(arraysize(kCustomPatternsWithContext)),
244 custom_patterns_without_context_(
245 arraysize(kCustomPatternsWithoutContext)) {}
50 246
51 AnonymizerTool::~AnonymizerTool() {} 247 AnonymizerTool::~AnonymizerTool() {}
52 248
53 std::string AnonymizerTool::Anonymize(const std::string& input) { 249 std::string AnonymizerTool::Anonymize(const std::string& input) {
54 std::string anonymized = AnonymizeMACAddresses(input); 250 std::string anonymized = AnonymizeMACAddresses(input);
55 anonymized = AnonymizeCustomPatterns(std::move(anonymized)); 251 anonymized = AnonymizeCustomPatterns(std::move(anonymized));
56 return anonymized; 252 return anonymized;
57 } 253 }
58 254
255 RE2* AnonymizerTool::GetRegExp(const std::string& pattern) {
256 if (regexp_cache_.find(pattern) == regexp_cache_.end()) {
257 RE2::Options options;
258 // set_multiline of pcre is not supported by RE2, yet.
259 options.set_dot_nl(true); // Dot matches a new line.
260 scoped_ptr<RE2> re = make_scoped_ptr(new RE2(pattern, options));
261 DCHECK_EQ(re2::RE2::NoError, re->error_code())
262 << "Failed to parse:\n" << pattern << "\n" << re->error();
263 regexp_cache_[pattern] = std::move(re);
264 }
265 return regexp_cache_[pattern].get();
266 }
267
59 std::string AnonymizerTool::AnonymizeMACAddresses(const std::string& input) { 268 std::string AnonymizerTool::AnonymizeMACAddresses(const std::string& input) {
60 // This regular expression finds the next MAC address. It splits the data into 269 // This regular expression finds the next MAC address. It splits the data into
61 // a section preceding the MAC address, an OUI (Organizationally Unique 270 // an OUI (Organizationally Unique Identifier) part and a NIC (Network
62 // Identifier) part and a NIC (Network Interface Controller) specific part. 271 // Interface Controller) specific part.
63 272
64 RE2::Options options; 273 RE2* mac_re = GetRegExp(
65 // set_multiline of pcre is not supported by RE2, yet. 274 "([0-9a-fA-F][0-9a-fA-F]:"
66 options.set_dot_nl(true); // Dot matches a new line.
67 RE2 mac_re(
68 "(.*?)("
69 "[0-9a-fA-F][0-9a-fA-F]:"
70 "[0-9a-fA-F][0-9a-fA-F]:" 275 "[0-9a-fA-F][0-9a-fA-F]:"
71 "[0-9a-fA-F][0-9a-fA-F]):(" 276 "[0-9a-fA-F][0-9a-fA-F]):("
72 "[0-9a-fA-F][0-9a-fA-F]:" 277 "[0-9a-fA-F][0-9a-fA-F]:"
73 "[0-9a-fA-F][0-9a-fA-F]:" 278 "[0-9a-fA-F][0-9a-fA-F]:"
74 "[0-9a-fA-F][0-9a-fA-F])", 279 "[0-9a-fA-F][0-9a-fA-F])");
75 options);
76 280
77 std::string result; 281 std::string result;
78 result.reserve(input.size()); 282 result.reserve(input.size());
79 283
80 // Keep consuming, building up a result string as we go. 284 // Keep consuming, building up a result string as we go.
81 re2::StringPiece text(input); 285 re2::StringPiece text(input);
82 std::string pre_mac, oui, nic; 286 re2::StringPiece skipped;
83 while (re2::RE2::Consume(&text, mac_re, RE2::Arg(&pre_mac), RE2::Arg(&oui), 287 re2::StringPiece pre_mac, oui, nic;
84 RE2::Arg(&nic))) { 288 while (FindAndConsumeAndGetSkipped(&text, *mac_re, &skipped, &oui, &nic)) {
85 // Look up the MAC address in the hash. 289 // Look up the MAC address in the hash.
86 oui = base::ToLowerASCII(oui); 290 std::string oui_string = base::ToLowerASCII(oui.as_string());
87 nic = base::ToLowerASCII(nic); 291 std::string nic_string = base::ToLowerASCII(nic.as_string());
88 std::string mac = oui + ":" + nic; 292 std::string mac = oui_string + ":" + nic_string;
89 std::string replacement_mac = mac_addresses_[mac]; 293 std::string replacement_mac = mac_addresses_[mac];
90 if (replacement_mac.empty()) { 294 if (replacement_mac.empty()) {
91 // If not found, build up a replacement MAC address by generating a new 295 // If not found, build up a replacement MAC address by generating a new
92 // NIC part. 296 // NIC part.
93 int mac_id = mac_addresses_.size(); 297 int mac_id = mac_addresses_.size();
94 replacement_mac = base::StringPrintf( 298 replacement_mac = base::StringPrintf(
95 "%s:%02x:%02x:%02x", oui.c_str(), (mac_id & 0x00ff0000) >> 16, 299 "%s:%02x:%02x:%02x", oui_string.c_str(), (mac_id & 0x00ff0000) >> 16,
96 (mac_id & 0x0000ff00) >> 8, (mac_id & 0x000000ff)); 300 (mac_id & 0x0000ff00) >> 8, (mac_id & 0x000000ff));
97 mac_addresses_[mac] = replacement_mac; 301 mac_addresses_[mac] = replacement_mac;
98 } 302 }
99 303
100 result += pre_mac; 304 skipped.AppendToString(&result);
101 result += replacement_mac; 305 result += replacement_mac;
102 } 306 }
103 307
104 text.AppendToString(&result); 308 text.AppendToString(&result);
105 return result; 309 return result;
106 } 310 }
107 311
312
108 std::string AnonymizerTool::AnonymizeCustomPatterns(std::string input) { 313 std::string AnonymizerTool::AnonymizeCustomPatterns(std::string input) {
109 for (size_t i = 0; i < arraysize(kCustomPatterns); i++) { 314 for (size_t i = 0; i < arraysize(kCustomPatternsWithContext); i++) {
110 input = 315 input =
111 AnonymizeCustomPattern(input, kCustomPatterns[i], &custom_patterns_[i]); 316 AnonymizeCustomPatternWithContext(input, kCustomPatternsWithContext[i],
317 &custom_patterns_with_context_[i]);
318 }
319 for (size_t i = 0; i < arraysize(kCustomPatternsWithoutContext); i++) {
320 input = AnonymizeCustomPatternWithoutContext(
321 input, kCustomPatternsWithoutContext[i],
322 &custom_patterns_without_context_[i]);
112 } 323 }
113 return input; 324 return input;
114 } 325 }
115 326
116 // static 327 std::string AnonymizerTool::AnonymizeCustomPatternWithContext(
117 std::string AnonymizerTool::AnonymizeCustomPattern(
118 const std::string& input, 328 const std::string& input,
119 const std::string& pattern, 329 const std::string& pattern,
120 std::map<std::string, std::string>* identifier_space) { 330 std::map<std::string, std::string>* identifier_space) {
121 RE2::Options options; 331 RE2* re = GetRegExp(pattern);
122 // set_multiline of pcre is not supported by RE2, yet. 332 DCHECK_EQ(3, re->NumberOfCapturingGroups());
123 options.set_dot_nl(true); // Dot matches a new line.
124 RE2 re("(.*?)" + pattern, options);
125 DCHECK_EQ(4, re.NumberOfCapturingGroups());
126 333
127 std::string result; 334 std::string result;
128 result.reserve(input.size()); 335 result.reserve(input.size());
129 336
130 // Keep consuming, building up a result string as we go. 337 // Keep consuming, building up a result string as we go.
131 re2::StringPiece text(input); 338 re2::StringPiece text(input);
132 std::string pre_match, pre_matched_id, matched_id, post_matched_id; 339 re2::StringPiece skipped;
133 while (RE2::Consume(&text, re, RE2::Arg(&pre_match), 340 re2::StringPiece pre_match, pre_matched_id, matched_id, post_matched_id;
134 RE2::Arg(&pre_matched_id), RE2::Arg(&matched_id), 341 while (FindAndConsumeAndGetSkipped(&text, *re, &skipped, &pre_matched_id,
135 RE2::Arg(&post_matched_id))) { 342 &matched_id, &post_matched_id)) {
136 std::string replacement_id = (*identifier_space)[matched_id]; 343 std::string matched_id_as_string = matched_id.as_string();
344 std::string replacement_id = (*identifier_space)[matched_id_as_string];
137 if (replacement_id.empty()) { 345 if (replacement_id.empty()) {
138 replacement_id = base::IntToString(identifier_space->size()); 346 replacement_id = base::IntToString(identifier_space->size());
139 (*identifier_space)[matched_id] = replacement_id; 347 (*identifier_space)[matched_id_as_string] = replacement_id;
140 } 348 }
141 349
142 result += pre_match; 350 skipped.AppendToString(&result);
143 result += pre_matched_id; 351 pre_matched_id.AppendToString(&result);
144 result += replacement_id; 352 result += replacement_id;
145 result += post_matched_id; 353 post_matched_id.AppendToString(&result);
146 } 354 }
147 text.AppendToString(&result); 355 text.AppendToString(&result);
148 return result; 356 return result;
357 }
358
359 std::string AnonymizerTool::AnonymizeCustomPatternWithoutContext(
360 const std::string& input,
361 const CustomPatternWithoutContext& pattern,
362 std::map<std::string, std::string>* identifier_space) {
363 RE2* re = GetRegExp(pattern.pattern);
364 DCHECK_EQ(1, re->NumberOfCapturingGroups());
365
366 std::string result;
367 result.reserve(input.size());
368
369 // Keep consuming, building up a result string as we go.
370 re2::StringPiece text(input);
371 re2::StringPiece skipped;
372 re2::StringPiece matched_id;
373 while (FindAndConsumeAndGetSkipped(&text, *re, &skipped, &matched_id)) {
374 std::string matched_id_as_string = matched_id.as_string();
375 std::string replacement_id = (*identifier_space)[matched_id_as_string];
376 if (replacement_id.empty()) {
377 // The weird Uint64ToString trick is because Windows does not like to deal
378 // with %zu and a size_t in printf, nor does it support %llu.
379 replacement_id = base::StringPrintf(
380 "<%s: %s>", pattern.alias,
381 base::Uint64ToString(identifier_space->size()).c_str());
382 (*identifier_space)[matched_id_as_string] = replacement_id;
383 }
384
385 skipped.AppendToString(&result);
386 result += replacement_id;
387 }
388 text.AppendToString(&result);
389 return result;
149 } 390 }
150 391
151 } // namespace feedback 392 } // namespace feedback
OLDNEW
« no previous file with comments | « components/feedback/anonymizer_tool.h ('k') | components/feedback/anonymizer_tool_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698