Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Side by Side Diff: components/feedback/anonymizer_tool.cc

Issue 1530403003: Add anonymizer tool (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Rebase to ToT Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « components/feedback/anonymizer_tool.h ('k') | components/feedback/anonymizer_tool_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/feedback/anonymizer_tool.h"
6
7 #include <base/strings/string_number_conversions.h>
8 #include <base/strings/string_util.h>
9 #include <base/strings/stringprintf.h>
10
11 #include "third_party/re2/src/re2/re2.h"
12
13 using re2::RE2;
14
15 namespace feedback {
16
17 namespace {
18
// The |kCustomPatterns| array defines patterns to match and anonymize. Each
// pattern needs to define three capturing parentheses groups:
//
// - a group for the pattern before the identifier to be anonymized;
// - a group for the identifier to be anonymized;
// - a group for the pattern after the identifier to be anonymized.
//
// Every matched identifier (in the context of the whole pattern) is anonymized
// by replacing it with an incremental instance identifier. Every different
// pattern defines a separate instance identifier space. See the unit test for
// AnonymizerTool::AnonymizeCustomPattern for pattern anonymization examples.
//
// The position of a pattern in this array is also the index of its identifier
// space in AnonymizerTool::custom_patterns_, which is sized from this array.
//
// Useful regular expression syntax:
//
// +? is a non-greedy (lazy) +.
// \b matches a word boundary.
// (?i) turns on case insensitivity for the remainder of the regex.
// (?-s) turns off "dot matches newline" for the remainder of the regex.
// (?:regex) denotes non-capturing parentheses group.
//
// Both the characters and the pointers are const: the table is read-only data
// and must never be re-pointed at run time.
const char* const kCustomPatterns[] = {
    "(\\bCell ID: ')([0-9a-fA-F]+)(')",                  // ModemManager
    "(\\bLocation area code: ')([0-9a-fA-F]+)(')",       // ModemManager
    "(?i-s)(\\bssid[= ]')(.+)(')",                       // wpa_supplicant
    "(?-s)(\\bSSID - hexdump\\(len=[0-9]+\\): )(.+)()",  // wpa_supplicant
    "(?-s)(\\[SSID=)(.+?)(\\])",                         // shill
};
45
46 } // namespace
47
48 AnonymizerTool::AnonymizerTool()
49 : custom_patterns_(arraysize(kCustomPatterns)) {}
50
51 AnonymizerTool::~AnonymizerTool() {}
52
53 std::string AnonymizerTool::Anonymize(const std::string& input) {
54 std::string anonymized = AnonymizeMACAddresses(input);
55 anonymized = AnonymizeCustomPatterns(std::move(anonymized));
56 return anonymized;
57 }
58
59 std::string AnonymizerTool::AnonymizeMACAddresses(const std::string& input) {
60 // This regular expression finds the next MAC address. It splits the data into
61 // a section preceding the MAC address, an OUI (Organizationally Unique
62 // Identifier) part and a NIC (Network Interface Controller) specific part.
63
64 RE2::Options options;
65 // set_multiline of pcre is not supported by RE2, yet.
66 options.set_dot_nl(true); // Dot matches a new line.
67 RE2 mac_re(
68 "(.*?)("
69 "[0-9a-fA-F][0-9a-fA-F]:"
70 "[0-9a-fA-F][0-9a-fA-F]:"
71 "[0-9a-fA-F][0-9a-fA-F]):("
72 "[0-9a-fA-F][0-9a-fA-F]:"
73 "[0-9a-fA-F][0-9a-fA-F]:"
74 "[0-9a-fA-F][0-9a-fA-F])",
75 options);
76
77 std::string result;
78 result.reserve(input.size());
79
80 // Keep consuming, building up a result string as we go.
81 re2::StringPiece text(input);
82 std::string pre_mac, oui, nic;
83 while (re2::RE2::Consume(&text, mac_re, RE2::Arg(&pre_mac), RE2::Arg(&oui),
84 RE2::Arg(&nic))) {
85 // Look up the MAC address in the hash.
86 oui = base::ToLowerASCII(oui);
87 nic = base::ToLowerASCII(nic);
88 std::string mac = oui + ":" + nic;
89 std::string replacement_mac = mac_addresses_[mac];
90 if (replacement_mac.empty()) {
91 // If not found, build up a replacement MAC address by generating a new
92 // NIC part.
93 int mac_id = mac_addresses_.size();
94 replacement_mac = base::StringPrintf(
95 "%s:%02x:%02x:%02x", oui.c_str(), (mac_id & 0x00ff0000) >> 16,
96 (mac_id & 0x0000ff00) >> 8, (mac_id & 0x000000ff));
97 mac_addresses_[mac] = replacement_mac;
98 }
99
100 result += pre_mac;
101 result += replacement_mac;
102 }
103
104 text.AppendToString(&result);
105 return result;
106 }
107
108 std::string AnonymizerTool::AnonymizeCustomPatterns(std::string input) {
109 for (size_t i = 0; i < arraysize(kCustomPatterns); i++) {
110 input =
111 AnonymizeCustomPattern(input, kCustomPatterns[i], &custom_patterns_[i]);
112 }
113 return input;
114 }
115
116 // static
117 std::string AnonymizerTool::AnonymizeCustomPattern(
118 const std::string& input,
119 const std::string& pattern,
120 std::map<std::string, std::string>* identifier_space) {
121 RE2::Options options;
122 // set_multiline of pcre is not supported by RE2, yet.
123 options.set_dot_nl(true); // Dot matches a new line.
124 RE2 re("(.*?)" + pattern, options);
125 DCHECK_EQ(4, re.NumberOfCapturingGroups());
126
127 std::string result;
128 result.reserve(input.size());
129
130 // Keep consuming, building up a result string as we go.
131 re2::StringPiece text(input);
132 std::string pre_match, pre_matched_id, matched_id, post_matched_id;
133 while (RE2::Consume(&text, re, RE2::Arg(&pre_match),
134 RE2::Arg(&pre_matched_id), RE2::Arg(&matched_id),
135 RE2::Arg(&post_matched_id))) {
136 std::string replacement_id = (*identifier_space)[matched_id];
137 if (replacement_id.empty()) {
138 replacement_id = base::IntToString(identifier_space->size());
139 (*identifier_space)[matched_id] = replacement_id;
140 }
141
142 result += pre_match;
143 result += pre_matched_id;
144 result += replacement_id;
145 result += post_matched_id;
146 }
147 text.AppendToString(&result);
148 return result;
149 }
150
151 } // namespace feedback
OLDNEW
« no previous file with comments | « components/feedback/anonymizer_tool.h ('k') | components/feedback/anonymizer_tool_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698