Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1448)

Side by Side Diff: components/feedback/anonymizer_tool.h

Issue 1543633003: Added anonymization patterns for URLs and email addresses (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@bug-567870-introduce-anonymizer
Patch Set: Hopefully final clarification Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD NEW
1 // Copyright 2015 The Chromium Authors. All rights reserved. 1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef COMPONENTS_FEEDBACK_ANONYMIZER_TOOL_H_ 5 #ifndef COMPONENTS_FEEDBACK_ANONYMIZER_TOOL_H_
6 #define COMPONENTS_FEEDBACK_ANONYMIZER_TOOL_H_ 6 #define COMPONENTS_FEEDBACK_ANONYMIZER_TOOL_H_
7 7
8 #include <map> 8 #include <map>
9 #include <string> 9 #include <string>
10 #include <vector> 10 #include <vector>
11 11
12 #include <base/macros.h> 12 #include "base/macros.h"
13 #include "base/memory/scoped_ptr.h"
14
15 namespace re2 {
16 class RE2;
17 }
13 18
14 namespace feedback { 19 namespace feedback {
15 20
21 struct CustomPatternWithoutContext {
22 // A string literal used in anonymized texts. Matches to the |pattern| are
23 // replaced with <|alias|: 1>, <|alias|: 2>, ...
24 const char* alias;
25 // An RE2 regexp with exactly one capture group. Matches will be replaced by
26 // the alias reference described above.
27 const char* pattern;
28 };
29
16 class AnonymizerTool { 30 class AnonymizerTool {
17 public: 31 public:
18 AnonymizerTool(); 32 AnonymizerTool();
19 ~AnonymizerTool(); 33 ~AnonymizerTool();
20 34
21 // Returns an anonymized version of |input|. PII-sensitive data (such as MAC 35 // Returns an anonymized version of |input|. PII-sensitive data (such as MAC
22 // addresses) in |input| is replaced with unique identifiers. 36 // addresses) in |input| is replaced with unique identifiers.
23 std::string Anonymize(const std::string& input); 37 std::string Anonymize(const std::string& input);
24 38
25 private: 39 private:
26 friend class AnonymizerToolTest; 40 friend class AnonymizerToolTest;
27 41
42 re2::RE2* GetRegExp(const std::string& pattern);
43
28 std::string AnonymizeMACAddresses(const std::string& input); 44 std::string AnonymizeMACAddresses(const std::string& input);
29 std::string AnonymizeCustomPatterns(std::string input); 45 std::string AnonymizeCustomPatterns(std::string input);
30 static std::string AnonymizeCustomPattern( 46 std::string AnonymizeCustomPatternWithContext(
31 const std::string& input, 47 const std::string& input,
32 const std::string& pattern, 48 const std::string& pattern,
33 std::map<std::string, std::string>* identifier_space); 49 std::map<std::string, std::string>* identifier_space);
50 std::string AnonymizeCustomPatternWithoutContext(
51 const std::string& input,
52 const CustomPatternWithoutContext& pattern,
53 std::map<std::string, std::string>* identifier_space);
34 54
35 // Map of MAC addresses discovered in anonymized strings to anonymized 55 // Map of MAC addresses discovered in anonymized strings to anonymized
36 // representations. 11:22:33:44:55:66 gets anonymized to 11:22:33:00:00:01, 56 // representations. 11:22:33:44:55:66 gets anonymized to 11:22:33:00:00:01,
37 // where the first three bytes represent the manufacturer. The last three 57 // where the first three bytes represent the manufacturer. The last three
38 // bytes are used to distinguish different MAC addresses and are incremented 58 // bytes are used to distinguish different MAC addresses and are incremented
39 // for each newly discovered MAC address. 59 // for each newly discovered MAC address.
40 std::map<std::string, std::string> mac_addresses_; 60 std::map<std::string, std::string> mac_addresses_;
41 61
42 // Like MAC addresses, identifiers in custom patterns are anonymized. 62 // Like MAC addresses, identifiers in custom patterns are anonymized.
43 // custom_patterns_[i] contains a map of original identifier to anonymized 63 // custom_patterns_with_context_[i] contains a map of original identifier to
44 // identifier for custom pattern number i. 64 // anonymized identifier for custom pattern number i.
45 std::vector<std::map<std::string, std::string>> custom_patterns_; 65 std::vector<std::map<std::string, std::string>> custom_patterns_with_context_;
66 std::vector<std::map<std::string, std::string>>
67 custom_patterns_without_context_;
68
69 // Cache to prevent the repeated compilation of the same regular expression
70 // pattern. Key is the string representation of the RegEx.
71 std::map<std::string, scoped_ptr<re2::RE2>> regexp_cache_;
46 72
47 DISALLOW_COPY_AND_ASSIGN(AnonymizerTool); 73 DISALLOW_COPY_AND_ASSIGN(AnonymizerTool);
48 }; 74 };
49 75
50 } // namespace feedback 76 } // namespace feedback
51 77
52 #endif // COMPONENTS_FEEDBACK_ANONYMIZER_TOOL_H_ 78 #endif // COMPONENTS_FEEDBACK_ANONYMIZER_TOOL_H_
OLD NEW
« no previous file with comments | « no previous file | components/feedback/anonymizer_tool.cc » ('j') | components/feedback/anonymizer_tool.cc » ('J')

Powered by Google App Engine
This is Rietveld 408576698