OLD | NEW |
1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef COMPONENTS_FEEDBACK_ANONYMIZER_TOOL_H_ | 5 #ifndef COMPONENTS_FEEDBACK_ANONYMIZER_TOOL_H_ |
6 #define COMPONENTS_FEEDBACK_ANONYMIZER_TOOL_H_ | 6 #define COMPONENTS_FEEDBACK_ANONYMIZER_TOOL_H_ |
7 | 7 |
8 #include <map> | 8 #include <map> |
9 #include <string> | 9 #include <string> |
10 #include <vector> | 10 #include <vector> |
11 | 11 |
12 #include <base/macros.h> | 12 #include "base/macros.h" |
| 13 #include "base/memory/scoped_ptr.h" |
| 14 |
| 15 namespace re2 { |
| 16 class RE2; |
| 17 } |
13 | 18 |
14 namespace feedback { | 19 namespace feedback { |
15 | 20 |
| 21 struct CustomPatternWithoutContext { |
| 22 // A string literal used in anonymized texts. Matches to the |pattern| are |
| 23 // replaced with <|alias|: 1>, <|alias|: 2>, ... |
| 24 const char* alias; |
| 25 // An RE2 regexp with exactly one capture group. Matches will be replaced by |
| 26 // the alias reference described above. |
| 27 const char* pattern; |
| 28 }; |
| 29 |
16 class AnonymizerTool { | 30 class AnonymizerTool { |
17 public: | 31 public: |
18 AnonymizerTool(); | 32 AnonymizerTool(); |
19 ~AnonymizerTool(); | 33 ~AnonymizerTool(); |
20 | 34 |
21 // Returns an anonymized version of |input|. PII-sensitive data (such as MAC | 35 // Returns an anonymized version of |input|. PII-sensitive data (such as MAC |
22 // addresses) in |input| is replaced with unique identifiers. | 36 // addresses) in |input| is replaced with unique identifiers. |
23 std::string Anonymize(const std::string& input); | 37 std::string Anonymize(const std::string& input); |
24 | 38 |
25 private: | 39 private: |
26 friend class AnonymizerToolTest; | 40 friend class AnonymizerToolTest; |
27 | 41 |
| 42 re2::RE2* GetRegExp(const std::string& pattern); |
| 43 |
28 std::string AnonymizeMACAddresses(const std::string& input); | 44 std::string AnonymizeMACAddresses(const std::string& input); |
29 std::string AnonymizeCustomPatterns(std::string input); | 45 std::string AnonymizeCustomPatterns(std::string input); |
30 static std::string AnonymizeCustomPattern( | 46 std::string AnonymizeCustomPatternWithContext( |
31 const std::string& input, | 47 const std::string& input, |
32 const std::string& pattern, | 48 const std::string& pattern, |
33 std::map<std::string, std::string>* identifier_space); | 49 std::map<std::string, std::string>* identifier_space); |
| 50 std::string AnonymizeCustomPatternWithoutContext( |
| 51 const std::string& input, |
| 52 const CustomPatternWithoutContext& pattern, |
| 53 std::map<std::string, std::string>* identifier_space); |
34 | 54 |
35 // Map of MAC addresses discovered in anonymized strings to anonymized | 55 // Map of MAC addresses discovered in anonymized strings to anonymized |
36 // representations. 11:22:33:44:55:66 gets anonymized to 11:22:33:00:00:01, | 56 // representations. 11:22:33:44:55:66 gets anonymized to 11:22:33:00:00:01, |
37 // where the first three bytes represent the manufacturer. The last three | 57 // where the first three bytes represent the manufacturer. The last three |
38 // bytes are used to distinguish different MAC addresses and are incremented | 58 // bytes are used to distinguish different MAC addresses and are incremented |
39 // for each newly discovered MAC address. | 59 // for each newly discovered MAC address. |
40 std::map<std::string, std::string> mac_addresses_; | 60 std::map<std::string, std::string> mac_addresses_; |
41 | 61 |
42 // Like mac addresses, identifiers in custom patterns are anonymized. | 62 // Like mac addresses, identifiers in custom patterns are anonymized. |
43 // custom_patterns_[i] contains a map of original identifier to anonymized | 63 // custom_patterns_with_context_[i] contains a map of original identifier to |
44 // identifier for custom pattern number i. | 64 // anonymized identifier for custom pattern number i. |
45 std::vector<std::map<std::string, std::string>> custom_patterns_; | 65 std::vector<std::map<std::string, std::string>> custom_patterns_with_context_; |
| 66 std::vector<std::map<std::string, std::string>> |
| 67 custom_patterns_without_context_; |
| 68 |
| 69 // Cache to prevent the repeated compilation of the same regular expression |
| 70 // pattern. Key is the string representation of the RegEx. |
| 71 std::map<std::string, scoped_ptr<re2::RE2>> regexp_cache_; |
46 | 72 |
47 DISALLOW_COPY_AND_ASSIGN(AnonymizerTool); | 73 DISALLOW_COPY_AND_ASSIGN(AnonymizerTool); |
48 }; | 74 }; |
49 | 75 |
50 } // namespace feedback | 76 } // namespace feedback |
51 | 77 |
52 #endif // COMPONENTS_FEEDBACK_ANONYMIZER_TOOL_H_ | 78 #endif // COMPONENTS_FEEDBACK_ANONYMIZER_TOOL_H_ |
OLD | NEW |