| OLD | NEW |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef COMPONENTS_FEEDBACK_ANONYMIZER_TOOL_H_ | 5 #ifndef COMPONENTS_FEEDBACK_ANONYMIZER_TOOL_H_ |
| 6 #define COMPONENTS_FEEDBACK_ANONYMIZER_TOOL_H_ | 6 #define COMPONENTS_FEEDBACK_ANONYMIZER_TOOL_H_ |
| 7 | 7 |
| 8 #include <map> | 8 #include <map> |
| 9 #include <string> | 9 #include <string> |
| 10 #include <vector> | 10 #include <vector> |
| 11 | 11 |
| 12 #include <base/macros.h> | 12 #include "base/macros.h" |
| 13 #include "base/memory/scoped_ptr.h" |
| 14 |
| 15 namespace re2 { |
| 16 class RE2; |
| 17 } |
| 13 | 18 |
| 14 namespace feedback { | 19 namespace feedback { |
| 15 | 20 |
| 21 struct CustomPatternWithoutContext { |
| 22 // A string literal used in anonymized tests. Matches to the |pattern| are |
| 23 // replaced with <|alias|: 1>, <|alias|: 2>, ... |
| 24 const char* alias; |
| 25 // An RE2 regexp with exactly one capture group. Matches will be replaced by |
| 26 // the alias reference described above. |
| 27 const char* pattern; |
| 28 }; |
| 29 |
| 16 class AnonymizerTool { | 30 class AnonymizerTool { |
| 17 public: | 31 public: |
| 18 AnonymizerTool(); | 32 AnonymizerTool(); |
| 19 ~AnonymizerTool(); | 33 ~AnonymizerTool(); |
| 20 | 34 |
| 21 // Returns an anonymized version of |input|. PII-sensitive data (such as MAC | 35 // Returns an anonymized version of |input|. PII-sensitive data (such as MAC |
| 22 // addresses) in |input| is replaced with unique identifiers. | 36 // addresses) in |input| is replaced with unique identifiers. |
| 23 std::string Anonymize(const std::string& input); | 37 std::string Anonymize(const std::string& input); |
| 24 | 38 |
| 25 private: | 39 private: |
| 26 friend class AnonymizerToolTest; | 40 friend class AnonymizerToolTest; |
| 27 | 41 |
| 42 re2::RE2* GetRegExp(const std::string& pattern); |
| 43 |
| 28 std::string AnonymizeMACAddresses(const std::string& input); | 44 std::string AnonymizeMACAddresses(const std::string& input); |
| 29 std::string AnonymizeCustomPatterns(std::string input); | 45 std::string AnonymizeCustomPatterns(std::string input); |
| 30 static std::string AnonymizeCustomPattern( | 46 std::string AnonymizeCustomPatternWithContext( |
| 31 const std::string& input, | 47 const std::string& input, |
| 32 const std::string& pattern, | 48 const std::string& pattern, |
| 33 std::map<std::string, std::string>* identifier_space); | 49 std::map<std::string, std::string>* identifier_space); |
| 50 std::string AnonymizeCustomPatternWithoutContext( |
| 51 const std::string& input, |
| 52 const CustomPatternWithoutContext& pattern, |
| 53 std::map<std::string, std::string>* identifier_space); |
| 34 | 54 |
| 35 // Map of MAC addresses discovered in anonymized strings to anonymized | 55 // Map of MAC addresses discovered in anonymized strings to anonymized |
| 36 // representations. 11:22:33:44:55:66 gets anonymized to 11:22:33:00:00:01, | 56 // representations. 11:22:33:44:55:66 gets anonymized to 11:22:33:00:00:01, |
| 37 // where the first three bytes represent the manufacturer. The last three | 57 // where the first three bytes represent the manufacturer. The last three |
| 38 // bytes are used to distinguish different MAC addresses and are incremented | 58 // bytes are used to distinguish different MAC addresses and are incremented |
| 39 // for each newly discovered MAC address. | 59 // for each newly discovered MAC address. |
| 40 std::map<std::string, std::string> mac_addresses_; | 60 std::map<std::string, std::string> mac_addresses_; |
| 41 | 61 |
| 42 // Like MAC addresses, identifiers in custom patterns are anonymized. | 62 // Like MAC addresses, identifiers in custom patterns are anonymized. |
| 43 // custom_patterns_[i] contains a map of original identifier to anonymized | 63 // custom_patterns_with_context_[i] contains a map of original identifier to |
| 44 // identifier for custom pattern number i. | 64 // anonymized identifier for custom pattern number i. |
| 45 std::vector<std::map<std::string, std::string>> custom_patterns_; | 65 std::vector<std::map<std::string, std::string>> custom_patterns_with_context_; |
| 66 std::vector<std::map<std::string, std::string>> |
| 67 custom_patterns_without_context_; |
| 68 |
| 69 // Cache to prevent the repeated compilation of the same regular expression |
| 70 // pattern. Key is the string representation of the RegEx. |
| 71 std::map<std::string, scoped_ptr<re2::RE2>> regexp_cache_; |
| 46 | 72 |
| 47 DISALLOW_COPY_AND_ASSIGN(AnonymizerTool); | 73 DISALLOW_COPY_AND_ASSIGN(AnonymizerTool); |
| 48 }; | 74 }; |
| 49 | 75 |
| 50 } // namespace feedback | 76 } // namespace feedback |
| 51 | 77 |
| 52 #endif // COMPONENTS_FEEDBACK_ANONYMIZER_TOOL_H_ | 78 #endif // COMPONENTS_FEEDBACK_ANONYMIZER_TOOL_H_ |
| OLD | NEW |