| OLD | NEW |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/feedback/anonymizer_tool.h" | 5 #include "components/feedback/anonymizer_tool.h" |
| 6 | 6 |
| 7 #include <utility> | 7 #include <utility> |
| 8 | 8 |
| 9 #include "base/memory/ptr_util.h" | 9 #include "base/memory/ptr_util.h" |
| 10 #include "base/strings/string_number_conversions.h" | 10 #include "base/strings/string_number_conversions.h" |
| (...skipping 147 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 158 #define IRI SCHEME ":" IHIER_PART OPT_NCG("\\?" IQUERY) OPT_NCG("#" IFRAGMENT) | 158 #define IRI SCHEME ":" IHIER_PART OPT_NCG("\\?" IQUERY) OPT_NCG("#" IFRAGMENT) |
| 159 | 159 |
| 160 #define IRI_REFERENCE NCG(IRI "|" IRELATIVE_REF) | 160 #define IRI_REFERENCE NCG(IRI "|" IRELATIVE_REF) |
| 161 | 161 |
| 162 // TODO(battre): Use http://tools.ietf.org/html/rfc5322 to represent email | 162 // TODO(battre): Use http://tools.ietf.org/html/rfc5322 to represent email |
| 163 // addresses. Capture names as well ("First Lastname" <foo@bar.com>). | 163 // addresses. Capture names as well ("First Lastname" <foo@bar.com>). |
| 164 | 164 |
| 165 // The |kCustomPatternsWithoutContext| array defines further patterns to match | 165 // The |kCustomPatternsWithoutContext| array defines further patterns to match |
| 166 // and anonymize. Each pattern consists of a single capturing group. | 166 // and anonymize. Each pattern consists of a single capturing group. |
| 167 CustomPatternWithoutContext kCustomPatternsWithoutContext[] = { | 167 CustomPatternWithoutContext kCustomPatternsWithoutContext[] = { |
| 168 {"URL", "(?i)(" IRI ")"}, | 168 {"URL", "(?i)(" IRI ")"}, |
| 169 // Email Addresses need to come after URLs because they can be part | 169 // Email Addresses need to come after URLs because they can be part |
| 170 // of a query parameter. | 170 // of a query parameter. |
| 171 {"email", "(?i)([0-9a-z._%+-]+@[a-z0-9.-]+\\.[a-z]{2,6})"}, | 171 {"email", "(?i)([0-9a-z._%+-]+@[a-z0-9.-]+\\.[a-z]{2,6})"}, |
| 172 // IP filter rules need to come after URLs so that they don't disturb the | 172 // IP filter rules need to come after URLs so that they don't disturb the |
| 173 // URL pattern in case the IP address is part of a URL. | 173 // URL pattern in case the IP address is part of a URL. |
| 174 {"IPv4", "(?i)(" IPV4ADDRESS ")"}, | 174 {"IPv4", "(?i)(" IPV4ADDRESS ")"}, |
| 175 {"IPv6", "(?i)(" IPV6ADDRESS ")"}, | 175 {"IPv6", "(?i)(" IPV6ADDRESS ")"}, |
| 176 // Universally Unique Identifiers (UUIDs) in 8-4-4-4-12 grouped form. |
| 177 {"UUID", |
| 178 "(?i)([0-9a-zA-Z]{8}-[0-9a-zA-Z]{4}-[0-9a-zA-Z]{4}-[0-9a-zA-Z]{4}-" |
| 179 "[0-9a-zA-Z]{12})"}, |
| 176 }; | 180 }; |
| 177 | 181 |
| 178 // Like RE2's FindAndConsume, searches for the first occurrence of |pattern| in | 182 // Like RE2's FindAndConsume, searches for the first occurrence of |pattern| in |
| 179 // |input| and consumes the bytes until the end of the pattern matching. Unlike | 183 // |input| and consumes the bytes until the end of the pattern matching. Unlike |
| 180 // FindAndConsume, the bytes skipped before the match of |pattern| are stored | 184 // FindAndConsume, the bytes skipped before the match of |pattern| are stored |
| 181 // in |skipped_input|. |args| needs to contain at least one element. | 185 // in |skipped_input|. |args| needs to contain at least one element. |
| 182 // Returns whether a match was found. | 186 // Returns whether a match was found. |
| 183 // | 187 // |
| 184 // Example: input = "aaabbbc", pattern = "(b+)" leads to skipped_input = "aaa", | 188 // Example: input = "aaabbbc", pattern = "(b+)" leads to skipped_input = "aaa", |
| 185 // args[0] = "bbb", and the beginning input is moved to the right so that it | 189 // args[0] = "bbb", and the beginning input is moved to the right so that it |
| (...skipping 179 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 365 } | 369 } |
| 366 | 370 |
| 367 skipped.AppendToString(&result); | 371 skipped.AppendToString(&result); |
| 368 result += replacement_id; | 372 result += replacement_id; |
| 369 } | 373 } |
| 370 text.AppendToString(&result); | 374 text.AppendToString(&result); |
| 371 return result; | 375 return result; |
| 372 } | 376 } |
| 373 | 377 |
| 374 } // namespace feedback | 378 } // namespace feedback |
| OLD | NEW |