OLD | NEW |
| (Empty) |
1 // Copyright 2016 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "components/autofill/core/browser/address_rewriter.h" | |
6 | |
7 #include <memory> | |
8 #include <unordered_map> | |
9 | |
10 #include "base/i18n/case_conversion.h" | |
11 #include "base/memory/singleton.h" | |
12 #include "base/strings/utf_string_conversions.h" | |
13 #include "third_party/re2/src/re2/re2.h" | |
14 | |
15 namespace autofill { | |
16 namespace { | |
17 | |
18 // Import the internal rule table symbols. The data is defined in | |
19 // components/autofill/core/browser/address_rewriter_rules.cc | |
20 using internal::Rule; | |
21 using internal::RegionInfo; | |
22 using internal::kRuleTable; | |
23 using internal::kRuleTableSize; | |
24 | |
25 // Aliases for the types used by the compiled rules cache. | |
26 using CompiledRule = std::pair<std::unique_ptr<re2::RE2>, re2::StringPiece>; | |
27 using CompiledRuleVector = std::vector<CompiledRule>; | |
28 using CompiledRuleCache = std::unordered_map<std::string, CompiledRuleVector>; | |
29 | |
30 // Helper function to find the rules associated with |region|. Note that this | |
31 // requires that kRuleTable be sorted by region. | |
32 static const RegionInfo* GetRegionInfo(const base::StringPiece& region) { | |
33 const RegionInfo* begin = kRuleTable; | |
34 const RegionInfo* end = kRuleTable + kRuleTableSize; | |
35 const RegionInfo* iter = std::lower_bound(begin, end, region); | |
36 if (iter != end && region == iter->region) | |
37 return iter; | |
38 return nullptr; | |
39 } | |
40 | |
41 // The cache of compiled string replacement rules, keyed by region. This class | |
42 // is a singleton that compiles the rules for a given region the first time | |
43 // they are requested. | |
44 class Cache { | |
45 public: | |
46 // Return the singleton instance of the cache. | |
47 static Cache* GetInstance() { return base::Singleton<Cache>::get(); } | |
48 | |
49 // If the rules for |region| have already been compiled and cached, return a | |
50 // pointer to them. Otherwise, find the rules for |region| (returning nullptr | |
51 // if there are no such rules exist), compile them, cache them, and return a | |
52 // pointer to the cached rules. | |
53 const CompiledRuleVector* GetRulesForRegion(const std::string& region) { | |
54 // Take the lock so that we don't update the data cache concurrently. Note | |
55 // that the returned data is const and can be concurrently accessed, just | |
56 // not the data cache. | |
57 base::AutoLock auto_lock(lock_); | |
58 | |
59 // If we find a cached set of rules, return a pointer to the data. | |
60 CompiledRuleCache::iterator cache_iter = data_.find(region); | |
61 if (cache_iter != data_.end()) | |
62 return &cache_iter->second; | |
63 | |
64 // Cache miss. Look for the raw rules. If none, then return nullptr. | |
65 const RegionInfo* region_info = GetRegionInfo(region); | |
66 if (region_info == nullptr) | |
67 return nullptr; | |
68 | |
69 // Add a new rule vector the the cache and populate it with compiled rules. | |
70 re2::RE2::Options options; | |
71 options.set_utf8(true); | |
72 options.set_word_boundary(true); | |
73 CompiledRuleVector& compiled_rules = data_[region]; | |
74 compiled_rules.reserve(region_info->num_rules); | |
75 for (size_t i = 0; i < region_info->num_rules; ++i) { | |
76 const Rule& rule = region_info->rules[i]; | |
77 std::unique_ptr<re2::RE2> pattern(new re2::RE2(rule.pattern, options)); | |
78 re2::StringPiece rewrite(rule.rewrite); | |
79 compiled_rules.emplace_back(std::move(pattern), std::move(rewrite)); | |
80 } | |
81 | |
82 // Return a pointer to the data. | |
83 return &compiled_rules; | |
84 } | |
85 | |
86 private: | |
87 Cache() {} | |
88 | |
89 // Synchronizes access to |data_|, ensuring that a given set of rules is | |
90 // only compiled once. | |
91 base::Lock lock_; | |
92 | |
93 // The cache of compiled rules, keyed by region. | |
94 CompiledRuleCache data_; | |
95 | |
96 friend struct base::DefaultSingletonTraits<Cache>; | |
97 DISALLOW_COPY_AND_ASSIGN(Cache); | |
98 }; | |
99 | |
100 } // namespace | |
101 | |
102 AddressRewriter AddressRewriter::ForCountryCode( | |
103 const base::string16& country_code) { | |
104 const std::string region = | |
105 base::UTF16ToUTF8(base::i18n::ToUpper(country_code)); | |
106 const CompiledRuleVector* rules = | |
107 Cache::GetInstance()->GetRulesForRegion(region); | |
108 AddressRewriter rewriter; | |
109 rewriter.impl_ = rules; | |
110 return rewriter; | |
111 } | |
112 | |
113 base::string16 AddressRewriter::Rewrite(const base::string16& text) const { | |
114 if (impl_ == nullptr) | |
115 return base::CollapseWhitespace(text, true); | |
116 | |
117 // Apply all of the string replacement rules. We don't have to worry about | |
118 // whitespace during these passes because the patterns are all whitespace | |
119 // tolerant regular expressions. | |
120 std::string utf8_text = base::UTF16ToUTF8(text); | |
121 for (const auto& rule : *static_cast<const CompiledRuleVector*>(impl_)) { | |
122 RE2::GlobalReplace(&utf8_text, *rule.first, rule.second); | |
123 } | |
124 | |
125 // Collapse whitespace before returning the final value. | |
126 return base::UTF8ToUTF16(base::CollapseWhitespaceASCII(utf8_text, true)); | |
127 } | |
128 | |
129 } // namespace autofill | |
OLD | NEW |