OLD | NEW |
(Empty) | |
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include "components/autofill/core/browser/address_rewriter.h" |
| 6 |
| 7 #include <memory> |
| 8 #include <unordered_map> |
| 9 |
| 10 #include "base/i18n/case_conversion.h" |
| 11 #include "base/memory/singleton.h" |
| 12 #include "base/strings/utf_string_conversions.h" |
| 13 #include "third_party/re2/src/re2/re2.h" |
| 14 |
| 15 namespace autofill { |
| 16 namespace { |
| 17 |
| 18 // Import the internal rule table symbols. The data is defined in |
| 19 // components/autofill/core/browser/address_rewriter_rules.cc |
| 20 using internal::Rule; |
| 21 using internal::RegionInfo; |
| 22 using internal::kRuleTable; |
| 23 using internal::kRuleTableSize; |
| 24 |
| 25 // Aliases for the types used by the compiled rules cache. |
| 26 using CompiledRule = std::pair<std::unique_ptr<re2::RE2>, re2::StringPiece>; |
| 27 using CompiledRuleVector = std::vector<CompiledRule>; |
| 28 using CompiledRuleCache = std::unordered_map<std::string, CompiledRuleVector>; |
| 29 |
| 30 // Helper function to find the rules associated with |region|. Note that this |
| 31 // requires that kRuleTable be sorted by region. |
| 32 static const RegionInfo* GetRegionInfo(const base::StringPiece& region) { |
| 33 const RegionInfo* begin = kRuleTable; |
| 34 const RegionInfo* end = kRuleTable + kRuleTableSize; |
| 35 const RegionInfo* iter = std::lower_bound(begin, end, region); |
| 36 if (iter != end && region == iter->region) |
| 37 return iter; |
| 38 return nullptr; |
| 39 } |
| 40 |
| 41 // The cache of compiled string replacement rules, keyed by region. This class |
| 42 // is a singleton that compiles the rules for a given region the first time |
| 43 // they are requested. |
| 44 class Cache { |
| 45 public: |
| 46 // Return the singleton instance of the cache. |
| 47 static Cache* GetInstance() { return base::Singleton<Cache>::get(); } |
| 48 |
| 49 // If the rules for |region| have already been compiled and cached, return a |
| 50 // pointer to them. Otherwise, find the rules for |region| (returning nullptr |
| 51 // if there are no such rules exist), compile them, cache them, and return a |
| 52 // pointer to the cached rules. |
| 53 const CompiledRuleVector* GetRulesForRegion(const std::string& region) { |
| 54 // Take the lock so that we don't update the data cache concurrently. Note |
| 55 // that the returned data is const and can be concurrently accessed, just |
| 56 // not the data cache. |
| 57 base::AutoLock auto_lock(lock_); |
| 58 |
| 59 // If we find a cached set of rules, return a pointer to the data. |
| 60 CompiledRuleCache::iterator cache_iter = data_.find(region); |
| 61 if (cache_iter != data_.end()) |
| 62 return &cache_iter->second; |
| 63 |
| 64 // Cache miss. Look for the raw rules. If none, then return nullptr. |
| 65 const RegionInfo* region_info = GetRegionInfo(region); |
| 66 if (region_info == nullptr) |
| 67 return nullptr; |
| 68 |
| 69 // Add a new rule vector the the cache and populate it with compiled rules. |
| 70 re2::RE2::Options options; |
| 71 options.set_utf8(true); |
| 72 options.set_word_boundary(true); |
| 73 CompiledRuleVector& compiled_rules = data_[region]; |
| 74 compiled_rules.reserve(region_info->num_rules); |
| 75 for (size_t i = 0; i < region_info->num_rules; ++i) { |
| 76 const Rule& rule = region_info->rules[i]; |
| 77 std::unique_ptr<re2::RE2> pattern(new re2::RE2(rule.pattern, options)); |
| 78 re2::StringPiece rewrite(rule.rewrite); |
| 79 compiled_rules.emplace_back(std::move(pattern), std::move(rewrite)); |
| 80 } |
| 81 |
| 82 // Return a pointer to the data. |
| 83 return &compiled_rules; |
| 84 } |
| 85 |
| 86 private: |
| 87 Cache() {} |
| 88 |
| 89 // Synchronizes access to |data_|, ensuring that a given set of rules is |
| 90 // only compiled once. |
| 91 base::Lock lock_; |
| 92 |
| 93 // The cache of compiled rules, keyed by region. |
| 94 CompiledRuleCache data_; |
| 95 |
| 96 friend struct base::DefaultSingletonTraits<Cache>; |
| 97 DISALLOW_COPY_AND_ASSIGN(Cache); |
| 98 }; |
| 99 |
| 100 } // namespace |
| 101 |
| 102 AddressRewriter AddressRewriter::ForCountryCode( |
| 103 const base::string16& country_code) { |
| 104 const std::string region = |
| 105 base::UTF16ToUTF8(base::i18n::ToUpper(country_code)); |
| 106 const CompiledRuleVector* rules = |
| 107 Cache::GetInstance()->GetRulesForRegion(region); |
| 108 AddressRewriter rewriter; |
| 109 rewriter.impl_ = rules; |
| 110 return rewriter; |
| 111 } |
| 112 |
| 113 base::string16 AddressRewriter::Rewrite(const base::string16& text) const { |
| 114 if (impl_ == nullptr) |
| 115 return base::CollapseWhitespace(text, true); |
| 116 |
| 117 // Apply all of the string replacement rules. We don't have to worry about |
| 118 // whitespace during these passes because the patterns are all whitespace |
| 119 // tolerant regular expressions. |
| 120 std::string utf8_text = base::UTF16ToUTF8(text); |
| 121 for (const auto& rule : *static_cast<const CompiledRuleVector*>(impl_)) { |
| 122 RE2::GlobalReplace(&utf8_text, *rule.first, rule.second); |
| 123 } |
| 124 |
| 125 // Collapse whitespace before returning the final value. |
| 126 return base::UTF8ToUTF16(base::CollapseWhitespaceASCII(utf8_text, true)); |
| 127 } |
| 128 |
| 129 } // namespace autofill |
OLD | NEW |