| Index: components/autofill/core/browser/address_rewriter.cc
|
| diff --git a/components/autofill/core/browser/address_rewriter.cc b/components/autofill/core/browser/address_rewriter.cc
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..4cd255c807c0b46236c64788afebffd957af2c1b
|
| --- /dev/null
|
| +++ b/components/autofill/core/browser/address_rewriter.cc
|
| @@ -0,0 +1,129 @@
|
| +// Copyright 2016 The Chromium Authors. All rights reserved.
|
| +// Use of this source code is governed by a BSD-style license that can be
|
| +// found in the LICENSE file.
|
| +
|
| +#include "components/autofill/core/browser/address_rewriter.h"
|
| +
|
| +#include <memory>
|
| +#include <unordered_map>
|
| +
|
| +#include "base/i18n/case_conversion.h"
|
| +#include "base/memory/singleton.h"
|
| +#include "base/strings/utf_string_conversions.h"
|
| +#include "third_party/re2/src/re2/re2.h"
|
| +
|
| +namespace autofill {
|
| +namespace {
|
| +
|
| +// Import the internal rule table symbols. The data is defined in
|
| +// components/autofill/core/browser/address_rewriter_rules.cc
|
| +using internal::Rule;
|
| +using internal::RegionInfo;
|
| +using internal::kRuleTable;
|
| +using internal::kRuleTableSize;
|
| +
|
| +// Cache types: each compiled rule pairs a pattern with its rewrite string.
|
| +using CompiledRule = std::pair<std::unique_ptr<re2::RE2>, re2::StringPiece>;
|
| +using CompiledRuleVector = std::vector<CompiledRule>;
|
| +using CompiledRuleCache = std::unordered_map<std::string, CompiledRuleVector>;
|
| +
|
| +// Finds the rule table entry for |region| with a binary search, which
|
| +// requires that kRuleTable be sorted by region. Returns nullptr if absent.
|
| +static const RegionInfo* GetRegionInfo(const base::StringPiece& region) {
|
| +  const RegionInfo* const table_end = kRuleTable + kRuleTableSize;
|
| +  const RegionInfo* const entry =
|
| +      std::lower_bound(kRuleTable, table_end, region);
|
| +  // lower_bound yields the first entry not ordered before |region|; check it.
|
| +  const bool found = entry != table_end && region == entry->region;
|
| +  return found ? entry : nullptr;
|
| +}
|
| +
|
| +// Lazily filled cache of compiled string replacement rules, keyed by region.
|
| +// The class is a singleton; the rules for a region are compiled the first
|
| +// time that region is requested.
|
| +class Cache {
|
| + public:
|
| +  // Returns the singleton instance of the cache.
|
| +  static Cache* GetInstance() { return base::Singleton<Cache>::get(); }
|
| +
|
| +  // Returns a pointer to the compiled rules for |region|, or nullptr if no
|
| +  // rules exist for it. The first request for a region looks up its raw
|
| +  // rules, compiles them and stores the result; later requests are served
|
| +  // from the cache.
|
| +  const CompiledRuleVector* GetRulesForRegion(const std::string& region) {
|
| +    // Hold the lock for the whole call so that the cache is never updated
|
| +    // concurrently. The returned rules are const and may be read
|
| +    // concurrently; only the cache itself needs the lock.
|
| +    base::AutoLock auto_lock(lock_);
|
| +
|
| +    // Cache hit: return the rules compiled by an earlier call.
|
| +    const auto cached = data_.find(region);
|
| +    if (cached != data_.end())
|
| +      return &cached->second;
|
| +
|
| +    // Cache miss: look for the raw rules and give up if there are none.
|
| +    const RegionInfo* const info = GetRegionInfo(region);
|
| +    if (!info)
|
| +      return nullptr;
|
| +
|
| +    // Add a new rule vector to the cache and populate it with compiled rules.
|
| +    re2::RE2::Options options;
|
| +    options.set_utf8(true);
|
| +    options.set_word_boundary(true);
|
| +    CompiledRuleVector& rules = data_[region];
|
| +    rules.reserve(info->num_rules);
|
| +    // One compiled (pattern, rewrite) pair per raw rule, in table order.
|
| +    const Rule* const rules_end = info->rules + info->num_rules;
|
| +    for (const Rule* rule = info->rules; rule != rules_end; ++rule) {
|
| +      std::unique_ptr<re2::RE2> regex(new re2::RE2(rule->pattern, options));
|
| +      rules.emplace_back(std::move(regex), re2::StringPiece(rule->rewrite));
|
| +    }
|
| +
|
| +    // Hand back a pointer to the freshly cached rules.
|
| +    return &rules;
|
| +  }
|
| +
|
| + private:
|
| +  Cache() {}
|
| +
|
| +  // Synchronizes access to |data_|, ensuring that a given set of rules is
|
| +  // only compiled once.
|
| +  base::Lock lock_;
|
| +
|
| +  // The cache of compiled rules, keyed by region.
|
| +  CompiledRuleCache data_;
|
| +
|
| +  friend struct base::DefaultSingletonTraits<Cache>;
|
| +  DISALLOW_COPY_AND_ASSIGN(Cache);
|
| +};
|
| +
|
| +} // namespace
|
| +
|
| +AddressRewriter AddressRewriter::ForCountryCode(
|
| +    const base::string16& country_code) {
|
| +  // The rule cache is queried with the upper-cased, UTF-8 country code.
|
| +  const CompiledRuleVector* const region_rules =
|
| +      Cache::GetInstance()->GetRulesForRegion(
|
| +          base::UTF16ToUTF8(base::i18n::ToUpper(country_code)));
|
| +  AddressRewriter result;
|
| +  result.impl_ = region_rules;
|
| +  return result;
|
| +}
|
| +
|
| +base::string16 AddressRewriter::Rewrite(const base::string16& text) const {
|
| +  // No rules for this region: whitespace normalization is the only rewrite.
|
| +  if (impl_ == nullptr)
|
| +    return base::CollapseWhitespace(text, true);
|
| +
|
| +  // Apply all of the string replacement rules to a UTF-8 copy of |text|. The
|
| +  // patterns are expected to tolerate whitespace, which is collapsed after.
|
| +  std::string utf8_text = base::UTF16ToUTF8(text);
|
| +  for (const auto& rule : *static_cast<const CompiledRuleVector*>(impl_))
|
| +    RE2::GlobalReplace(&utf8_text, *rule.first, rule.second);
|
| +
|
| +  // Collapse whitespace with the same UTF-16 helper as the no-rules path
|
| +  // (not the ASCII-only variant), so both paths normalize text identically.
|
| +  return base::CollapseWhitespace(base::UTF8ToUTF16(utf8_text), true);
|
| +}
|
| +
|
| +} // namespace autofill
|
|
|