Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(70)

Unified Diff: components/autofill/core/browser/address_rewriter.cc

Issue 2121253002: Embed address normalization rewriting rules. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: fix try bots Created 4 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: components/autofill/core/browser/address_rewriter.cc
diff --git a/components/autofill/core/browser/address_rewriter.cc b/components/autofill/core/browser/address_rewriter.cc
new file mode 100644
index 0000000000000000000000000000000000000000..c6cad09131dab31cd7939735209293d1809da0a3
--- /dev/null
+++ b/components/autofill/core/browser/address_rewriter.cc
@@ -0,0 +1,128 @@
+// Copyright 2016 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/autofill/core/browser/address_rewriter.h"
+
+#include <memory>
+#include <unordered_map>
+
+#include "base/i18n/case_conversion.h"
+#include "base/memory/singleton.h"
+#include "base/strings/utf_string_conversions.h"
+#include "third_party/re2/src/re2/re2.h"
+
+namespace autofill {
+namespace {
+
+// Import the internal rule table symbols. The data is defined in
+// components/autofill/core/browser/address_rewriter_rules.cc
+using internal::Rule;
+using internal::RegionInfo;
+using internal::kRuleTable;
+using internal::kRuleTableSize;
+
+// Compiled rules cache types: (pattern, rewrite) pairs grouped by region.
+using CompiledRule = std::pair<std::unique_ptr<re2::RE2>, re2::StringPiece>;
+using CompiledRuleVector = std::vector<CompiledRule>;
+using CompiledRuleCache = std::unordered_map<std::string, CompiledRuleVector>;
+
+// Looks up the rule table entry for |region| using std::lower_bound, which
+// relies on kRuleTable being sorted by region. Yields nullptr when no entry
+// matches.
+static const RegionInfo* GetRegionInfo(const base::StringPiece& region) {
+ const RegionInfo* const table_begin = kRuleTable;
+ const RegionInfo* const table_end = table_begin + kRuleTableSize;
+ const RegionInfo* const candidate =
+ std::lower_bound(table_begin, table_end, region);
+ const bool found = candidate != table_end && region == candidate->region;
+ return found ? candidate : nullptr;
+}
+
+// The cache of compiled string replacement rules, keyed by region. This class
+// is a singleton that compiles the rules for a given region the first time
+// they are requested.
+class Cache {
+ public:
+ // Return the singleton instance of the cache.
Mathieu 2016/07/06 17:50:46 nit:singleton
Roger McFarlane (Chromium) 2016/07/06 21:28:38 Done.
+ static Cache* GetInstance() { return base::Singleton<Cache>::get(); }
+
+ // If the rules for |region| have already been compiled and cached, return a
+ // pointer to them. Otherwise, find the rules for |region| (returning nullptr
+ // if no such rules exist), compile them, cache them, and return a
+ // pointer to the cached rules.
+ const CompiledRuleVector* GetRulesForRegion(const std::string& region) {
+ // Take the lock so that we don't update the data cache concurrently. Note
+ // that the returned data is const and can be concurrently accessed, just
Mathieu 2016/07/06 17:50:46 *accessed
Roger McFarlane (Chromium) 2016/07/06 21:28:38 Done.
+ // not the data cache.
+ base::AutoLock auto_lock(lock_);
+
+ // If we find a cached set of rules, return a pointer to the data.
+ CompiledRuleCache::iterator cache_iter = data_.find(region);
+ if (cache_iter != data_.end())
+ return &cache_iter->second;
+
+ // Cache miss. Look for the raw rules. If none, then return nullptr.
+ const RegionInfo* region_info = GetRegionInfo(region);
+ if (region_info == nullptr)
+ return nullptr;
+
+ // Add a new rule vector to the cache and populate it with compiled rules.
+ re2::RE2::Options options;
+ options.set_utf8(true);
+ options.set_word_boundary(true);
+ CompiledRuleVector& compiled_rules = data_[region];
+ compiled_rules.reserve(region_info->num_rules);
+ for (size_t i = 0; i < region_info->num_rules; ++i) {
+ const Rule& rule = region_info->rules[i];
+ std::unique_ptr<re2::RE2> pattern(new re2::RE2(rule.pattern, options));
+ re2::StringPiece rewrite(rule.rewrite);
+ compiled_rules.emplace_back(std::move(pattern), std::move(rewrite));
+ }
+
+ // Return a pointer to the data.
+ return &compiled_rules;
+ }
+
+ private:
+ Cache() {}
+
+ // Synchronizes access to |data_|, ensuring that a given set of rules is
+ // only compiled once.
+ base::Lock lock_;
+
+ // The cache of compiled rules, keyed by region.
+ CompiledRuleCache data_;
+
+ friend struct base::DefaultSingletonTraits<Cache>;
+ DISALLOW_COPY_AND_ASSIGN(Cache);
+};
+
+} // namespace
+
+// Returns a rewriter for |country_code|. The code is upper-cased and used as
+// the region key for the compiled rule cache; when no rules exist for that
+// region the returned rewriter holds a null rule set.
+AddressRewriter AddressRewriter::ForCountryCode(
+ const base::string16& country_code) {
+ // Qualify the conversion explicitly: this code lives in namespace autofill,
+ // so an unqualified call would depend on argument-dependent lookup.
+ const std::string region =
+ base::UTF16ToUTF8(base::i18n::ToUpper(country_code));
Mathieu 2016/07/06 17:50:46 base::UTF16ToUTF8 ?
Roger McFarlane (Chromium) 2016/07/06 21:28:38 Done.
+ const CompiledRuleVector* rules =
+ Cache::GetInstance()->GetRulesForRegion(region);
+ AddressRewriter rewriter;
+ rewriter.impl_ = rules;
+ return rewriter;
+}
+
+// Applies the rewrite rules held by this rewriter to |text| and returns the
+// result with whitespace collapsed. Without a rule set, only the whitespace
+// collapsing is performed.
+base::string16 AddressRewriter::Rewrite(const base::string16& text) const {
+ if (impl_ == nullptr)
+ return base::CollapseWhitespace(text, true);
+
+ // |impl_| is assigned from a CompiledRuleVector pointer in ForCountryCode(),
+ // so a static_cast is sufficient to recover the concrete type.
+ const CompiledRuleVector& rules =
+ *static_cast<const CompiledRuleVector*>(impl_);
+
+ // Apply all of the string replacement rules. We don't have to worry about
+ // whitespace during these passes because the patterns are all whitespace
+ // tolerant regular expressions.
+ std::string utf8_text = base::UTF16ToUTF8(text);
+ for (const auto& rule : rules)
+ RE2::GlobalReplace(&utf8_text, *rule.first, rule.second);
+
+ // Collapse whitespace before returning the final value.
+ return base::UTF8ToUTF16(base::CollapseWhitespaceASCII(utf8_text, true));
+}
+
+} // namespace autofill

Powered by Google App Engine
This is Rietveld 408576698