chrome/browser/ui/app_list/search/tokenized_string_match.cc - Issue 522683003: Move basic string operations to ui/app_list/search

Unified Diff: chrome/browser/ui/app_list/search/tokenized_string_match.cc

Issue 522683003: Move basic string operations to ui/app_list/search (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: gn Created 6 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« no previous file with comments | « chrome/browser/ui/app_list/search/tokenized_string_match.h ('k') | chrome/browser/ui/app_list/search/tokenized_string_match_unittest.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: chrome/browser/ui/app_list/search/tokenized_string_match.cc

diff --git a/chrome/browser/ui/app_list/search/tokenized_string_match.cc b/chrome/browser/ui/app_list/search/tokenized_string_match.cc

deleted file mode 100644

index e63a6d92eac5b8f7956d23eae10ce63886737fcb..0000000000000000000000000000000000000000

--- a/chrome/browser/ui/app_list/search/tokenized_string_match.cc

+++ /dev/null

@@ -1,236 +0,0 @@

-// Use of this source code is governed by a BSD-style license that can be

-// found in the LICENSE file.

-#include "chrome/browser/ui/app_list/search/tokenized_string_match.h"

-#include "base/i18n/string_search.h"

-#include "base/logging.h"

-#include "chrome/browser/ui/app_list/search/tokenized_string_char_iterator.h"

-namespace app_list {

-namespace {

-// The factors below are applied when the current char of query matches

-// the current char of the text to be matched. Different factors are chosen

-// based on where the match happens. kIsPrefixMultiplier is used when the

-// matched portion is a prefix of both the query and the text, which implies

-// that the matched chars are at the same position in query and text. This is

-// the most preferred case thus it has the highest score. When the current char

-// of the query and the text does not match, the algorithm moves to the next

-// token in the text and try to match from there. kIsFrontOfWordMultipler will

-// be used if the first char of the token matches the current char of the query.

-// Otherwise, the match is considered as weak and kIsWeakHitMultiplier is

-// used.

-// Examples:

-// Suppose the text to be matched is 'Google Chrome'.

-// Query 'go' would yield kIsPrefixMultiplier for each char.

-// Query 'gc' would use kIsPrefixMultiplier for 'g' and

-// kIsFrontOfWordMultipler for 'c'.

-// Query 'ch' would use kIsFrontOfWordMultipler for 'c' and

-// kIsWeakHitMultiplier for 'h'.

-// Query 'oo' does not match any prefix and would use the substring match

-// fallback, thus kIsSubstringMultiplier is used for each char.

-const double kIsPrefixMultiplier = 1.0;

-const double kIsFrontOfWordMultipler = 0.8;

-const double kIsWeakHitMultiplier = 0.6;

-const double kIsSubstringMultiplier = 0.4;

-// A relevance score that represents no match.

-const double kNoMatchScore = 0.0;

-// PrefixMatcher matches the chars of a given query as prefix of tokens in

-// a given text or as prefix of the acronyms of those text tokens.

-class PrefixMatcher {

- public:

- PrefixMatcher(const TokenizedString& query,

- const TokenizedString& text)

- : query_iter_(query),

- text_iter_(text),

- current_match_(gfx::Range::InvalidRange()),

- current_relevance_(kNoMatchScore) {

- }

- // Invokes RunMatch to perform prefix match. Use |states_| as a stack to

- // perform DFS (depth first search) so that all possible matches are

- // attempted. Stops on the first full match and returns true. Otherwise,

- // returns false to indicate no match.

- bool Match() {

- while (!RunMatch()) {

- // No match found and no more states to try. Bail out.

- if (states_.empty()) {

- current_relevance_ = kNoMatchScore;

- current_hits_.clear();

- return false;

- }

- PopState();

- // Skip restored match to try other possibilites.

- AdvanceToNextTextToken();

- }

- if (current_match_.IsValid())

- current_hits_.push_back(current_match_);

- return true;

- }

- double relevance() const { return current_relevance_; }

- const TokenizedStringMatch::Hits& hits() const { return current_hits_; }

- private:

- // Context record of a match.

- struct State {

- State() : relevance(kNoMatchScore) {}

- State(double relevance,

- const gfx::Range& current_match,

- const TokenizedStringMatch::Hits& hits,

- const TokenizedStringCharIterator& query_iter,

- const TokenizedStringCharIterator& text_iter)

- : relevance(relevance),

- current_match(current_match),

- hits(hits.begin(), hits.end()),

- query_iter_state(query_iter.GetState()),

- text_iter_state(text_iter.GetState()) {}

- // The current score of the processed query chars.

- double relevance;

- // Current matching range.

- gfx::Range current_match;

- // Completed matching ranges of the processed query chars.

- TokenizedStringMatch::Hits hits;

- // States of the processed query and text chars.

- TokenizedStringCharIterator::State query_iter_state;

- TokenizedStringCharIterator::State text_iter_state;

- };

- typedef std::vector<State> States;

- // Match chars from the query and text one by one. For each matching char,

- // calculate relevance and matching ranges. And the current stats is

- // recorded so that the match could be skipped later to try other

- // possiblities. Repeat until any of the iterators run out. Return true if

- // query iterator runs out, i.e. all chars in query are matched.

- bool RunMatch() {

- while (!query_iter_.end() && !text_iter_.end()) {

- if (query_iter_.Get() == text_iter_.Get()) {

- PushState();

- if (query_iter_.GetArrayPos() == text_iter_.GetArrayPos())

- current_relevance_ += kIsPrefixMultiplier;

- else if (text_iter_.IsFirstCharOfToken())

- current_relevance_ += kIsFrontOfWordMultipler;

- else

- current_relevance_ += kIsWeakHitMultiplier;

- if (!current_match_.IsValid())

- current_match_.set_start(text_iter_.GetArrayPos());

- current_match_.set_end(text_iter_.GetArrayPos() +

- text_iter_.GetCharSize());

- query_iter_.NextChar();

- text_iter_.NextChar();

- } else {

- AdvanceToNextTextToken();

- }

- return query_iter_.end();

- }

- // Skip to the next text token and close current match. Invoked when a

- // mismatch happens or to skip a restored match.

- void AdvanceToNextTextToken() {

- if (current_match_.IsValid()) {

- current_hits_.push_back(current_match_);

- current_match_ = gfx::Range::InvalidRange();

- }

- text_iter_.NextToken();

- }

- void PushState() {

- states_.push_back(State(current_relevance_,

- current_match_,

- current_hits_,

- query_iter_,

- text_iter_));

- }

- void PopState() {

- DCHECK(!states_.empty());

- State& last_match = states_.back();

- current_relevance_ = last_match.relevance;

- current_match_ = last_match.current_match;

- current_hits_.swap(last_match.hits);

- query_iter_.SetState(last_match.query_iter_state);

- text_iter_.SetState(last_match.text_iter_state);

- states_.pop_back();

- }

- TokenizedStringCharIterator query_iter_;

- TokenizedStringCharIterator text_iter_;

- States states_;

- gfx::Range current_match_;

- double current_relevance_;

- TokenizedStringMatch::Hits current_hits_;

- DISALLOW_COPY_AND_ASSIGN(PrefixMatcher);

-};

-} // namespace

-TokenizedStringMatch::TokenizedStringMatch()

- : relevance_(kNoMatchScore) {}

-TokenizedStringMatch::~TokenizedStringMatch() {}

-bool TokenizedStringMatch::Calculate(const TokenizedString& query,

- const TokenizedString& text) {

- relevance_ = kNoMatchScore;

- hits_.clear();

- PrefixMatcher matcher(query, text);

- if (matcher.Match()) {

- relevance_ = matcher.relevance();

- hits_.assign(matcher.hits().begin(), matcher.hits().end());

- }

- // Substring match as a fallback.

- if (relevance_ == kNoMatchScore) {

- size_t substr_match_start = 0;

- size_t substr_match_length = 0;

- if (base::i18n::StringSearchIgnoringCaseAndAccents(query.text(),

- text.text(),

- &substr_match_start,

- &substr_match_length)) {

- relevance_ = kIsSubstringMultiplier * substr_match_length;

- hits_.push_back(gfx::Range(substr_match_start,

- substr_match_start + substr_match_length));

- }

- // Using length() for normalizing is not 100% correct but should be good

- // enough compared with using real char count of the text.

- if (text.text().length())

- relevance_ /= text.text().length();

- return relevance_ > kNoMatchScore;

-bool TokenizedStringMatch::Calculate(const base::string16& query,

- const base::string16& text) {

- const TokenizedString tokenized_query(query);

- const TokenizedString tokenized_text(text);

- return Calculate(tokenized_query, tokenized_text);

-} // namespace app_list