Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(597)

Side by Side Diff: net/base/lookup_string_in_fixed_set.h

Issue 2784933002: Mitigate spoofing attempt using Latin letters. (Closed)
Patch Set: add similarity check unittests Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifndef NET_BASE_LOOKUP_STRING_IN_FIXED_SET_H_
6 #define NET_BASE_LOOKUP_STRING_IN_FIXED_SET_H_
7
8 #include <stddef.h>
9
10 #include "net/base/net_export.h"
11
12 namespace net {
13
14 enum {
15 kDafsaNotFound = -1, // key is not in set
16 kDafsaFound = 0, // key is in set
17 // The following return values are used by the implementation of
18 // GetDomainAndRegistry() and are probably not generally useful.
19 kDafsaExceptionRule = 1, // key excluded from set via exception
20 kDafsaWildcardRule = 2, // key matched a wildcard rule
21 kDafsaPrivateRule = 4, // key matched a private rule
22 };
23
24 // Looks up the string |key| with length |key_length| in a fixed set of
25 // strings. The set of strings must be known at compile time. It is converted to
26 // a graph structure named a DAFSA (Deterministic Acyclic Finite State
27 // Automaton) by the script make_dafsa.py during compilation. This permits
28 // efficient (in time and space) lookup. The graph generated by make_dafsa.py
29 // takes the form of a constant byte array which should be supplied via the
30 // |graph| and |length| parameters. The return value is kDafsaNotFound,
31 // kDafsaFound, or a bitmap consisting of one or more of kDafsaExceptionRule,
32 // kDafsaWildcardRule and kDafsaPrivateRule ORed together.
33 //
34 // TODO(nick): Replace this with FixedSetIncrementalLookup everywhere.
35 NET_EXPORT int LookupStringInFixedSet(const unsigned char* graph,
36 size_t length,
37 const char* key,
38 size_t key_length);
39
40 // FixedSetIncrementalLookup provides efficient membership and prefix queries
41 // against a fixed set of strings. The set of strings must be known at compile
42 // time. The set is converted to a graph structure named a DAFSA (Deterministic
43 // Acyclic Finite State Automaton) by the script //net/tools/dafsa/make_dafsa.py
44 // during compilation. The conversion generates a C++ header file defining the
45 // encoded graph as a constant byte array. This class provides a fast, constant-
46 // space lookup operation against such byte arrays.
47 //
48 // The lookup proceeds incrementally, with input characters provided one at a
49 // time. This approach allow queries of the form: "given an input string, which
50 // prefixes of that string that appear in the fixed set?" As the matching
51 // prefixes (and their result codes) are enumerated, the most suitable match
52 // among them can be selected in a single pass.
53 //
54 // This class can also be used to perform suffix queries (instead of prefix
55 // queries) against a fixed set, so long as the DAFSA is constructed on reversed
56 // values, and the input is provided in reverse order.
57 //
58 // Example usage for simple membership query; |input| is a std::string:
59 //
60 // FixedSetIncrementalLookup lookup(kDafsa, sizeof(kDafsa));
61 // for (char c : input) {
62 // if (!lookup.Advance(c))
63 // return false;
64 // }
65 // return lookup.GetResultForCurrentSequence() != kDafsaNotFound;
66 //
67 // Example usage for 'find longest prefix in set with result code == 3' query:
68 //
69 // FixedSetIncrementalLookup prefix_lookup(kDafsa, sizeof(kDafsa));
70 // size_t longest_match_end = 0;
71 // for (size_t i = 0; i < input.length(); ++i) {
72 // if (!prefix_lookup.Advance(input[i]))
73 // break;
74 // if (prefix_lookup.GetResultForCurrentSequence() == 3)
75 // longest_match_end = (i + 1);
76 // }
77 // return input.substr(0, longest_match_end);
78 //
79 class NET_EXPORT FixedSetIncrementalLookup {
80 public:
81 // Begin a lookup against the provided fixed set. |graph| and |length|
82 // describe a byte buffer generated by the make_dafsa.py script, as described
83 // in the class comment.
84 //
85 // FixedSetIncrementalLookup is initialized to a state corresponding to the
86 // empty input sequence. Calling GetResultForCurrentSequence() in the initial
87 // state would indicate whether the empty string appears in the fixed set.
88 // Characters can be added to the sequence by calling Advance(), and the
89 // lookup result can be checked after each addition by calling
90 // GetResultForCurrentSequence().
91 FixedSetIncrementalLookup(const unsigned char* graph, size_t length);
92
93 // FixedSetIncrementalLookup is copyable so that callers can save/restore
94 // their position in the search. This is for cases where branching or
95 // backtracking might be required (e.g. to probe for the presence of a
96 // designated wildcard character).
97 FixedSetIncrementalLookup(const FixedSetIncrementalLookup&);
98 FixedSetIncrementalLookup& operator=(const FixedSetIncrementalLookup&);
99
100 ~FixedSetIncrementalLookup();
101
102 // Advance the query by adding a character to the input sequence. |input| can
103 // be any char value, but only ASCII characters will ever result in matches,
104 // since the fixed set itself is limited to ASCII strings.
105 //
106 // Returns true if the resulting input sequence either appears in the fixed
107 // set itself, or is a prefix of some longer string in the fixed set. Returns
108 // false otherwise, implying that the graph is exhausted and
109 // GetResultForCurrentSequence() will return kDafsaNotFound.
110 //
111 // Once Advance() has returned false, the caller can safely stop feeding more
112 // characters, as subsequent calls to Advance() will return false and have no
113 // effect.
114 bool Advance(char input);
115
116 // Returns the result code corresponding to the input sequence provided thus
117 // far to Advance().
118 //
119 // If the sequence does not appear in the fixed set, the return value is
120 // kDafsaNotFound. Otherwise, the value is a non-negative integer (currently
121 // limited to 0-7) corresponding to the result code for that string, as listed
122 // in the .gperf file from which the DAFSA was generated. For
123 // GetDomainAndRegistry DAFSAs, these values should be interpreted as a
124 // bitmask of kDafsaExceptionRule, kDafsaWildcardRule, and kDafsaPrivateRule.
125 //
126 // It is okay to call this function, and then extend the sequence further by
127 // calling Advance().
128 int GetResultForCurrentSequence() const;
129
130 private:
131 // Pointer to the current position in the graph indicating the current state
132 // of the automaton, or nullptr if the graph is exhausted.
133 const unsigned char* pos_;
134
135 // Pointer just past the end of the graph. |pos_| should never get here. This
136 // is used only in DCHECKs.
137 const unsigned char* end_;
138
139 // Contains the current decoder state. If true, |pos_| points to a label
140 // character or a return code. If false, |pos_| points to a sequence of
141 // offsets that indicate the child nodes of the current state.
142 bool pos_is_label_character_;
143 };
144
145 } // namespace net
146
147 #endif // NET_BASE_LOOKUP_STRING_IN_FIXED_SET_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698