Index: net/base/lookup_string_in_fixed_set.h |
diff --git a/net/base/lookup_string_in_fixed_set.h b/net/base/lookup_string_in_fixed_set.h |
index 793a782ed148d5c3641709cec9bc204c1eee23f6..11ef6e7d5d5eed0233e94209575e578e415afee0 100644 |
--- a/net/base/lookup_string_in_fixed_set.h |
+++ b/net/base/lookup_string_in_fixed_set.h |
@@ -35,6 +35,69 @@ NET_EXPORT int LookupStringInFixedSet(const unsigned char* graph, |
const char* key, |
size_t key_length); |
+// The class FixedSetIncrementalLookup provides a generalized version of |
+// LookupStringInFixedSet() where the query string is provided incrementally, |
+// character by character. At each step, the lookup result can be cheaply |
+// checked on the currently-provided prefix. |
+// |
+// This variant allows efficient (in time and space) prefix queries against a |
+// set of strings that is known at compile time. Alternatively, longest-suffix |
+// queries can be supported by constructing the DAFSA on reversed keys. |
Ryan Sleevi
2017/01/25 19:11:04
First Glance: Could you expand on this by includin
ncarter (slow)
2017/01/26 23:29:11
I've added examples to the class comment.
I've al
|
+class NET_EXPORT FixedSetIncrementalLookup { |
+ public: |
+ // Creates a lookup into a set of strings, which must be known at compile |
+ // time. |graph| and |length| describe a constant byte array holding a graph |
+ // structure called a DAFSA (Deterministic Acyclic Finite State Automaton) |
+ // which was generated by the script make_dafsa.py during compilation. |
Ryan Sleevi
2017/01/25 19:11:04
Can you reference the full path to make_dafsa.py h
ncarter (slow)
2017/01/26 23:29:11
Done.
|
+ FixedSetIncrementalLookup(const unsigned char* graph, size_t length); |
+ |
+ // FixedSetIncrementalLookup is copyable so that callers can save/restore |
+ // their position in the search. This is inexpensive. |
Ryan Sleevi
2017/01/25 19:11:04
Just from the header description, it's not entirel
ncarter (slow)
2017/01/26 23:29:11
Done.
|
+ FixedSetIncrementalLookup(const FixedSetIncrementalLookup&); |
+ FixedSetIncrementalLookup& operator=(const FixedSetIncrementalLookup&); |
+ |
+ ~FixedSetIncrementalLookup(); |
+ |
+ // Advance the query by adding a character to the input sequence. |input| can |
+ // be any char value, but only ASCII printable characters will ever result in |
+ // matches (since the DAFSA format is currently limited to those). |
Ryan Sleevi
2017/01/25 19:11:04
comment nit: I'm curious why we don't make that a
ncarter (slow)
2017/01/26 23:29:11
Historically, having input char values in the bad
|
+ // |
+ // Returns false once the search has exhausted the DAFSA, meaning that |
+ // the currently-provided sequence is not a prefix of any string in the fixed |
+ // set. Returns true otherwise. |
Ryan Sleevi
2017/01/25 19:11:04
Comment nit: Trying to work out the negative case,
ncarter (slow)
2017/01/26 23:29:11
Done.
|
+ bool Advance(char input); |
+ |
+ // Returns the result code corresponding to the input sequence provided thus |
+ // far to Advance(). |
Ryan Sleevi
2017/01/25 19:11:04
So one thing that is left unclear is what happens
ncarter (slow)
2017/01/26 23:29:11
I've clarified the documentation of Advance() that
|
+ // |
+ // If the string does not appear in the fixed set, the return value is |
+ // kDafsaNotFound. Otherwise, the value is a non-negative integer (currently |
+ // limited to 0-7) corresponding to the result code for that string, as listed |
+ // in the .gperf file from which the DAFSA was generated. For |
+ // GetDomainAndRegistry DAFSAs, these values should be interpreted as a |
+ // bitmask of kDafsaExceptionRule, kDafsaWildcardRule, and kDafsaPrivateRule. |
+ // |
+ // It is okay to call this function, and then extend the sequence further by |
+ // calling Advance(). |
+ int GetResultForCurrentSequence() const; |
+ |
+ private: |
+ // Pointer to our current position in the graph, or nullptr if we've exhausted |
Ryan Sleevi
2017/01/25 19:11:04
s/our/the/
s/we've exhausted the graph/the graph i
ncarter (slow)
2017/01/26 23:29:11
I've rewritten the comments, since you asked. But
|
+ // the graph. |
+ const unsigned char* pos_; |
+ |
+ // Pointer just past the end of the graph. |pos_| should never get here. This |
+ // is used only in DCHECKs. When paired with a unittest that explores the |
+ // entire graph, these DCHECKS ensure that the DAFSA contains no out-of-bounds |
+ // offsets. |
Ryan Sleevi
2017/01/25 19:11:04
"When paired with" seems to be describing an imple
ncarter (slow)
2017/01/26 23:29:11
Done.
|
+ const unsigned char* end_; |
+ |
+ // Contains the current decoder state. If true, |pos_| points to a label |
+ // character or a return code. If false, |pos_| points to a sequence of |
+ // offsets that indicate the child nodes of the current state. |
+ bool pos_is_label_character_; |
+}; |
+ |
} // namespace net |
#endif // NET_BASE_LOOKUP_STRING_IN_FIXED_SET_H_ |