OLD | NEW |
---|---|
1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef NET_BASE_LOOKUP_STRING_IN_FIXED_SET_H_ | 5 #ifndef NET_BASE_LOOKUP_STRING_IN_FIXED_SET_H_ |
6 #define NET_BASE_LOOKUP_STRING_IN_FIXED_SET_H_ | 6 #define NET_BASE_LOOKUP_STRING_IN_FIXED_SET_H_ |
7 | 7 |
8 #include <stddef.h> | 8 #include <stddef.h> |
9 | 9 |
10 #include "net/base/net_export.h" | 10 #include "net/base/net_export.h" |
(...skipping 17 matching lines...) Expand all Loading... | |
28 // efficient (in time and space) lookup. The graph generated by make_dafsa.py | 28 // efficient (in time and space) lookup. The graph generated by make_dafsa.py |
29 // takes the form of a constant byte array which should be supplied via the | 29 // takes the form of a constant byte array which should be supplied via the |
30 // |graph| and |length| parameters. The return value is kDafsaNotFound, | 30 // |graph| and |length| parameters. The return value is kDafsaNotFound, |
31 // kDafsaFound, or a bitmap consisting of one or more of kDafsaExceptionRule, | 31 // kDafsaFound, or a bitmap consisting of one or more of kDafsaExceptionRule, |
32 // kDafsaWildcardRule and kDafsaPrivateRule ORed together. | 32 // kDafsaWildcardRule and kDafsaPrivateRule ORed together. |
33 NET_EXPORT int LookupStringInFixedSet(const unsigned char* graph, | 33 NET_EXPORT int LookupStringInFixedSet(const unsigned char* graph, |
34 size_t length, | 34 size_t length, |
35 const char* key, | 35 const char* key, |
36 size_t key_length); | 36 size_t key_length); |
37 | 37 |
38 // The class FixedSetIncrementalLookup provides a generalized version of | |
39 // LookupStringInFixedSet() where the query string is provided incrementally, | |
40 // character by character. At each step, the lookup result can be cheaply | |
41 // checked on the currently-provided prefix. | |
42 // | |
43 // This variant allows efficient (in time and space) prefix queries against a | |
44 // set of strings that is known at compile time. Alternatively, longest-suffix | |
45 // queries can be supported by constructing the DAFSA on reversed keys. | |
Ryan Sleevi
2017/01/25 19:11:04
First Glance: Could you expand on this by includin
ncarter (slow)
2017/01/26 23:29:11
I've added examples to the class comment.
I've al
| |
46 class NET_EXPORT FixedSetIncrementalLookup { | |
47 public: | |
48 // Creates a lookup into a set of strings, which must be known at compile | |
49 // time. |graph| and |length| describe a constant byte array holding a graph | |
50 // structure called a DAFSA (Deterministic Acyclic Finite State Automaton) | |
51 // which was generated by the script make_dafsa.py during compilation. | |
Ryan Sleevi
2017/01/25 19:11:04
Can you reference the full path to make_dafsa.py h
ncarter (slow)
2017/01/26 23:29:11
Done.
| |
52 FixedSetIncrementalLookup(const unsigned char* graph, size_t length); | |
53 | |
54 // FixedSetIncrementalLookup is copyable so that callers can save/restore | |
55 // their position in the search. This is inexpensive. | |
Ryan Sleevi
2017/01/25 19:11:04
Just from the header description, it's not entirel
ncarter (slow)
2017/01/26 23:29:11
Done.
| |
56 FixedSetIncrementalLookup(const FixedSetIncrementalLookup&); | |
57 FixedSetIncrementalLookup& operator=(const FixedSetIncrementalLookup&); | |
58 | |
59 ~FixedSetIncrementalLookup(); | |
60 | |
61 // Advance the query by adding a character to the input sequence. |input| can | |
62 // be any char value, but only ASCII printable characters will ever result in | |
63 // matches (since the DAFSA format is currently limited to those). | |
Ryan Sleevi
2017/01/25 19:11:04
comment nit: I'm curious why we don't make that a
ncarter (slow)
2017/01/26 23:29:11
Historically, having input char values in the bad
| |
64 // | |
65 // Returns false once the search has exhausted the DAFSA, meaning that | |
66 // the currently-provided sequence is not a prefix of any string in the fixed | |
67 // set. Returns true otherwise. | |
Ryan Sleevi
2017/01/25 19:11:04
Comment nit: Trying to work out the negative case,
ncarter (slow)
2017/01/26 23:29:11
Done.
| |
68 bool Advance(char input); | |
69 | |
70 // Returns the result code corresponding to the input sequence provided thus | |
71 // far to Advance(). | |
Ryan Sleevi
2017/01/25 19:11:04
So one thing that is left unclear is what happens
ncarter (slow)
2017/01/26 23:29:11
I've clarified the documentation of Advance() that
| |
72 // | |
73 // If the string does not appear in the fixed set, the return value is | |
74 // kDafsaNotFound. Otherwise, the value is a non-negative integer (currently | |
75 // limited to 0-7) corresponding to the result code for that string, as listed | |
76 // in the .gperf file from which the DAFSA was generated. For | |
77 // GetDomainAndRegistry DAFSAs, these values should be interpreted as a | |
78 // bitmask of kDafsaExceptionRule, kDafsaWildcardRule, and kDafsaPrivateRule. | |
79 // | |
80 // It is okay to call this function, and then extend the sequence further by | |
81 // calling Advance(). | |
82 int GetResultForCurrentSequence() const; | |
83 | |
84 private: | |
85 // Pointer to our current position in the graph, or nullptr if we've exhausted | |
Ryan Sleevi
2017/01/25 19:11:04
s/our/the/
s/we've exhausted the graph/the graph i
ncarter (slow)
2017/01/26 23:29:11
I've rewritten the comments, since you asked. But
| |
86 // the graph. | |
87 const unsigned char* pos_; | |
88 | |
89 // Pointer just past the end of the graph. |pos_| should never get here. This | |
90 // is used only in DCHECKs. When paired with a unittest that explores the | |
91 // entire graph, these DCHECKS ensure that the DAFSA contains no out-of-bounds | |
92 // offsets. | |
Ryan Sleevi
2017/01/25 19:11:04
"When paired with" seems to be describing an imple
ncarter (slow)
2017/01/26 23:29:11
Done.
| |
93 const unsigned char* end_; | |
94 | |
95 // Contains the current decoder state. If true, |pos_| points to a label | |
96 // character or a return code. If false, |pos_| points to a sequence of | |
97 // offsets that indicate the child nodes of the current state. | |
98 bool pos_is_label_character_; | |
99 }; | |
100 | |
38 } // namespace net | 101 } // namespace net |
39 | 102 |
40 #endif // NET_BASE_LOOKUP_STRING_IN_FIXED_SET_H_ | 103 #endif // NET_BASE_LOOKUP_STRING_IN_FIXED_SET_H_ |
OLD | NEW |