| OLD | NEW |
| (Empty) |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "net/base/lookup_string_in_fixed_set.h" | |
| 6 | |
| 7 #include "base/logging.h" | |
| 8 | |
| 9 namespace net { | |
| 10 | |
| 11 namespace { | |
| 12 | |
| 13 // Read next offset from |pos|, increment |offset| by that amount, and increment | |
| 14 // |pos| either to point to the start of the next encoded offset in its node, or | |
| 15 // nullptr, if there are no remaining offsets. | |
| 16 // | |
| 17 // Returns true if an offset could be read; false otherwise. | |
| 18 inline bool GetNextOffset(const unsigned char** pos, | |
| 19 const unsigned char** offset) { | |
| 20 if (*pos == nullptr) | |
| 21 return false; | |
| 22 | |
| 23 size_t bytes_consumed; | |
| 24 switch (**pos & 0x60) { | |
| 25 case 0x60: // Read three byte offset | |
| 26 *offset += (((*pos)[0] & 0x1F) << 16) | ((*pos)[1] << 8) | (*pos)[2]; | |
| 27 bytes_consumed = 3; | |
| 28 break; | |
| 29 case 0x40: // Read two byte offset | |
| 30 *offset += (((*pos)[0] & 0x1F) << 8) | (*pos)[1]; | |
| 31 bytes_consumed = 2; | |
| 32 break; | |
| 33 default: | |
| 34 *offset += (*pos)[0] & 0x3F; | |
| 35 bytes_consumed = 1; | |
| 36 } | |
| 37 if ((**pos & 0x80) != 0) { | |
| 38 *pos = nullptr; | |
| 39 } else { | |
| 40 *pos += bytes_consumed; | |
| 41 } | |
| 42 return true; | |
| 43 } | |
| 44 | |
| 45 // Check if byte at |offset| is last in label. | |
| 46 bool IsEOL(const unsigned char* offset) { | |
| 47 return (*offset & 0x80) != 0; | |
| 48 } | |
| 49 | |
| 50 // Check if byte at |offset| matches key. This version matches both end-of-label | |
| 51 // chars and not-end-of-label chars. | |
| 52 bool IsMatch(const unsigned char* offset, char key) { | |
| 53 return (*offset & 0x7F) == key; | |
| 54 } | |
| 55 | |
| 56 // Read return value at |offset|, if it is a return value. Returns true if a | |
| 57 // return value could be read, false otherwise. | |
| 58 bool GetReturnValue(const unsigned char* offset, int* return_value) { | |
| 59 // Return values are always encoded as end-of-label chars (so the high bit is | |
| 60 // set). So byte values in the inclusive range [0x80, 0x9F] encode the return | |
| 61 // values 0 through 31 (though make_dafsa.py doesn't currently encode values | |
| 62 // higher than 7). The following code does that translation. | |
| 63 if ((*offset & 0xE0) == 0x80) { | |
| 64 *return_value = *offset & 0x1F; | |
| 65 return true; | |
| 66 } | |
| 67 return false; | |
| 68 } | |
| 69 | |
| 70 } // namespace | |
| 71 | |
| 72 FixedSetIncrementalLookup::FixedSetIncrementalLookup(const unsigned char* graph, | |
| 73 size_t length) | |
| 74 : pos_(graph), end_(graph + length), pos_is_label_character_(false) {} | |
| 75 | |
| 76 FixedSetIncrementalLookup::FixedSetIncrementalLookup( | |
| 77 const FixedSetIncrementalLookup& other) = default; | |
| 78 | |
| 79 FixedSetIncrementalLookup& FixedSetIncrementalLookup::operator=( | |
| 80 const FixedSetIncrementalLookup& other) = default; | |
| 81 | |
| 82 FixedSetIncrementalLookup::~FixedSetIncrementalLookup() {} | |
| 83 | |
| 84 bool FixedSetIncrementalLookup::Advance(char input) { | |
| 85 if (!pos_) { | |
| 86 // A previous input exhausted the graph, so there are no possible matches. | |
| 87 return false; | |
| 88 } | |
| 89 | |
| 90 // Only ASCII printable chars are supported by the current DAFSA format -- the | |
| 91 // high bit (values 0x80-0xFF) is reserved as a label-end signifier, and the | |
| 92 // low values (values 0x00-0x1F) are reserved to encode the return values. So | |
| 93 // values outside this range will never be in the dictionary. | |
| 94 if (input >= 0x20) { | |
| 95 if (pos_is_label_character_) { | |
| 96 // Currently processing a label, so it is only necessary to check the byte | |
| 97 // at |pos_| to see if it encodes a character matching |input|. | |
| 98 bool is_last_char_in_label = IsEOL(pos_); | |
| 99 bool is_match = IsMatch(pos_, input); | |
| 100 if (is_match) { | |
| 101 // If this is not the last character in the label, the next byte should | |
| 102 // be interpreted as a character or return value. Otherwise, the next | |
| 103 // byte should be interpreted as a list of child node offsets. | |
| 104 ++pos_; | |
| 105 DCHECK(pos_ < end_); | |
| 106 pos_is_label_character_ = !is_last_char_in_label; | |
| 107 return true; | |
| 108 } | |
| 109 } else { | |
| 110 const unsigned char* offset = pos_; | |
| 111 // Read offsets from |pos_| until the label of the child node at |offset| | |
| 112 // matches |input|, or until there are no more offsets. | |
| 113 while (GetNextOffset(&pos_, &offset)) { | |
| 114 DCHECK(offset < end_); | |
| 115 DCHECK((pos_ == nullptr) || (pos_ < end_)); | |
| 116 | |
| 117 // |offset| points to a DAFSA node that is a child of the original node. | |
| 118 // | |
| 119 // The low 7 bits of a node encodes a character value; the high bit | |
| 120 // indicates whether it's the last character in the label. | |
| 121 // | |
| 122 // Note that |*offset| could also be a result code value, but these are | |
| 123 // really just out-of-range ASCII values, encoded the same way as | |
| 124 // characters. Since |input| was already validated as a printable ASCII | |
| 125 // value ASCII value, IsMatch will never return true if |offset| is a | |
| 126 // result code. | |
| 127 bool is_last_char_in_label = IsEOL(offset); | |
| 128 bool is_match = IsMatch(offset, input); | |
| 129 | |
| 130 if (is_match) { | |
| 131 // If this is not the last character in the label, the next byte | |
| 132 // should be interpreted as a character or return value. Otherwise, | |
| 133 // the next byte should be interpreted as a list of child node | |
| 134 // offsets. | |
| 135 pos_ = offset + 1; | |
| 136 DCHECK(pos_ < end_); | |
| 137 pos_is_label_character_ = !is_last_char_in_label; | |
| 138 return true; | |
| 139 } | |
| 140 } | |
| 141 } | |
| 142 } | |
| 143 | |
| 144 // If no match was found, then end of the DAFSA has been reached. | |
| 145 pos_ = nullptr; | |
| 146 pos_is_label_character_ = false; | |
| 147 return false; | |
| 148 } | |
| 149 | |
| 150 int FixedSetIncrementalLookup::GetResultForCurrentSequence() const { | |
| 151 int value = kDafsaNotFound; | |
| 152 // Look to see if there is a next character that's a return value. | |
| 153 if (pos_is_label_character_) { | |
| 154 // Currently processing a label, so it is only necessary to check the byte | |
| 155 // at |pos_| to see if encodes a return value. | |
| 156 GetReturnValue(pos_, &value); | |
| 157 } else { | |
| 158 // Otherwise, |pos_| is an offset list (or nullptr). Explore the list of | |
| 159 // child nodes (given by their offsets) to find one whose label is a result | |
| 160 // code. | |
| 161 // | |
| 162 // This search uses a temporary copy of |pos_|, since mutating |pos_| could | |
| 163 // skip over a node that would be important to a subsequent Advance() call. | |
| 164 const unsigned char* temp_pos = pos_; | |
| 165 | |
| 166 // Read offsets from |temp_pos| until either |temp_pos| is nullptr or until | |
| 167 // the byte at |offset| contains a result code (encoded as an ASCII | |
| 168 // character below 0x20). | |
| 169 const unsigned char* offset = pos_; | |
| 170 while (GetNextOffset(&temp_pos, &offset)) { | |
| 171 DCHECK(offset < end_); | |
| 172 DCHECK((temp_pos == nullptr) || temp_pos < end_); | |
| 173 if (GetReturnValue(offset, &value)) | |
| 174 break; | |
| 175 } | |
| 176 } | |
| 177 return value; | |
| 178 } | |
| 179 | |
| 180 int LookupStringInFixedSet(const unsigned char* graph, | |
| 181 size_t length, | |
| 182 const char* key, | |
| 183 size_t key_length) { | |
| 184 // Do an incremental lookup until either the end of the graph is reached, or | |
| 185 // until every character in |key| is consumed. | |
| 186 FixedSetIncrementalLookup lookup(graph, length); | |
| 187 const char* key_end = key + key_length; | |
| 188 while (key != key_end) { | |
| 189 if (!lookup.Advance(*key)) | |
| 190 return kDafsaNotFound; | |
| 191 key++; | |
| 192 } | |
| 193 // The entire input was consumed without reaching the end of the graph. Return | |
| 194 // the result code (if present) for the current position, or kDafsaNotFound. | |
| 195 return lookup.GetResultForCurrentSequence(); | |
| 196 } | |
| 197 | |
| 198 } // namespace net | |
| OLD | NEW |