Chromium Code Reviews| Index: net/base/dafsa/lookup_string.cc |
| diff --git a/net/base/dafsa/lookup_string.cc b/net/base/dafsa/lookup_string.cc |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..4858a5e00c78dd3f9467839f15da358b11eb8a07 |
| --- /dev/null |
| +++ b/net/base/dafsa/lookup_string.cc |
| @@ -0,0 +1,151 @@ |
| +// Copyright 2015 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +#include "net/base/dafsa/lookup_string.h" |
| + |
| +#include "base/logging.h" |
| + |
| +namespace net { |
| + |
| +namespace { |
| + |
| +// Read next offset from pos. |
| +// Returns true if an offset could be read, false otherwise. |
| +bool GetNextOffset(const unsigned char** pos, |
| + const unsigned char* end, |
| + const unsigned char** offset) { |
| + if (*pos == end) |
| + return false; |
| + |
| + // When reading an offset the byte array must always contain at least |
| + // three more bytes to consume. First the offset to read, then a node |
| + // to skip over and finally a destination node. No object can be smaller |
| + // than one byte. |
| + CHECK_LT(*pos + 2, end); |
| + size_t bytes_consumed; |
| + switch (**pos & 0x60) { |
| + case 0x60: // Read three byte offset |
| + *offset += (((*pos)[0] & 0x1F) << 16) | ((*pos)[1] << 8) | (*pos)[2]; |
| + bytes_consumed = 3; |
| + break; |
| + case 0x40: // Read two byte offset |
| + *offset += (((*pos)[0] & 0x1F) << 8) | (*pos)[1]; |
| + bytes_consumed = 2; |
| + break; |
| + default: |
| + *offset += (*pos)[0] & 0x3F; |
| + bytes_consumed = 1; |
| + } |
| + if ((**pos & 0x80) != 0) { |
| + *pos = end; |
| + } else { |
| + *pos += bytes_consumed; |
| + } |
| + return true; |
| +} |
| + |
| +// Check if byte at offset is last in label. |
| +bool IsEOL(const unsigned char* offset, const unsigned char* end) { |
| + CHECK_LT(offset, end); |
| + return (*offset & 0x80) != 0; |
| +} |
| + |
| +// Check if byte at offset matches first character in key. |
| +// This version matches characters not last in label. |
| +bool IsMatch(const unsigned char* offset, |
| + const unsigned char* end, |
| + const char* key) { |
| + CHECK_LT(offset, end); |
| + return *offset == *key; |
| +} |
| + |
| +// Check if byte at offset matches first character in key. |
| +// This version matches characters last in label. |
| +bool IsEndCharMatch(const unsigned char* offset, |
| + const unsigned char* end, |
| + const char* key) { |
| + CHECK_LT(offset, end); |
| + return *offset == (*key | 0x80); |
| +} |
| + |
| +// Read return value at offset. |
| +// Returns true if a return value could be read, false otherwise. |
| +bool GetReturnValue(const unsigned char* offset, |
| + const unsigned char* end, |
| + int* return_value) { |
| + CHECK_LT(offset, end); |
| + if ((*offset & 0xE0) == 0x80) { |
| + *return_value = *offset & 0x0F; |
| + return true; |
| + } |
| + return false; |
| +} |
| + |
| +} // namespace |
| + |
| +// Lookup a domain key in a byte array generated by make_dafsa.py. |
| +// The rule type is returned if key is found, otherwise kNotFound is returned. |
| +int LookupString(const unsigned char* graph, |
|
tyoshino (SeeGerritForStatus)
2015/10/19 07:15:10
You've moved this out of the anonymous namespace.
Adam Rice
2015/10/19 12:03:07
Good point, thank you. I changed the name to Looku
|
| + size_t length, |
| + const char* key, |
| + size_t key_length) { |
| + const unsigned char* pos = graph; |
| + const unsigned char* end = graph + length; |
| + const unsigned char* offset = pos; |
| + const char* key_end = key + key_length; |
| + while (GetNextOffset(&pos, end, &offset)) { |
| + // char <char>+ end_char offsets |
| + // char <char>+ return value |
| + // char end_char offsets |
| + // char return value |
| + // end_char offsets |
| + // return_value |
| + bool did_consume = false; |
| + if (key != key_end && !IsEOL(offset, end)) { |
| + // Leading <char> is not a match. Don't dive into this child |
| + if (!IsMatch(offset, end, key)) |
| + continue; |
| + did_consume = true; |
| + ++offset; |
| + ++key; |
| + // Possible matches at this point: |
| + // <char>+ end_char offsets |
| + // <char>+ return value |
| + // end_char offsets |
| + // return value |
| + // Remove all remaining <char> nodes possible |
| + while (!IsEOL(offset, end) && key != key_end) { |
| + if (!IsMatch(offset, end, key)) |
| + return kNotFound; |
| + ++key; |
| + ++offset; |
| + } |
| + } |
| + // Possible matches at this point: |
| + // end_char offsets |
| + // return_value |
| + // If one or more <char> elements were consumed, a failure |
| + // to match is terminal. Otherwise, try the next node. |
| + if (key == key_end) { |
| + int return_value; |
| + if (GetReturnValue(offset, end, &return_value)) |
| + return return_value; |
| + // The DAFSA guarantees that if the first char is a match, all |
| + // remaining char elements MUST match if the key is truly present. |
| + if (did_consume) |
| + return kNotFound; |
| + continue; |
| + } |
| + if (!IsEndCharMatch(offset, end, key)) { |
| + if (did_consume) |
| + return kNotFound; // Unexpected |
| + continue; |
| + } |
| + ++key; |
| + pos = ++offset; // Dive into child |
| + } |
| + return kNotFound; // No match |
| +} |
| + |
| +} // namespace net |