| Index: net/base/dafsa/lookup_string.cc
|
| diff --git a/net/base/dafsa/lookup_string.cc b/net/base/dafsa/lookup_string.cc
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..344fe222843679b40cdc4168a777f0adecfa44ee
|
| --- /dev/null
|
| +++ b/net/base/dafsa/lookup_string.cc
|
| @@ -0,0 +1,152 @@
|
| +// Copyright 2015 The Chromium Authors. All rights reserved.
|
| +// Use of this source code is governed by a BSD-style license that can be
|
| +// found in the LICENSE file.
|
| +
|
| +#include "net/base/dafsa/lookup_string.h"
|
| +
|
| +#include "base/logging.h"
|
| +
|
| +namespace net {
|
| +
|
| +namespace {
|
| +
|
| +// Read next offset from pos.
|
| +// Returns true if an offset could be read, false otherwise.
|
| +bool GetNextOffset(const unsigned char** pos,
|
| + const unsigned char* end,
|
| + const unsigned char** offset) {
|
| + if (*pos == end)
|
| + return false;
|
| +
|
| + // When reading an offset the byte array must always contain at least
|
| + // three more bytes to consume. First the offset to read, then a node
|
| + // to skip over and finally a destination node. No object can be smaller
|
| + // than one byte.
|
| + CHECK_LT(*pos + 2, end);
|
| + size_t bytes_consumed;
|
| + switch (**pos & 0x60) {
|
| + case 0x60: // Read three byte offset
|
| + *offset += (((*pos)[0] & 0x1F) << 16) | ((*pos)[1] << 8) | (*pos)[2];
|
| + bytes_consumed = 3;
|
| + break;
|
| + case 0x40: // Read two byte offset
|
| + *offset += (((*pos)[0] & 0x1F) << 8) | (*pos)[1];
|
| + bytes_consumed = 2;
|
| + break;
|
| + default:
|
| + *offset += (*pos)[0] & 0x3F;
|
| + bytes_consumed = 1;
|
| + }
|
| + if ((**pos & 0x80) != 0) {
|
| + *pos = end;
|
| + } else {
|
| + *pos += bytes_consumed;
|
| + }
|
| + return true;
|
| +}
|
| +
|
| +// Check if byte at offset is last in label.
|
| +bool IsEOL(const unsigned char* offset, const unsigned char* end) {
|
| + CHECK_LT(offset, end);
|
| + return (*offset & 0x80) != 0;
|
| +}
|
| +
|
| +// Check if byte at offset matches first character in key.
|
| +// This version matches characters not last in label.
|
| +bool IsMatch(const unsigned char* offset,
|
| + const unsigned char* end,
|
| + const char* key) {
|
| + CHECK_LT(offset, end);
|
| + return *offset == *key;
|
| +}
|
| +
|
| +// Check if byte at offset matches first character in key.
|
| +// This version matches characters last in label.
|
| +bool IsEndCharMatch(const unsigned char* offset,
|
| + const unsigned char* end,
|
| + const char* key) {
|
| + CHECK_LT(offset, end);
|
| + return *offset == (*key | 0x80);
|
| +}
|
| +
|
| +// Read return value at offset.
|
| +// Returns true if a return value could be read, false otherwise.
|
| +bool GetReturnValue(const unsigned char* offset,
|
| + const unsigned char* end,
|
| + int* return_value) {
|
| + CHECK_LT(offset, end);
|
| + if ((*offset & 0xE0) == 0x80) {
|
| + *return_value = *offset & 0x0F;
|
| + return true;
|
| + }
|
| + return false;
|
| +}
|
| +
|
| +} // namespace
|
| +
|
| +// Lookup a domain key in a byte array generated by make_dafsa.py.
|
| +// The rule type is returned if key is found, otherwise kDafsaNotFound is
|
| +// returned.
|
| +int LookupStringInDafsa(const unsigned char* graph,
|
| + size_t length,
|
| + const char* key,
|
| + size_t key_length) {
|
| + const unsigned char* pos = graph;
|
| + const unsigned char* end = graph + length;
|
| + const unsigned char* offset = pos;
|
| + const char* key_end = key + key_length;
|
| + while (GetNextOffset(&pos, end, &offset)) {
|
| + // char <char>+ end_char offsets
|
| + // char <char>+ return value
|
| + // char end_char offsets
|
| + // char return value
|
| + // end_char offsets
|
| + // return_value
|
| + bool did_consume = false;
|
| + if (key != key_end && !IsEOL(offset, end)) {
|
| + // Leading <char> is not a match. Don't dive into this child
|
| + if (!IsMatch(offset, end, key))
|
| + continue;
|
| + did_consume = true;
|
| + ++offset;
|
| + ++key;
|
| + // Possible matches at this point:
|
| + // <char>+ end_char offsets
|
| + // <char>+ return value
|
| + // end_char offsets
|
| + // return value
|
| + // Remove all remaining <char> nodes possible
|
| + while (!IsEOL(offset, end) && key != key_end) {
|
| + if (!IsMatch(offset, end, key))
|
| + return kDafsaNotFound;
|
| + ++key;
|
| + ++offset;
|
| + }
|
| + }
|
| + // Possible matches at this point:
|
| + // end_char offsets
|
| + // return_value
|
| + // If one or more <char> elements were consumed, a failure
|
| + // to match is terminal. Otherwise, try the next node.
|
| + if (key == key_end) {
|
| + int return_value;
|
| + if (GetReturnValue(offset, end, &return_value))
|
| + return return_value;
|
| + // The DAFSA guarantees that if the first char is a match, all
|
| + // remaining char elements MUST match if the key is truly present.
|
| + if (did_consume)
|
| + return kDafsaNotFound;
|
| + continue;
|
| + }
|
| + if (!IsEndCharMatch(offset, end, key)) {
|
| + if (did_consume)
|
| + return kDafsaNotFound; // Unexpected
|
| + continue;
|
| + }
|
| + ++key;
|
| + pos = ++offset; // Dive into child
|
| + }
|
| + return kDafsaNotFound; // No match
|
| +}
|
| +
|
| +} // namespace net
|
|
|