| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 // The cache is stored on disk as a collection of block-files, plus an index | |
| 6 // plus a collection of external files. | |
| 7 // | |
| 8 // Any data blob bigger than kMaxBlockSize (disk_cache/addr.h) will be stored in | |
| 9 // a separate file named f_xxx where x is a hexadecimal number. Shorter data | |
| 10 // will be stored as a series of blocks on a block-file. In any case, CacheAddr | |
| 11 // represents the address of the data inside the cache. | |
| 12 // | |
| 13 // The index is actually a collection of four files that store a hash table with | |
| 14 // allocation bitmaps and backup data. Hash collisions are handled directly by | |
| 15 // the table, which from some point of view behaves like a 4-way associative | |
| 16 // cache with overflow buckets (so not really open addressing). | |
| 17 // | |
| 18 // Basically the hash table is a collection of buckets. The first part of the | |
| 19 // table has a fixed number of buckets and it is directly addressed by the hash, | |
| 20 // while the second part of the table (stored on a second file) has a variable | |
| 21 // number of buckets. Each bucket stores up to four cells (each cell represents | |
| 22 // a possible entry). The index bitmap tracks the state of individual cells. | |
| 23 // | |
| 24 // The last element of the cache is the block-file. A block file is a file | |
| 25 // designed to store blocks of data of a given size. For more details see | |
| 26 // disk_cache/disk_format_base.h | |
| 27 // | |
| 28 // A new cache is initialized with a set of block files (named data_0 through | |
| 29 // data_6), each one dedicated to store blocks of a given size or function. The | |
| 30 // number at the end of the file name is the block file number (in decimal). | |
| 31 // | |
| 32 // There are three "special" types of blocks: normal entries, evicted entries | |
| 33 // and control data for external files. | |
| 34 // | |
| 35 // The files that store internal information for the cache (blocks and index) | |
| 36 // are memory mapped. They have a location that is signaled every time the | |
| 37 // internal structures are modified, so it is possible to detect (most of the | |
| 38 // time) when the process dies in the middle of an update. There are dedicated | |
| 39 // backup files for cache bitmaps, used to detect entries out of date. | |
| 40 // | |
| 41 // Although cache files are to be consumed on the same machine that creates | |
| 42 // them, if files are to be moved across machines, little endian storage is | |
| 43 // assumed. | |
| 44 | |
| 45 #ifndef NET_DISK_CACHE_BLOCKFILE_DISK_FORMAT_V3_H_ | |
| 46 #define NET_DISK_CACHE_BLOCKFILE_DISK_FORMAT_V3_H_ | |
| 47 | |
| 48 #include <stdint.h> | |
| 49 #include <string.h> | |
| 50 | |
| 51 #include "net/disk_cache/blockfile/disk_format_base.h" | |
| 52 | |
| 53 namespace disk_cache { | |
| 54 | |
| 55 const int kBaseTableLen = 0x400; // Default table length; Index::table holds kBaseTableLen / kCellsPerBucket buckets. | |
| 56 const uint32_t kIndexMagicV3 = 0xC103CAC3; // Presumably the expected IndexHeaderV3.magic value -- confirm against callers. | |
| 57 const uint32_t kVersion3 = 0x30000; // Version 3.0. | |
| 58 | |
| 59 // Flags for a given cache. Each flag is a distinct bit, so values can be OR-ed together (presumably into IndexHeaderV3.flags -- confirm). | |
| 60 enum CacheFlags { | |
| 61 SMALL_CACHE = 1 << 0, // Cells use 16 location bits and 24 id bits. See IndexCell. | |
| 62 CACHE_EVICTION_2 = 1 << 1, // Keep multiple lists for eviction. | |
| 63 CACHE_EVICTED = 1 << 2 // Already evicted at least one entry. | |
| 64 }; | |
| 65 | |
| 66 // Header for the master index file. It is the first member of IndexBitmap. | |
| 67 struct IndexHeaderV3 { | |
| 68 uint32_t magic; // Presumably kIndexMagicV3 -- confirm. | |
| 69 uint32_t version; // Presumably kVersion3 -- confirm. | |
| 70 int32_t num_entries; // Number of entries currently stored. | |
| 71 int32_t num_bytes; // Total size of the stored data. | |
| 72 int32_t last_file; // Last external file created. | |
| 73 int32_t reserved1; | |
| 74 CacheAddr stats; // Storage for usage data. | |
| 75 int32_t table_len; // Actual size of the table. | |
| 76 int32_t crash; // Signals a previous crash. | |
| 77 int32_t experiment; // Id of an ongoing test. | |
| 78 int32_t max_bytes; // Total maximum size of the stored data. | |
| 79 uint32_t flags; // Presumably a combination of CacheFlags -- confirm. | |
| 80 int32_t used_cells; | |
| 81 int32_t max_bucket; | |
| 82 uint64_t create_time; // Creation time for this set of files. | |
| 83 uint64_t base_time; // Current base for timestamps. | |
| 84 uint64_t old_time; // Previous time used for timestamps. | |
| 85 int32_t max_block_file; | |
| 86 int32_t num_no_use_entries; // Presumably the count of ENTRY_NO_USE entries -- confirm. | |
| 87 int32_t num_low_use_entries; // Presumably the count of ENTRY_LOW_USE entries -- confirm. | |
| 88 int32_t num_high_use_entries; // Presumably the count of ENTRY_HIGH_USE entries -- confirm. | |
| 89 int32_t reserved; | |
| 90 int32_t num_evicted_entries; // Presumably the count of ENTRY_EVICTED entries -- confirm. | |
| 91 int32_t pad[6]; // Padding. The header must total 128 bytes for IndexBitmap to be 4096 bytes (see its static_assert). | |
| 92 }; | |
| 93 | |
| 94 const int kBaseBitmapBytes = 3968; // Bitmap bytes that share the first 4096-byte page with the header. | |
| 95 // The IndexBitmap is directly saved to a file named index. The file grows in | |
| 96 // page increments (4096 bytes), but not all bits have to be in use at any | |
| 97 // given time. The required file size can be computed from header.table_len. | |
| 98 struct IndexBitmap { | |
| 99 IndexHeaderV3 header; | |
| 100 uint32_t bitmap[kBaseBitmapBytes / 4]; // First page of the bitmap. | |
| 101 }; | |
| 102 static_assert(sizeof(IndexBitmap) == 4096, "bad IndexHeader"); // Header plus first bitmap page must fill exactly one page. | |
| 103 | |
| 104 // Possible states for a given entry. The value must fit in the 3-bit state field of IndexCell, which the static_assert below enforces. | |
| 105 enum EntryState { | |
| 106 ENTRY_FREE = 0, // Available slot. | |
| 107 ENTRY_NEW, // The entry is being created. | |
| 108 ENTRY_OPEN, // The entry is being accessed. | |
| 109 ENTRY_MODIFIED, // The entry is being modified. | |
| 110 ENTRY_DELETED, // The entry is being deleted. | |
| 111 ENTRY_FIXING, // Inconsistent state. The entry is being verified. | |
| 112 ENTRY_USED // The slot is in use (entry is present). | |
| 113 }; | |
| 114 static_assert(ENTRY_USED <= 7, "state uses 3 bits"); | |
| 115 | |
| 116 enum EntryGroup { // Reuse/eviction group of an entry; must fit in the 3-bit group field of IndexCell. | |
| 117 ENTRY_NO_USE = 0, // The entry has not been reused. | |
| 118 ENTRY_LOW_USE, // The entry has low reuse. | |
| 119 ENTRY_HIGH_USE, // The entry has high reuse. | |
| 120 ENTRY_RESERVED, // Reserved for future use. | |
| 121 ENTRY_EVICTED // The entry was deleted. | |
| 122 }; | |
| 123 static_assert(ENTRY_EVICTED <= 7, "group uses 3 bits"); // Checks the largest EntryGroup value (was ENTRY_USED, an EntryState). | |
| 124 | |
| 125 #pragma pack(push, 1) | |
| 126 struct IndexCell { | |
| 127 void Clear() { memset(this, 0, sizeof(*this)); } // Zeroes all 9 bytes, so every field reads 0 (state becomes ENTRY_FREE). | |
| 128 | |
| 129 // A cell is a 9 byte bit-field that stores 7 values: | |
| 130 // location : 22 bits | |
| 131 // id : 18 bits | |
| 132 // timestamp : 20 bits | |
| 133 // reuse : 4 bits | |
| 134 // state : 3 bits | |
| 135 // group : 3 bits | |
| 136 // sum : 2 bits | |
| 137 // The id is derived from the full hash of the entry. | |
| 138 // | |
| 139 // The actual layout is as follows: | |
| 140 // | |
| 141 // first_part (low order 32 bits): | |
| 142 // 0000 0000 0011 1111 1111 1111 1111 1111 : location | |
| 143 // 1111 1111 1100 0000 0000 0000 0000 0000 : id | |
| 144 // | |
| 145 // first_part (high order 32 bits): | |
| 146 // 0000 0000 0000 0000 0000 0000 1111 1111 : id | |
| 147 // 0000 1111 1111 1111 1111 1111 0000 0000 : timestamp | |
| 148 // 1111 0000 0000 0000 0000 0000 0000 0000 : reuse | |
| 149 // | |
| 150 // last_part: | |
| 151 // 0000 0111 : state | |
| 152 // 0011 1000 : group | |
| 153 // 1100 0000 : sum | |
| 154 // | |
| 155 // The small-cache version of the format moves some bits from the location to | |
| 156 // the id fields, like so: | |
| 157 // location : 16 bits | |
| 158 // id : 24 bits | |
| 159 // | |
| 160 // first_part (low order 32 bits): | |
| 161 // 0000 0000 0000 0000 1111 1111 1111 1111 : location | |
| 162 // 1111 1111 1111 1111 0000 0000 0000 0000 : id | |
| 163 // | |
| 164 // The actual bit distribution between location and id is determined by the | |
| 165 // table size (IndexHeaderV3.table_len). Tables smaller than 65536 entries | |
| 166 // use the small-cache version; after that size, caches should have the | |
| 167 // SMALL_CACHE flag cleared. | |
| 168 // | |
| 169 // To locate a given entry after recovering the location from the cell, the | |
| 170 // file type and file number are appended (see disk_cache/addr.h). For a large | |
| 171 // table only the file type is implied; for a small table, the file number | |
| 172 // is also implied, and it should be the first file for that type of entry, | |
| 173 // as determined by the EntryGroup (two files in total, one for active entries | |
| 174 // and another one for evicted entries). | |
| 175 // | |
| 176 // For example, a small table may store something like 0x1234 as the location | |
| 177 // field. That means it stores the entry number 0x1234. If that record belongs | |
| 178 // to a deleted entry, the regular cache address may look something like | |
| 179 // BLOCK_EVICTED + 1 block + file number 6 + entry number 0x1234 | |
| 180 // so Addr = 0xf0061234 | |
| 181 // | |
| 182 // If that same Addr is stored on a large table, the location field would be | |
| 183 // 0x61234 | |
| 184 | |
| 185 uint64_t first_part; // Holds location, id, timestamp and reuse (layout above). | |
| 186 uint8_t last_part; // Holds state, group and sum (layout above). | |
| 187 }; | |
| 188 static_assert(sizeof(IndexCell) == 9, "bad IndexCell"); | |
| 189 | |
| 190 const int kCellsPerBucket = 4; // Number of cells stored in each bucket. | |
| 191 struct IndexBucket { | |
| 192 IndexCell cells[kCellsPerBucket]; | |
| 193 int32_t next; // Presumably the link to an overflow bucket -- confirm against the index code. | |
| 194 uint32_t hash; // The high order byte is reserved (should be zero). | |
| 195 }; | |
| 196 static_assert(sizeof(IndexBucket) == 44, "bad IndexBucket"); // 4 cells * 9 bytes + next + hash, packed. | |
| 197 const int kBytesPerCell = 44 / kCellsPerBucket; // 11: each cell's share of the 44-byte bucket. | |
| 198 | |
| 199 // The main cache index. Backed by a file named index_tb1. | |
| 200 // The extra table (index_tb2) has a similar format, but different size. | |
| 201 struct Index { | |
| 202 // Default size (kBaseTableLen cells, kCellsPerBucket per bucket). Actual size controlled by header.table_len. | |
| 203 IndexBucket table[kBaseTableLen / kCellsPerBucket]; | |
| 204 }; | |
| 205 #pragma pack(pop) | |
| 206 | |
| 207 // Flags that can be applied to an entry. Each flag is a distinct bit; EntryRecord.flags holds any combination of them. | |
| 208 enum EntryFlags { | |
| 209 PARENT_ENTRY = 1, // This entry has child (sparse) entries. | |
| 210 CHILD_ENTRY = 1 << 1 // Child entry that stores sparse data. | |
| 211 }; | |
| 212 | |
| 213 struct EntryRecord { // Per-entry record with up to 4 data streams; exactly 104 bytes (asserted below). | |
| 214 uint32_t hash; | |
| 215 uint32_t pad1; | |
| 216 uint8_t reuse_count; | |
| 217 uint8_t refetch_count; | |
| 218 int8_t state; // Current EntryState. | |
| 219 uint8_t flags; // Any combination of EntryFlags. | |
| 220 int32_t key_len; | |
| 221 int32_t data_size[4]; // We can store up to 4 data streams for each | |
| 222 CacheAddr data_addr[4]; // entry. | |
| 223 uint32_t data_hash[4]; // Presumably one hash per data stream -- confirm. | |
| 224 uint64_t creation_time; | |
| 225 uint64_t last_modified_time; | |
| 226 uint64_t last_access_time; | |
| 227 int32_t pad[3]; | |
| 228 uint32_t self_hash; // Presumably a hash over this record itself -- confirm. | |
| 229 }; | |
| 230 static_assert(sizeof(EntryRecord) == 104, "bad EntryRecord"); | |
| 231 | |
| 232 struct ShortEntryRecord { // Compact record without per-stream data; exactly 48 bytes (asserted below). Presumably used for evicted entries -- confirm. | |
| 233 uint32_t hash; | |
| 234 uint32_t pad1; | |
| 235 uint8_t reuse_count; | |
| 236 uint8_t refetch_count; | |
| 237 int8_t state; // Current EntryState. | |
| 238 uint8_t flags; // Presumably any combination of EntryFlags, as in EntryRecord -- confirm. | |
| 239 int32_t key_len; | |
| 240 uint64_t last_access_time; | |
| 241 uint32_t long_hash[5]; | |
| 242 uint32_t self_hash; | |
| 243 }; | |
| 244 static_assert(sizeof(ShortEntryRecord) == 48, "bad ShortEntryRecord"); | |
| 245 | |
| 246 } // namespace disk_cache | |
| 247 | |
| 248 #endif // NET_DISK_CACHE_BLOCKFILE_DISK_FORMAT_V3_H_ | |
| OLD | NEW |