net/disk_cache/v3/index_table.cc - Issue 53313004: Disk cache v3: The main index table.

Side by Side Diff: net/disk_cache/v3/index_table.cc

Issue 53313004: Disk cache v3: The main index table. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/

Patch Set: WalkTables Created 7 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 #include "net/disk_cache/v3/index_table.h"

	6

	7 #include <algorithm>

	8 #include <set>

	9 #include <utility>

	10

	11 #include "base/bits.h"

	12 #include "net/base/io_buffer.h"

	13 #include "net/base/net_errors.h"

	14 #include "net/disk_cache/disk_cache.h"

	15

	16 using base::Time;

	17 using base::TimeDelta;

	18 using disk_cache::CellInfo;

	19 using disk_cache::CellList;

	20 using disk_cache::IndexCell;

	21 using disk_cache::IndexIterator;

	22

	23 namespace {

	24

	25 const uint32 kMaxAddress = 1 << 22;

	26

	27 const int kCellHashOffset = 22;

	28 const int kCellSmallTableHashOffset = 16;

	29 const int kCellTimestampOffset = 40;

	30 const int kCellReuseOffset = 60;

	31 const int kCellGroupOffset = 3;

	32 const int kCellSumOffset = 6;

	33

	34 const uint64 kCellAddressMask = 0x3FFFFF;

	35 const uint64 kCellSmallTableAddressMask = 0xFFFF;

	36 const uint64 kCellHashMask = 0x3FFFF;

	37 const uint64 kCellSmallTableHashMask = 0xFFFFFF;

	38 const uint64 kCellTimestampMask = 0xFFFFF;

	39 const uint64 kCellReuseMask = 0xF;

	40 const uint8 kCellStateMask = 0x7;

	41 const uint8 kCellGroupMask = 0x7;

	42 const uint8 kCellSumMask = 0x3;

	43

	44 const int kHashShift = 14;

	45 const int kHashSmallTableShift = 8;

	46

	47 // Unfortunately we have to break the abstraction a little here: the file number

	48 // where entries are stored is outside of the control of this code, and it is

	49 // usually part of the stored address. However, for small tables we only store

	50 // 16 bits of the address so the file number is never stored on a cell. We have

	51 // to infer the file number from the type of entry (normal vs evicted), and

	52 // the knowledge that given that the table will not keep more than 64k entries,

	53 // a single file of each type is enough.

	54 const int kEntriesFile = disk_cache::BLOCK_ENTRIES - 1;

	55 const int kEvictedEntriesFile = disk_cache::BLOCK_EVICTED - 1;

	56

	57 uint32 GetCellAddress(const IndexCell& cell) {

	58 return cell.first_part & kCellAddressMask;

	59 }

	60

	61 uint32 GetCellSmallTableAddress(const IndexCell& cell) {

	62 return cell.first_part & kCellSmallTableAddressMask;

	63 }

	64

	65 uint32 GetCellHash(const IndexCell& cell) {

	66 return (cell.first_part >> kCellHashOffset) & kCellHashMask;

	67 }

	68

	69 uint32 GetCellSmallTableHash(const IndexCell& cell) {

	70 return (cell.first_part >> kCellSmallTableHashOffset) &

	71 kCellSmallTableHashMask;

	72 }

	73

	74 int GetCellTimestamp(const IndexCell& cell) {

	75 return (cell.first_part >> kCellTimestampOffset) & kCellTimestampMask;

	76 }

	77

	78 int GetCellReuse(const IndexCell& cell) {

	79 return (cell.first_part >> kCellReuseOffset) & kCellReuseMask;

	80 }

	81

	82 int GetCellState(const IndexCell& cell) {

	83 return cell.last_part & kCellStateMask;

	84 }

	85

	86 int GetCellGroup(const IndexCell& cell) {

	87 return (cell.last_part >> kCellGroupOffset) & kCellGroupMask;

	88 }

	89

	90 int GetCellSum(const IndexCell& cell) {

	91 return (cell.last_part >> kCellSumOffset) & kCellSumMask;

	92 }

	93

	94 void SetCellAddress(IndexCell* cell, uint32 address) {

	95 DCHECK_LE(address, static_cast<uint32>(kCellAddressMask));

	96 cell->first_part &= ~kCellAddressMask;

	97 cell->first_part \|= address;

	98 }

	99

	100 void SetCellSmallTableAddress(IndexCell* cell, uint32 address) {

	101 DCHECK_LE(address, static_cast<uint32>(kCellSmallTableAddressMask));

	102 cell->first_part &= ~kCellSmallTableAddressMask;

	103 cell->first_part \|= address;

	104 }

	105

	106 void SetCellHash(IndexCell* cell, uint32 hash) {

	107 DCHECK_LE(hash, static_cast<uint32>(kCellHashMask));

	108 cell->first_part &= ~(kCellHashMask << kCellHashOffset);

	109 cell->first_part \|= static_cast<int64>(hash) << kCellHashOffset;

	110 }

	111

	112 void SetCellSmallTableHash(IndexCell* cell, uint32 hash) {

	113 DCHECK_LE(hash, static_cast<uint32>(kCellSmallTableHashMask));

	114 cell->first_part &= ~(kCellSmallTableHashMask << kCellSmallTableHashOffset);

	115 cell->first_part \|= static_cast<int64>(hash) << kCellSmallTableHashOffset;

	116 }

	117

	118 void SetCellTimestamp(IndexCell* cell, int timestamp) {

	119 DCHECK_LT(timestamp, 1 << 20);

	120 DCHECK_GE(timestamp, 0);

	121 cell->first_part &= ~(kCellTimestampMask << kCellTimestampOffset);

	122 cell->first_part \|= static_cast<int64>(timestamp) << kCellTimestampOffset;

	123 }

	124

	125 void SetCellReuse(IndexCell* cell, int count) {

	126 DCHECK_LT(count, 16);

	127 DCHECK_GE(count, 0);

	128 cell->first_part &= ~(kCellReuseMask << kCellReuseOffset);

	129 cell->first_part \|= static_cast<int64>(count) << kCellReuseOffset;

	130 }

	131

	132 void SetCellState(IndexCell* cell, disk_cache::EntryState state) {

	133 cell->last_part &= ~kCellStateMask;

	134 cell->last_part \|= state;

	135 }

	136

	137 void SetCellGroup(IndexCell* cell, disk_cache::EntryGroup group) {

	138 cell->last_part &= ~(kCellGroupMask << kCellGroupOffset);

	139 cell->last_part \|= group << kCellGroupOffset;

	140 }

	141

	142 void SetCellSum(IndexCell* cell, int sum) {

	143 DCHECK_LT(sum, 4);

	144 DCHECK_GE(sum, 0);

	145 cell->last_part &= ~(kCellSumMask << kCellSumOffset);

	146 cell->last_part \|= sum << kCellSumOffset;

	147 }

	148

	149 // This is a very particular way to calculate the sum, so it will not match if

	150 // compared against a pure 2 bit, modulo 2 sum.

	151 int CalculateCellSum(const IndexCell& cell) {

	152 uint32* words = bit_cast<uint32*>(&cell);

	153 uint8* bytes = bit_cast<uint8*>(&cell);

	154 uint32 result = words[0] + words[1];

	155 result += result >> 16;

	156 result += (result >> 8) + (bytes[8] & 0x3f);

	157 result += result >> 4;

	158 result += result >> 2;

	159 return result & 3;

	160 }

	161

	162 bool SanityCheck(const IndexCell& cell) {

	163 if (GetCellSum(cell) != CalculateCellSum(cell))

	164 return false;

	165

	166 if (GetCellState(cell) > disk_cache::ENTRY_USED \|\|

	167 GetCellGroup(cell) == disk_cache::ENTRY_RESERVED \|\|

	168 GetCellGroup(cell) > disk_cache::ENTRY_EVICTED) {

	169 return false;

	170 }

	171

	172 return true;

	173 }

	174

	175 bool IsValidAddress(disk_cache::Addr address) {

	176 if (!address.is_initialized() \|\|

	177 (address.file_type() != disk_cache::BLOCK_EVICTED &&

	178 address.file_type() != disk_cache::BLOCK_ENTRIES)) {

	179 return false;

	180 }

	181

	182 return address.ToIndexEntryAddress() < kMaxAddress;

	183 }

	184

	185 bool IsNormalState(const IndexCell& cell) {

	186 disk_cache::EntryState state =

	187 static_cast<disk_cache::EntryState>(GetCellState(cell));

	188 DCHECK_NE(state, disk_cache::ENTRY_FREE);

	189 return state != disk_cache::ENTRY_DELETED &&

	190 state != disk_cache::ENTRY_FIXING;

	191 }

	192

	193 inline int GetNextBucket(int min_bucket_id, int max_bucket_id,

	194 disk_cache::IndexBucket* table,

	195 disk_cache::IndexBucket** bucket) {

	196 if (!(*bucket)->next)

	197 return 0;

	198

	199 int bucket_id = (*bucket)->next / disk_cache::kCellsPerBucket;

	200 if (bucket_id < min_bucket_id \|\| bucket_id > max_bucket_id) {

	201 (*bucket)->next = 0;

	202 return 0;

	203 }

	204 *bucket = &table[bucket_id - min_bucket_id];

	205 return bucket_id;

	206 }

	207

	208 // Updates the \|iterator\| with the current \|cell\|. This cell may cause all

	209 // previous cells to be deleted (when a new target timestamp is found), the cell

	210 // may be added to the list (if it matches the target timestamp), or it may be

	211 // ignored.

	212 void UpdateIterator(const disk_cache::EntryCell& cell,

	213 int limit_time,

	214 IndexIterator* iterator) {

	215 int time = cell.GetTimestamp();

	216 // Look for not interesting times.

	217 if (iterator->forward && time <= limit_time)

	218 return;

	219 if (!iterator->forward && time >= limit_time)

	220 return;

	221

	222 if ((iterator->forward && time < iterator->timestamp) \|\|

	223 (!iterator->forward && time > iterator->timestamp)) {

	224 // This timestamp is better than the one we had.

	225 iterator->timestamp = time;

	226 iterator->cells.clear();

	227 }

	228 if (time == iterator->timestamp) {

	229 CellInfo cell_info = { cell.hash(), cell.GetAddress() };

	230 iterator->cells.push_back(cell_info);

	231 }

	232 }

	233

	234 void InitIterator(IndexIterator* iterator) {

	235 iterator->cells.clear();

	236 iterator->timestamp = iterator->forward ? kint32max : 0;

	237 }

	238

	239 } // namespace

	240

	241 namespace disk_cache {

	242

	243 EntryCell::~EntryCell() {

	244 }

	245

	246 bool EntryCell::IsValid() const {

	247 return GetCellAddress(cell_) != 0;

	248 }

	249

	250 Addr EntryCell::GetAddress() const {
	Randy Smith (Not in Mondays) 2013/11/13 21:14:12 I spent some time wrapping my brain around this fu I spent some time wrapping my brain around this function trying to understand the format difference between the small table and the large table. From disk_format_v3.h the only difference is the number of bits allocated to the hash and the address (small->large hash/addr: 16/16 -> 10/22). But looking at this function, it looks like we're also shifting from situation where we aren't using separate files (i.e. everything is always in either the entries file or the evicted entries file) to one in which the file is encoded in the address from the IndexCell. This right? If so, there looks to be a subtle contradiction between two different format descriptions. addr.h indicates taht it's reserving 8 bits in a CacheAddr for the file selector. But the specification in disk_format_v3.h implies that we're only using six of those eight bits (if the bottom 16 bits are for the block, that only leaves six bits for the file). Is that right? So a general concern (sorta implied by how long it took me to puzzle out the above) is around the documentation of the different forms of address and their relationship to each other, and the documentation of the difference in file usage for small/large table size. Is this done somewhere and I missed it? If not, could it be done somewhere? (After writing this, I went back and read the comment at the top of the file, which I hadn't understood on my first pass through, but now that I've re-read it having better grokked the file formats and small/large table distinction I understand better. But I still feels like it should be easier to really grok the file formats from reading through addr.h, disk_format_base.h, and disk_format_v3.h, though that may be just my failure in stumbling through that documentation in the wrong order.) 
Randy Smith (Not in Mondays) 2013/11/13 21:14:12 Another random table format question: How do you g Another random table format question: How do you get an Addr() with number of contiguous blocks != 1? If you have such a thing, how do you store it in the bits allocated for the address in an IndexCell? rvargas (doing something else) 2013/11/14 02:54:19 The issue is that the small table format only supp Show quoted text On 2013/11/13 21:14:12, rdsmith wrote: > I spent some time wrapping my brain around this function trying to understand > the format difference between the small table and the large table. From > disk_format_v3.h the only difference is the number of bits allocated to the hash > and the address (small->large hash/addr: 16/16 -> 10/22). But looking at this > function, it looks like we're also shifting from situation where we aren't > using separate files (i.e. everything is always in either the entries file or > the evicted entries file) to one in which the file is encoded in the address > from the IndexCell. This right? The issue is that the small table format only supports up to 64k entries. 64k entries is also roughly the limit of entries that can be stored by a single block file, and given that other parts of the design assign a specific file type for entries, the net result is that the block file type and number are fixed for the small table case. It is a case of implied bits that don't have to be stored. Large tables of course support more than 64k entries, and that means that the file number is also part of what changes in the address, and has to be stored. Now, ideally, an Address should be transparent for this code... and from the point of view of the index format it shouldn't really matter what specific bits of that address mean. On the other hand, the Address itself shouldn't care if it is being stored by an index or not. In other words, either part of the definition of Address leaks here, or details of the index leak into Address. 
So what this code says is: - for small tables, _I_ (this method) know that some bits here are fixed, and the Address API gives me a clear way to set them as needed. - for large tables, _I_ know how many bits I have on the table, and I know that they are not enough bits to overflow the "control" part of an Address, but I'll let the Address create the right object though a dedicated method because the normal API doesn't have what I need. We could add more methods to Address to make it aware of small tables (or add more arguments to the new methods), but between the two files I think it is better to leak from Address to Index than the other way. I could use Addr::FromEvictedAddress even for small tables after massaging the value, but I don't think that's what you want. Show quoted text > > If so, there looks to be a subtle contradiction between two different format > descriptions. addr.h indicates that it's reserving 8 bits in a CacheAddr for > the file selector. But the specification in disk_format_v3.h implies that we're > only using six of those eight bits (if the bottom 16 bits are for the block, > that only leaves six bits for the file). Is that right? Sort of. The low order 24 bits of an address are "pure" addressing bits in that they represent the addressing capacity of the format. The upper bits are some sort of control, but an address cannot enumerate more than 2^24 entities. BlockFiles is the one who really cares about the distinction between the low order 16 bits and the file number. So, overall, this code is saying that it can store up to 2^22 entries (active + deleted) so the high order bits of the file number are always going to be 0. Show quoted text > > So a general concern (sorta implied by how long it took me to puzzle out the > above) is around the documentation of the different forms of address and their > relationship to each other, and the documentation of the difference in file > usage for small/large table size. 
Is this done somewhere and I missed it? If > not, could it be done somewhere? Sorry about that. There is some text in disk_format_v3.h about what an index address is and how that is translated to Addr, but I expanded that comment with an example, and also extended the description on addr.h Show quoted text > > (After writing this, I went back and read the comment at the top of the file, > which I hadn't understood on my first pass through, but now that I've re-read it > having better grokked the file formats and small/large table distinction I > understand better. But I still feels like it should be easier to really grok > the file formats from reading through addr.h, disk_format_base.h, and > disk_format_v3.h, though that may be just my failure in stumbling through that > documentation in the wrong order.) rvargas (doing something else) 2013/11/14 02:54:19 That case corresponds for example to storing 800 b Show quoted text On 2013/11/13 21:14:12, rdsmith wrote: > Another random table format question: How do you get an Addr() with number of > contiguous blocks != 1? If you have such a thing, how do you store it in the > bits allocated for the address in an IndexCell? That case corresponds for example to storing 800 bytes of data somewhere. In that case, block size is 256 and there are 4 blocks used for the record. The index table only stores records of active or deleted entries, and they have a fixed size, so they will always use just one block. Randy Smith (Not in Mondays) 2013/11/18 20:37:04 It might be, actually. I agree with you that give Show quoted text On 2013/11/14 02:54:19, rvargas wrote: > We could add more methods to Address to make it aware of small tables (or add > more arguments to the new methods), but between the two files I think it is > better to leak from Address to Index than the other way. I could use > Addr::FromEvictedAddress even for small tables after massaging the value, but I > don't think that's what you want. It might be, actually. 
I agree with you that given that we're going to leak some information in one direction, I'd rather leak from Address to IndexTable than the other way around (ideally I'd like to not leak at all, but I haven't come up with any suggestions as to how to do that :-}). But we still have two different methods on address that look, from where I'm sitting, like they are targeting the two different formats of address in IndexTable. Looking just at the addr.h header, it's not clear why we need both the Addr constructor and the static methods Addr::FromAddress(). So if you'd be comfortable either breaking the block portion of address_value out and using the Addr() constructor in place of FromAddress (which means a bit more leakage of knowledge into IndexTable, specifically how multi-file address values are split between file # and block #, but we've already gestured at that in our use of the constructor) or the reverse (which leaks the same information) I'd personally prefer that, as it'd make the Addr interface not have any hints of index table formats in it. Alternatively, documentation in addr.h explaining what the FromAddress() are useful for without (explicitly or implicitly) being dependent on the IndexTable format would be fine. And, finally, I'm not fanatic about this--if you really want to leave it this way, I'm ok with that. But it feels to me as if we're leaking information both ways currently, and if we can I'd like to clean that up at least a little bit. rvargas (doing something else)* 2013/11/18 23:43:34 Done. I'll cleanup Addr if we are happy here. Show quoted text On 2013/11/18 20:37:04, rdsmith wrote: > On 2013/11/14 02:54:19, rvargas wrote: > > We could add more methods to Address to make it aware of small tables (or add > > more arguments to the new methods), but between the two files I think it is > > better to leak from Address to Index than the other way. 
I could use > > Addr::FromEvictedAddress even for small tables after massaging the value, but > I > > don't think that's what you want. > > It might be, actually. I agree with you that given that we're going to leak > some information in one direction, I'd rather leak from Address to IndexTable > than the other way around (ideally I'd like to not leak at all, but I haven't > come up with any suggestions as to how to do that :-}). But we still have two > different methods on address that look, from where I'm sitting, like they are > targeting the two different formats of address in IndexTable. Looking just at > the addr.h header, it's not clear why we need both the Addr constructor and the > static methods Addr::FromAddress(). So if you'd be comfortable either breaking > the block portion of address_value out and using the Addr() constructor in place > of FromAddress (which means a bit more leakage of knowledge into IndexTable, > specifically how multi-file address values are split between file # and block #, > but we've already gestured at that in our use of the constructor) or the reverse > (which leaks the same information) I'd personally prefer that, as it'd make the > Addr interface not have any hints of index table formats in it. > > Alternatively, documentation in addr.h explaining what the FromAddress() are > useful for without (explicitly or implicitly) being dependent on the IndexTable > format would be fine. > > And, finally, I'm not fanatic about this--if you really want to leave it this > way, I'm ok with that. But it feels to me as if we're leaking information both > ways currently, and if we can I'd like to clean that up at least a little bit. Done. I'll cleanup Addr if we are happy here. Randy Smith (Not in Mondays)* 2013/11/25 19:48:07 Thanks! Yes, this looks much better. 
Go ahead an Show quoted text On 2013/11/18 23:43:34, rvargas wrote: > On 2013/11/18 20:37:04, rdsmith wrote: > > On 2013/11/14 02:54:19, rvargas wrote: > > > We could add more methods to Address to make it aware of small tables (or > add > > > more arguments to the new methods), but between the two files I think it is > > > better to leak from Address to Index than the other way. I could use > > > Addr::FromEvictedAddress even for small tables after massaging the value, > but > > I > > > don't think that's what you want. > > > > It might be, actually. I agree with you that given that we're going to leak > > some information in one direction, I'd rather leak from Address to IndexTable > > than the other way around (ideally I'd like to not leak at all, but I haven't > > come up with any suggestions as to how to do that :-}). But we still have two > > different methods on address that look, from where I'm sitting, like they are > > targeting the two different formats of address in IndexTable. Looking just at > > the addr.h header, it's not clear why we need both the Addr constructor and > the > > static methods Addr::FromAddress(). So if you'd be comfortable either > breaking > > the block portion of address_value out and using the Addr() constructor in > place > > of FromAddress (which means a bit more leakage of knowledge into IndexTable, > > specifically how multi-file address values are split between file # and block > #, > > but we've already gestured at that in our use of the constructor) or the > reverse > > (which leaks the same information) I'd personally prefer that, as it'd make > the > > Addr interface not have any hints of index table formats in it. > > > > Alternatively, documentation in addr.h explaining what the FromAddress() are > > useful for without (explicitly or implicitly) being dependent on the > IndexTable > > format would be fine. > > > > And, finally, I'm not fanatic about this--if you really want to leave it this > > way, I'm ok with that. 
But it feels to me as if we're leaking information > both > > ways currently, and if we can I'd like to clean that up at least a little bit. > > Done. I'll cleanup Addr if we are happy here. Thanks! Yes, this looks much better. Go ahead and cleanup Addr. rvargas (doing something else)* 2013/11/26 19:54:40 Done. Show quoted text On 2013/11/25 19:48:07, rdsmith wrote: > On 2013/11/18 23:43:34, rvargas wrote: > > On 2013/11/18 20:37:04, rdsmith wrote: > > > On 2013/11/14 02:54:19, rvargas wrote: > > > > We could add more methods to Address to make it aware of small tables (or > > add > > > > more arguments to the new methods), but between the two files I think it > is > > > > better to leak from Address to Index than the other way. I could use > > > > Addr::FromEvictedAddress even for small tables after massaging the value, > > but > > > I > > > > don't think that's what you want. > > > > > > It might be, actually. I agree with you that given that we're going to leak > > > some information in one direction, I'd rather leak from Address to > IndexTable > > > than the other way around (ideally I'd like to not leak at all, but I > haven't > > > come up with any suggestions as to how to do that :-}). But we still have > two > > > different methods on address that look, from where I'm sitting, like they > are > > > targeting the two different formats of address in IndexTable. Looking just > at > > > the addr.h header, it's not clear why we need both the Addr constructor and > > the > > > static methods Addr::FromAddress(). 
So if you'd be comfortable either > > breaking > > > the block portion of address_value out and using the Addr() constructor in > > place > > > of FromAddress (which means a bit more leakage of knowledge into > IndexTable, > > > specifically how multi-file address values are split between file # and > block > > #, > > > but we've already gestured at that in our use of the constructor) or the > > reverse > > > (which leaks the same information) I'd personally prefer that, as it'd make > > the > > > Addr interface not have any hints of index table formats in it. > > > > > > Alternatively, documentation in addr.h explaining what the From*Address() > are > > > useful for without (explicitly or implicitly) being dependent on the > > IndexTable > > > format would be fine. > > > > > > And, finally, I'm not fanatic about this--if you really want to leave it > this > > > way, I'm ok with that. But it feels to me as if we're leaking information > > both > > > ways currently, and if we can I'd like to clean that up at least a little > bit. > > > > Done. I'll cleanup Addr if we are happy here. > > Thanks! Yes, this looks much better. Go ahead and cleanup Addr. Done.
	251 uint32 address_value = GetAddressValue();

	252 if (small_table_) {

	253 if (GetGroup() == ENTRY_EVICTED)

	254 return Addr(BLOCK_EVICTED, 1, kEvictedEntriesFile, address_value);

	255

	256 return Addr(BLOCK_ENTRIES, 1, kEntriesFile, address_value);

	257 }

	258

	259 if (GetGroup() == ENTRY_EVICTED)

	260 return Addr::FromEvictedAddress(address_value);

	261 else

	262 return Addr::FromEntryAddress(address_value);
	Randy Smith (Not in Mondays) 2013/11/13 21:14:12 For both these calls, it feels like "FromAddress" For both these calls, it feels like "FromAddress" is a bit of a misnomer, both because "address" means several different things in this code, and because the address_value means different things in the small and large table formats. Maybe FromFileIndexAddress? rvargas (doing something else)* 2013/11/14 02:54:19 The name follows what is available on addr.h :( I Show quoted text On 2013/11/13 21:14:12, rdsmith wrote: > For both these calls, it feels like "FromAddress" is a bit of a misnomer, both > because "address" means several different things in this code, and because the > address_value means different things in the small and large table formats. > Maybe FromFileIndexAddress? The name follows what is available on addr.h :( I can change it if you feel strongly about it, but the only things Addr knows about is BLOCK_ENTRIES and BLOCK_EVICTED, it knows nothing about the index. (of course, that is not completely true, as it knows what it means to grab value and put it into a meaningless Addr) FromEntryFileAddress could work, but I don't like that it's not clear what "file" is doing there... as in that word sticks with Entry (correct), but also with Address (wrong) Another option would be to not use "address" for the index, but I could not come up with another term Randy Smith (Not in Mondays) 2013/11/18 20:37:04 Maybe a comment inside the "if (small_table_)" con Show quoted text On 2013/11/14 02:54:19, rvargas wrote: > On 2013/11/13 21:14:12, rdsmith wrote: > > For both these calls, it feels like "FromAddress" is a bit of a misnomer, > both > > because "address" means several different things in this code, and because the > > address_value means different things in the small and large table formats. > > Maybe FromFileIndexAddress? 
> > The name follows what is available on addr.h :( I can change it if you feel > strongly about it, but the only things Addr knows about is BLOCK_ENTRIES and > BLOCK_EVICTED, it knows nothing about the index. (of course, that is not > completely true, as it knows what it means to grab value and put it into a > meaningless Addr) > > FromEntryFileAddress could work, but I don't like that it's not clear what > "file" is doing there... as in that word sticks with Entry (correct), but also > with Address (wrong) > > Another option would be to not use "address" for the index, but I could not come > up with another term Maybe a comment inside the "if (small_table_)" conditional saying "address_value is just a block index in the small table format, but is a full address (including file number) in the large table format." As I understand it, the misnomer here is that address_value isn't a full address (including file number) in the small table format case.
	263 }

	264

	265 EntryState EntryCell::GetState() const {

	266 return static_cast<EntryState>(cell_.last_part & kCellStateMask);

	267 }

	268

	269 EntryGroup EntryCell::GetGroup() const {

	270 return static_cast<EntryGroup>((cell_.last_part >> kCellGroupOffset) &

	271 kCellGroupMask);

	272 }

	273

	274 int EntryCell::GetReuse() const {

	275 return (cell_.first_part >> kCellReuseOffset) & kCellReuseMask;

	276 }

	277

	278 int EntryCell::GetTimestamp() const {

	279 return GetCellTimestamp(cell_);

	280 }

	281

	282 void EntryCell::SetState(EntryState state) {

	283 SetCellState(&cell_, state);

	284 }

	285

	286 void EntryCell::SetGroup(EntryGroup group) {

	287 SetCellGroup(&cell_, group);

	288 }

	289

	290 void EntryCell::SetReuse(int count) {

	291 SetCellReuse(&cell_, count);

	292 }

	293

	294 void EntryCell::SetTimestamp(int timestamp) {

	295 SetCellTimestamp(&cell_, timestamp);

	296 }

	297

	298 // Static.

	299 EntryCell EntryCell::GetEntryCellForTest(int32 cell_id,

	300 uint32 hash,

	301 Addr address,

	302 IndexCell* cell,

	303 bool small_table) {

	304 if (cell) {

	305 EntryCell entry_cell(cell_id, hash, *cell, small_table);

	306 return entry_cell;

	307 }

	308

	309 return EntryCell(cell_id, hash, address, small_table);

	310 }

	311

	312 void EntryCell::SerializaForTest(IndexCell* destination) {

	313 FixSum();

	314 Serialize(destination);

	315 }

	316

	317 EntryCell::EntryCell() : cell_id_(0), hash_(0), small_table_(false) {

	318 cell_.Clear();

	319 }

	320

	321 EntryCell::EntryCell(int32 cell_id, uint32 hash, Addr address, bool small_table)

	322 : cell_id_(cell_id),

	323 hash_(hash),

	324 small_table_(small_table) {

	325 DCHECK(IsValidAddress(address) \|\| !address.value());

	326

	327 cell_.Clear();

	328 SetCellState(&cell_, ENTRY_NEW);

	329 SetCellGroup(&cell_, ENTRY_NO_USE);

	330 if (small_table) {

	331 DCHECK(address.FileNumber() == kEntriesFile \|\|

	332 address.FileNumber() == kEvictedEntriesFile);

	333 SetCellSmallTableAddress(&cell_, address.start_block());

	334 SetCellSmallTableHash(&cell_, hash >> kHashSmallTableShift);

	335 } else {

	336 SetCellAddress(&cell_, address.ToIndexEntryAddress());

	337 SetCellHash(&cell_, hash >> kHashShift);

	338 }

	339 }

	340

	341 EntryCell::EntryCell(int32 cell_id,

	342 uint32 hash,

	343 const IndexCell& cell,

	344 bool small_table)

	345 : cell_id_(cell_id),

	346 hash_(hash),

	347 cell_(cell),

	348 small_table_(small_table) {

	349 }

	350

	351 void EntryCell::FixSum() {

	352 SetCellSum(&cell_, CalculateCellSum(cell_));

	353 }

	354

	355 uint32 EntryCell::GetAddressValue() const {

	356 if (small_table_)

	357 return GetCellSmallTableAddress(cell_);

	358

	359 return GetCellAddress(cell_);

	360 }

	361

	362 uint32 EntryCell::RecomputeHash() {

	363 if (small_table_) {

	364 hash_ &= (1 << kHashSmallTableShift) - 1;

	365 hash_ \|= GetCellSmallTableHash(cell_) << kHashSmallTableShift;

	366 return hash_;

	367 }

	368

	369 hash_ &= (1 << kHashShift) - 1;

	370 hash_ \|= GetCellHash(cell_) << kHashShift;

	371 return hash_;

	372 }

	373

	374 void EntryCell::Serialize(IndexCell* destination) const {

	375 *destination = cell_;

	376 }

	377

	378 EntrySet::EntrySet() : evicted_count(0), current(0) {

	379 }

	380

	381 EntrySet::~EntrySet() {

	382 }

	383

	384 IndexIterator::IndexIterator() {

	385 }

	386

	387 IndexIterator::~IndexIterator() {

	388 }

	389

	390 IndexTableInitData::IndexTableInitData() {

	391 }

	392

	393 IndexTableInitData::~IndexTableInitData() {

	394 }

	395

	396 // -----------------------------------------------------------------------

	397

	398 IndexTable::IndexTable(IndexTableBackend* backend)

	399 : backend_(backend),

	400 header_(NULL),

	401 main_table_(NULL),

	402 extra_table_(NULL),

	403 modified_(false),

	404 small_table_(false) {

	405 }

	406

	407 IndexTable::~IndexTable() {

	408 }

	409

	410 // For a general description of the index tables see:

	411 // http://www.chromium.org/developers/design-documents/network-stack/disk-cache/disk-cache-v3#TOC-Index

	412 //

	413 // The index is split between two tables: the main_table_ and the extra_table_.

	414 // The main table can grow only by doubling its number of cells, while the

	415 // extra table can grow slowly, because it only contains cells that overflow

	416 // from the main table. In order to locate a given cell, part of the hash is

	417 // used directly as an index into the main table; once that bucket is located,

	418 // all cells with that partial hash (i.e., belonging to that bucket) are

	419 // inspected, and if present, the next bucket (located on the extra table) is

	420 // then located. For more information on bucket chaining see:

	421 // http://www.chromium.org/developers/design-documents/network-stack/disk-cache/disk-cache-v3#TOC-Buckets

	422 //

	423 // There are two cases when increasing the size:

	424 // - Doubling the size of the main table

	425 // - Adding more entries to the extra table

	426 //

	427 // For example, consider a 64k main table with 8k cells on the extra table (for

	428 // a total of 72k cells). Init can be called to add another 8k cells at the end

	429 // (grow to 80k cells). When the size of the extra table approaches 64k, Init

	430 // can be called to double the main table (to 128k) and go back to a small extra

	431 // table.

	// (Re)binds this table to the header, bitmap and bucket storage provided by
	// |params|. The same method serves the first initialization (header_ is still
	// NULL), growing the extra table (params->main_table is NULL) and doubling
	// the main table (params->main_table is set while main_table_ already exists).
	432 void IndexTable::Init(IndexTableInitData* params) {

	// A header that is already set means Init ran before, so this is a resize.
	433 bool growing = header_ != NULL;

	// Only filled in when the main table is being doubled.
	434 scoped_ptr<IndexBucket[]> old_extra_table;

	// From here on header_ refers to the header inside |params|.
	435 header_ = &params->index_bitmap->header;

	436

	437 if (params->main_table) {

	438 if (main_table_) {

	439 // This is doubling the size of main table.

	440 DCHECK_EQ(base::bits::Log2Floor(header_->table_len),

	441 base::bits::Log2Floor(backup_header_->table_len) + 1);

	// mask_ still holds the value computed by the previous Init call; it is
	// only recomputed further down.
	// NOTE(review): extra_size is (buckets * kCellsPerBucket), i.e. a number of
	// cells, but below it is used as a count of IndexBucket elements for the
	// allocation, the memcpy and the memset. Confirm that both the old and the
	// new extra tables are at least that large.
	442 int extra_size = (header()->max_bucket - mask_) * kCellsPerBucket;

	443 DCHECK_GE(extra_size, 0);

	444

	445 // Doubling the size implies deleting the extra table and moving as many

	446 // cells as we can to the main table, so we first copy the old one. This

	447 // is not required when just growing the extra table because we don't

	448 // move any cell in that case.

	449 old_extra_table.reset(new IndexBucket[extra_size]);

	450 memcpy(old_extra_table.get(), extra_table_,

	451 extra_size * sizeof(IndexBucket));

	// The new extra table starts out empty; cells are re-added by MoveCells.
	452 memset(params->extra_table, 0, extra_size * sizeof(IndexBucket));

	453 }

	454 main_table_ = params->main_table;

	455 }

	456 DCHECK(main_table_);

	457 extra_table_ = params->extra_table;

	458

	// extra_bits_ is the log2 ratio between the current table length and
	// kBaseTableLen.
	459 extra_bits_ = base::bits::Log2Floor(header_->table_len) -

	460 base::bits::Log2Floor(kBaseTableLen);

	461 DCHECK_GE(extra_bits_, 0);

	462 DCHECK_LE(extra_bits_, 11);

	// mask_ + 1 is the number of buckets in the main table.
	463 mask_ = ((kBaseTableLen / kCellsPerBucket) << extra_bits_) - 1;

	// The small layout is used until the table has doubled
	// (kHashShift - kHashSmallTableShift) times; after that extra_bits_ is
	// expressed relative to the first large table.
	464 small_table_ = extra_bits_ < kHashShift - kHashSmallTableShift;

	465 if (!small_table_)

	466 extra_bits_ -= kHashShift - kHashSmallTableShift;

	467

	468 // table_len keeps the max number of cells stored by the index. We need a

	469 // bitmap with 1 bit per cell, and that bitmap has num_words 32-bit words.

	470 int num_words = (header_->table_len + 31) / 32;

	471

	472 if (old_extra_table) {

	473 // All the cells from the extra table are moving to the new tables so before

	474 // creating the bitmaps, clear the part of the extra table.

	// (mask_ >> 1) + 1 is the bucket count of the main table before doubling.
	475 int main_table_bit_words = ((mask_ >> 1) + 1) * kCellsPerBucket / 32;

	476 DCHECK_GT(num_words, main_table_bit_words);

	477 memset(params->index_bitmap->bitmap + main_table_bit_words, 0,

	478 (num_words - main_table_bit_words) * sizeof(int32));

	479

	480 DCHECK(growing);

	// Same clearing for the backup bitmap, which still has its old length.
	481 int old_num_words = (backup_header_.get()->table_len + 31) / 32;

	482 DCHECK_GT(old_num_words, main_table_bit_words);

	483 memset(backup_bitmap_storage_.get() + main_table_bit_words, 0,

	484 (old_num_words - main_table_bit_words) * sizeof(int32));

	485 }

	486 bitmap_.reset(new Bitmap(params->index_bitmap->bitmap, header_->table_len,

	487 num_words));

	488

	489 if (growing) {

	// Reallocate the backup bitmap at the new length: old contents first,
	// zeros for the newly added words.
	490 int old_num_words = (backup_header_.get()->table_len + 31) / 32;

	491 DCHECK_GE(num_words, old_num_words);

	492 scoped_ptr<uint32[]> storage(new uint32[num_words]);

	493 memcpy(storage.get(), backup_bitmap_storage_.get(),

	494 old_num_words * sizeof(int32));

	495 memset(storage.get() + old_num_words, 0,

	496 (num_words - old_num_words) * sizeof(int32));

	497

	498 backup_bitmap_storage_.swap(storage);

	499 backup_header_->table_len = header_->table_len;

	500 } else {

	// First initialization: take ownership of the backup data from |params|.
	501 backup_bitmap_storage_.reset(params->backup_bitmap.release());

	502 backup_header_.reset(params->backup_header.release());

	503 }

	504

	505 num_words = (backup_header_->table_len + 31) / 32;

	506 backup_bitmap_.reset(new Bitmap(backup_bitmap_storage_.get(),

	507 backup_header_->table_len, num_words));

	// Cells are redistributed only after both bitmaps exist, because moving a
	// cell updates them.
	508 if (old_extra_table)

	509 MoveCells(old_extra_table.get());

	510

	511 if (small_table_)

	512 DCHECK(header_->flags & SMALL_CACHE);

	513 }

	514

	515 void IndexTable::Reset() {

	516 header_ = NULL;

	517 main_table_ = NULL;

	518 extra_table_ = NULL;

	519 bitmap_.reset();

	520 backup_bitmap_.reset();

	521 backup_header_.reset();

	522 backup_bitmap_storage_.reset();

	523 modified_ = false;

	524 }

	525

	526 // The general method for locating cells is to:

	527 // 1. Get the first bucket. This usually means directly indexing the table (as

	528 // this method does), or iterating through all possible buckets.

	529 // 2. Iterate through all the cells in that first bucket.

	530 // 3. If there is a linked bucket, locate it directly in the extra table.

	531 // 4. Go back to 2, as needed.

	532 //

	533 // One consequence of this pattern is that we never start looking at buckets in

	534 // the extra table, unless we are following a link from the main table.

	535 EntrySet IndexTable::LookupEntries(uint32 hash) {

	536 EntrySet entries;

	537 int bucket_id = static_cast<int>(hash & mask_);

	538 IndexBucket* bucket = &main_table_[bucket_id];

	539 for (;;) {

	540 for (int i = 0; i < kCellsPerBucket; i++) {

	541 IndexCell* current_cell = &bucket->cells[i];

	542 if (!GetAddressValue(*current_cell))

	543 continue;

	544 if (!SanityCheck(*current_cell)) {

	545 NOTREACHED();

	546 int cell_id = bucket_id * kCellsPerBucket + i;

	547 current_cell->Clear();

	548 bitmap_->Set(cell_id, false);

	549 backup_bitmap_->Set(cell_id, false);

	550 modified_ = true;

	551 continue;

	552 }

	553 int cell_id = bucket_id * kCellsPerBucket + i;

	554 if (MisplacedHash(*current_cell, hash)) {

	555 HandleMisplacedCell(current_cell, cell_id, hash & mask_);

	556 } else if (IsHashMatch(*current_cell, hash)) {

	557 EntryCell entry_cell(cell_id, hash, *current_cell, small_table_);

	558 CheckState(entry_cell);

	559 if (entry_cell.GetState() != ENTRY_DELETED) {

	560 entries.cells.push_back(entry_cell);

	561 if (entry_cell.GetGroup() == ENTRY_EVICTED)

	562 entries.evicted_count++;

	563 }

	564 }

	565 }

	566 bucket_id = GetNextBucket(mask_ + 1, header()->max_bucket, extra_table_,

	567 &bucket);

	568 if (!bucket_id)

	569 break;

	570 }

	571 return entries;

	572 }

	573

	574 EntryCell IndexTable::CreateEntryCell(uint32 hash, Addr address) {

	575 DCHECK(IsValidAddress(address));

	576 DCHECK(address.ToIndexEntryAddress());

	577

	578 int bucket_id = static_cast<int>(hash & mask_);

	579 int cell_id = 0;

	580 IndexBucket* bucket = &main_table_[bucket_id];

	581 IndexCell* current_cell = NULL;

	582 bool found = false;

	583 for (; !found;) {

	584 for (int i = 0; i < kCellsPerBucket && !found; i++) {

	585 current_cell = &bucket->cells[i];

	586 if (!GetAddressValue(*current_cell)) {

	587 cell_id = bucket_id * kCellsPerBucket + i;

	588 found = true;

	589 }

	590 }

	591 if (found)

	592 break;

	593 bucket_id = GetNextBucket(mask_ + 1, header()->max_bucket, extra_table_,

	594 &bucket);

	595 if (!bucket_id)

	596 break;

	597 }

	598

	599 if (!found) {

	600 bucket_id = NewExtraBucket();

	601 if (bucket_id) {

	602 cell_id = bucket_id * kCellsPerBucket;

	603 bucket->next = cell_id;

	604 bucket = &extra_table_[bucket_id - (mask_ + 1)];

	605 bucket->hash = hash & mask_;

	606 found = true;

	607 } else {

	608 // address 0 is a reserved value, and the caller interprets it as invalid.

	609 address.set_value(0);

	610 }

	611 }

	612

	613 EntryCell entry_cell(cell_id, hash, address, small_table_);

	614 if (address.file_type() == BLOCK_EVICTED)

	615 entry_cell.SetGroup(ENTRY_EVICTED);

	616 else

	617 entry_cell.SetGroup(ENTRY_NO_USE);

	618 Save(&entry_cell);

	619

	620 if (found) {

	621 bitmap_->Set(cell_id, true);

	622 backup_bitmap_->Set(cell_id, true);

	623 header()->used_cells++;

	624 modified_ = true;

	625 }

	626

	627 return entry_cell;

	628 }

	629

	630 EntryCell IndexTable::FindEntryCell(uint32 hash, Addr address) {

	631 return FindEntryCellImpl(hash, address, false);

	632 }

	633

	634 int IndexTable::CalculateTimestamp(Time time) {

	635 TimeDelta delta = time - Time::FromInternalValue(header_->base_time);

	636 return std::max(delta.InMinutes(), 0);

	637 }

	638

	639 base::Time IndexTable::TimeFromTimestamp(int timestamp) {

	640 return Time::FromInternalValue(header_->base_time) +

	641 TimeDelta::FromMinutes(timestamp);

	642 }

	643

	644 void IndexTable::SetSate(uint32 hash, Addr address, EntryState state) {

	645 EntryCell cell = FindEntryCellImpl(hash, address, state == ENTRY_FREE);

	646 if (!cell.IsValid()) {

	647 NOTREACHED();

	648 return;

	649 }

	650

	651 EntryState old_state = cell.GetState();

	652 if (state == ENTRY_FREE) {

	653 DCHECK_EQ(old_state, ENTRY_DELETED);

	654 } else if (state == ENTRY_NEW) {

	655 DCHECK_EQ(old_state, ENTRY_FREE);

	656 } else if (state == ENTRY_OPEN) {

	657 DCHECK_EQ(old_state, ENTRY_USED);

	658 } else if (state == ENTRY_MODIFIED) {

	659 DCHECK_EQ(old_state, ENTRY_OPEN);

	660 } else if (state == ENTRY_DELETED) {

	661 DCHECK(old_state == ENTRY_NEW \|\| old_state == ENTRY_OPEN \|\|

	662 old_state == ENTRY_MODIFIED);

	663 } else if (state == ENTRY_USED) {

	664 DCHECK(old_state == ENTRY_NEW \|\| old_state == ENTRY_OPEN \|\|

	665 old_state == ENTRY_MODIFIED);

	666 }

	667

	668 modified_ = true;

	669 if (state == ENTRY_DELETED) {

	670 bitmap_->Set(cell.cell_id(), false);

	671 backup_bitmap_->Set(cell.cell_id(), false);

	672 } else if (state == ENTRY_FREE) {

	673 cell.Clear();

	674 Write(cell);

	675 header()->used_cells--;

	676 return;

	677 }

	678 cell.SetState(state);

	679

	680 Save(&cell);

	681 }

	682

	683 void IndexTable::UpdateTime(uint32 hash, Addr address, base::Time current) {

	684 EntryCell cell = FindEntryCell(hash, address);

	685 if (!cell.IsValid())

	686 return;

	687

	688 int minutes = CalculateTimestamp(current);

	689

	690 // Keep about 3 months of headroom.

	691 const int kMaxTimestamp = (1 << 20) - 60 * 24 * 90;

	692 if (minutes > kMaxTimestamp) {

	693 // TODO(rvargas):

	694 // Update header->old_time and trigger a timer

	695 // Rebaseline timestamps and don't update sums

	696 // Start a timer (about 2 backups)

	697 // fix all ckecksums and trigger another timer

	698 // update header->old_time because rebaseline is done.

	699 minutes = std::min(minutes, (1 << 20) - 1);

	700 }

	701

	702 cell.SetTimestamp(minutes);

	703 Save(&cell);

	704 }

	705

	706 void IndexTable::Save(EntryCell* cell) {

	707 cell->FixSum();

	708 Write(*cell);

	709 }

	710

	711 void IndexTable::GetOldest(IndexIterator* no_use,

	712 IndexIterator* low_use,

	713 IndexIterator* high_use) {

	714 no_use->forward = true;

	715 low_use->forward = true;

	716 high_use->forward = true;

	717 InitIterator(no_use);

	718 InitIterator(low_use);

	719 InitIterator(high_use);

	720

	721 WalkTables(-1, no_use, low_use, high_use);

	722 }

	723

	724 bool IndexTable::GetNextCells(IndexIterator* iterator) {

	725 int current_time = iterator->timestamp;

	726 InitIterator(iterator);

	727

	728 WalkTables(current_time, iterator, iterator, iterator);

	729 return !iterator->cells.empty();

	730 }

	731

	732 void IndexTable::OnBackupTimer() {

	733 if (!modified_)

	734 return;

	735

	736 int num_words = (header_->table_len + 31) / 32;

	737 int num_bytes = num_words * 4 + static_cast<int>(sizeof(*header_));

	738 scoped_refptr<net::IOBuffer> buffer(new net::IOBuffer(num_bytes));

	739 memcpy(buffer->data(), header_, sizeof(*header_));

	740 memcpy(buffer->data() + sizeof(*header_), backup_bitmap_storage_.get(),

	741 num_words * 4);

	742 backend_->SaveIndex(buffer, num_bytes);

	743 modified_ = false;

	744 }

	745

	746 // -----------------------------------------------------------------------

	747

	748 EntryCell IndexTable::FindEntryCellImpl(uint32 hash, Addr address,

	749 bool allow_deleted) {

	750 int bucket_id = static_cast<int>(hash & mask_);

	751 IndexBucket* bucket = &main_table_[bucket_id];

	752 for (;;) {

	753 for (int i = 0; i < kCellsPerBucket; i++) {

	754 IndexCell* current_cell = &bucket->cells[i];

	755 if (!GetAddressValue(*current_cell))

	756 continue;

	757 DCHECK(SanityCheck(*current_cell));

	758 if (IsHashMatch(*current_cell, hash)) {

	759 // We have a match.

	760 int cell_id = bucket_id * kCellsPerBucket + i;

	761 EntryCell entry_cell(cell_id, hash, *current_cell, small_table_);

	762 if (entry_cell.GetAddress() != address)

	763 continue;

	764

	765 if (!allow_deleted && entry_cell.GetState() == ENTRY_DELETED)

	766 continue;

	767

	768 return entry_cell;

	769 }

	770 }

	771 bucket_id = GetNextBucket(mask_ + 1, header()->max_bucket, extra_table_,

	772 &bucket);

	773 if (!bucket_id)

	774 break;

	775 }

	776 return EntryCell();

	777 }

	778

	779 void IndexTable::CheckState(const EntryCell& cell) {

	780 int current_state = cell.GetState();

	781 if (current_state != ENTRY_FIXING) {

	782 bool present = ((current_state & 3) != 0); // Look at the last two bits.

	783 if (present != bitmap_->Get(cell.cell_id()) \|\|

	784 present != backup_bitmap_->Get(cell.cell_id())) {

	785 // There's a mismatch.

	786 if (current_state == ENTRY_DELETED) {

	787 // We were in the process of deleting this entry. Finish now.

	788 backend_->DeleteCell(cell);

	789 } else {

	790 current_state = ENTRY_FIXING;

	791 EntryCell bad_cell(cell);

	792 bad_cell.SetState(ENTRY_FIXING);

	793 Save(&bad_cell);

	794 }

	795 }

	796 }

	797

	798 if (current_state == ENTRY_FIXING)

	799 backend_->FixCell(cell);

	800 }

	801

	802 void IndexTable::Write(const EntryCell& cell) {

	803 IndexBucket* bucket = NULL;

	804 int bucket_id = cell.cell_id() / kCellsPerBucket;

	805 if (bucket_id < static_cast<int32>(mask_ + 1)) {

	806 bucket = &main_table_[bucket_id];

	807 } else {

	808 DCHECK_LE(bucket_id, header()->max_bucket);

	809 bucket = &extra_table_[bucket_id - (mask_ + 1)];

	810 }

	811

	812 int cell_number = cell.cell_id() % kCellsPerBucket;

	813 if (GetAddressValue(bucket->cells[cell_number]) && cell.GetAddressValue()) {

	814 DCHECK_EQ(cell.GetAddressValue(),

	815 GetAddressValue(bucket->cells[cell_number]));

	816 }

	817 cell.Serialize(&bucket->cells[cell_number]);

	818 }

	819

	820 int IndexTable::NewExtraBucket() {

	821 int safe_window = (header()->table_len < kNumExtraBlocks * 2) ?

	822 kNumExtraBlocks / 4 : kNumExtraBlocks;

	823 if (header()->table_len - header()->max_bucket * kCellsPerBucket <

	824 safe_window) {

	825 backend_->GrowIndex();

	826 }

	827

	828 if (header()->max_bucket * kCellsPerBucket ==

	829 header()->table_len - kCellsPerBucket) {

	830 return 0;

	831 }

	832

	833 header()->max_bucket++;

	834 return header()->max_bucket;

	835 }

	836

	837 void IndexTable::WalkTables(int limit_time,

	838 IndexIterator* no_use,

	839 IndexIterator* low_use,

	840 IndexIterator* high_use) {

	841 header_->num_no_use_entries = 0;

	842 header_->num_low_use_entries = 0;

	843 header_->num_high_use_entries = 0;

	844 header_->num_evicted_entries = 0;

	845

	846 for (int i = 0; i < static_cast<int32>(mask_ + 1); i++) {

	847 int bucket_id = i;

	848 IndexBucket* bucket = &main_table_[i];

	849 for (;;) {

	850 UpdateFromBucket(bucket, i, limit_time, no_use, low_use, high_use);

	851

	852 bucket_id = GetNextBucket(mask_ + 1, header()->max_bucket, extra_table_,

	853 &bucket);

	854 if (!bucket_id)

	855 break;

	856 }

	857 }

	858 header_->num_entries = header_->num_no_use_entries +

	859 header_->num_low_use_entries +

	860 header_->num_high_use_entries +

	861 header_->num_evicted_entries;

	862 modified_ = true;

	863 }

	864

	865 void IndexTable::UpdateFromBucket(IndexBucket* bucket, int bucket_hash,

	866 int limit_time,

	867 IndexIterator* no_use,

	868 IndexIterator* low_use,

	869 IndexIterator* high_use) {

	870 for (int i = 0; i < kCellsPerBucket; i++) {

	871 IndexCell& current_cell = bucket->cells[i];

	872 if (!GetAddressValue(current_cell))

	873 continue;

	874 DCHECK(SanityCheck(current_cell));

	875 if (!IsNormalState(current_cell))

	876 continue;

	877

	878 EntryCell entry_cell(0, GetFullHash(current_cell, bucket_hash),

	879 current_cell, small_table_);

	880 switch (GetCellGroup(current_cell)) {

	881 case ENTRY_NO_USE:

	882 UpdateIterator(entry_cell, limit_time, no_use);

	883 header_->num_no_use_entries++;

	884 break;

	885 case ENTRY_LOW_USE:

	886 UpdateIterator(entry_cell, limit_time, low_use);

	887 header_->num_low_use_entries++;

	888 break;

	889 case ENTRY_HIGH_USE:

	890 UpdateIterator(entry_cell, limit_time, high_use);

	891 header_->num_high_use_entries++;

	892 break;

	893 case ENTRY_EVICTED:

	894 header_->num_evicted_entries++;

	895 break;

	896 default:

	897 NOTREACHED();

	898 }

	899 }

	900 }

	901

	902 void IndexTable::MoveCells(IndexBucket* old_extra_table) {

	903 int max_hash = (mask_ + 1) / 2;

	904 int max_bucket = header()->max_bucket;

	905 header()->max_bucket = mask_;

	906 int used_cells = header()->used_cells;

	907

	908 // Consider a large cache: a cell stores the upper 18 bits of the hash

	909 // (h >> 14). If the table is say 8 times the original size (growing from 4x),

	910 // the bit that we are interested in would be the 3rd bit of the stored value,

	911 // in other words 'multiplier' >> 1.

	912 uint32 new_bit = (1 << extra_bits_) >> 1;

	913

	914 scoped_ptr<IndexBucket[]> old_main_table;

	915 IndexBucket* source_table = main_table_;

	916 bool upgrade_format = !extra_bits_;

	917 if (upgrade_format) {

	918 // This method should deal with migrating a small table to a big one. Given

	919 // that the first thing to do is read the old table, set small_table_ for

	920 // the size of the old table. Now, when moving a cell, the result cannot be

	921 // placed in the old table or we will end up reading it again and attempting

	922 // to move it, so we have to copy the whole table at once.

	923 DCHECK(!small_table_);

	924 small_table_ = true;

	925 old_main_table.reset(new IndexBucket[max_hash]);

	926 memcpy(old_main_table.get(), main_table_, max_hash * sizeof(IndexBucket));

	927 memset(main_table_, 0, max_hash * sizeof(IndexBucket));

	928 source_table = old_main_table.get();

	929 }

	930

	931 for (int i = 0; i < max_hash; i++) {

	932 int bucket_id = i;

	933 IndexBucket* bucket = &source_table[i];

	934 for (;;) {

	935 for (int j = 0; j < kCellsPerBucket; j++) {

	936 IndexCell& current_cell = bucket->cells[j];

	937 if (!GetAddressValue(current_cell))

	938 continue;

	939 DCHECK(SanityCheck(current_cell));

	940 if (bucket_id == i) {

	941 if (upgrade_format \|\| (GetHashValue(current_cell) & new_bit)) {

	942 // Move this cell to the upper half of the table.

	943 MoveSingleCell(&current_cell, bucket_id * kCellsPerBucket + j, i,

	944 true);

	945 }

	946 } else {

	947 // All cells on extra buckets have to move.

	948 MoveSingleCell(&current_cell, bucket_id * kCellsPerBucket + j, i,

	949 true);

	950 }

	951 }

	952

	953 bucket_id = GetNextBucket(max_hash, max_bucket, old_extra_table, &bucket);

	954 if (!bucket_id)

	955 break;

	956 }

	957 }

	958

	959 DCHECK_EQ(header()->used_cells, used_cells);

	960

	961 if (upgrade_format) {

	962 small_table_ = false;

	963 header()->flags &= ~SMALL_CACHE;

	964 }

	965 }

	966

	// Re-inserts |current_cell| (stored at |cell_id|, reached from main-table
	// bucket |main_table_index|) through CreateEntryCell and copies its state,
	// group, reuse and timestamp to the new cell. |growing| is true when called
	// while the main table is being doubled and false when relocating a
	// misplaced cell. If no new cell can be created the old one is left alone.
	967 void IndexTable::MoveSingleCell(IndexCell* current_cell, int cell_id,

	968 int main_table_index, bool growing) {

	// The old cell is decoded with whatever layout small_table_ currently
	// selects.
	969 uint32 hash = GetFullHash(*current_cell, main_table_index);

	970 EntryCell old_cell(cell_id, hash, *current_cell, small_table_);

	971

	// When the table is also changing format, the new cell has to be created
	// with the large layout, so small_table_ is switched off around the
	// creation and restored to true afterwards.
	972 bool upgrade_format = !extra_bits_ && growing;

	973 if (upgrade_format)

	974 small_table_ = false;

	975 EntryCell new_cell = CreateEntryCell(hash, old_cell.GetAddress());

	976

	977 if (!new_cell.IsValid()) {

	978 // We'll deal with this entry later.

	979 if (upgrade_format)

	980 small_table_ = true;

	981 return;

	982 }

	983

	984 new_cell.SetState(old_cell.GetState());

	985 new_cell.SetGroup(old_cell.GetGroup());

	986 new_cell.SetReuse(old_cell.GetReuse());

	987 new_cell.SetTimestamp(old_cell.GetTimestamp());

	988 Save(&new_cell);

	989 modified_ = true;

	990 if (upgrade_format)

	991 small_table_ = true;

	992

	// CreateEntryCell marked the new cell in both bitmaps; deleted entries are
	// not supposed to be marked, so undo that.
	993 if (old_cell.GetState() == ENTRY_DELETED) {

	994 bitmap_->Set(new_cell.cell_id(), false);

	995 backup_bitmap_->Set(new_cell.cell_id(), false);

	996 }

	997

	998 if (!growing \|\| cell_id / kCellsPerBucket == main_table_index) {

	999 // Only delete entries that live on the main table.

	// On a format upgrade the old cell is not wiped here.
	// NOTE(review): presumably because the caller already zeroed the live main
	// table and is iterating over a copy -- confirm against MoveCells.
	1000 if (!upgrade_format) {

	1001 old_cell.Clear();

	1002 Write(old_cell);

	1003 }

	1004

	// The new cell may have landed in the very slot the old one occupied; in
	// that case its bitmap bits must stay as they are.
	1005 if (cell_id != new_cell.cell_id()) {

	1006 bitmap_->Set(old_cell.cell_id(), false);

	1007 backup_bitmap_->Set(old_cell.cell_id(), false);

	1008 }

	1009 }

	// CreateEntryCell incremented used_cells for the new cell; a move does not
	// change the number of cells in use, so compensate.
	1010 header()->used_cells--;

	1011 }

	1012

	1013 void IndexTable::HandleMisplacedCell(IndexCell* current_cell, int cell_id,

	1014 int main_table_index) {

	1015 // The cell may be misplaced, or a duplicate cell exists with this data.

	1016 uint32 hash = GetFullHash(*current_cell, main_table_index);

	1017 MoveSingleCell(current_cell, cell_id, main_table_index, false);

	1018

	1019 // Now look for a duplicate cell.

	1020 CheckBucketList(hash & mask_);

	1021 }

	1022

	1023 void IndexTable::CheckBucketList(int bucket_id) {

	1024 typedef std::pair<int, EntryGroup> AddressAndGroup;

	1025 std::set<AddressAndGroup> entries;

	1026 IndexBucket* bucket = &main_table_[bucket_id];

	1027 int bucket_hash = bucket_id;

	1028 for (;;) {

	1029 for (int i = 0; i < kCellsPerBucket; i++) {

	1030 IndexCell* current_cell = &bucket->cells[i];

	1031 if (!GetAddressValue(*current_cell))

	1032 continue;

	1033 if (!SanityCheck(*current_cell)) {

	1034 NOTREACHED();

	1035 current_cell->Clear();

	1036 continue;

	1037 }

	1038 int cell_id = bucket_id * kCellsPerBucket + i;

	1039 EntryCell cell(cell_id, GetFullHash(*current_cell, bucket_hash),

	1040 *current_cell, small_table_);

	1041 if (!entries.insert(std::make_pair(cell.GetAddress().value(),

	1042 cell.GetGroup())).second) {

	1043 current_cell->Clear();

	1044 continue;

	1045 }

	1046 CheckState(cell);

	1047 }

	1048

	1049 bucket_id = GetNextBucket(mask_ + 1, header()->max_bucket, extra_table_,

	1050 &bucket);

	1051 if (!bucket_id)

	1052 break;

	1053 }

	1054 }

	1055

	1056 uint32 IndexTable::GetAddressValue(const IndexCell& cell) {

	1057 if (small_table_)

	1058 return GetCellSmallTableAddress(cell);

	1059

	1060 return GetCellAddress(cell);

	1061 }

	1062

	1063 uint32 IndexTable::GetHashValue(const IndexCell& cell) {

	1064 if (small_table_)

	1065 return GetCellSmallTableHash(cell);

	1066

	1067 return GetCellHash(cell);

	1068 }

	1069

	1070 uint32 IndexTable::GetFullHash(const IndexCell& cell, uint32 lower_part) {

	1071 // It is OK for the high order bits of lower_part to overlap with the stored

	1072 // part of the hash.

	1073 if (small_table_)

	1074 return (GetCellSmallTableHash(cell) << kHashSmallTableShift) \| lower_part;

	1075

	1076 return (GetCellHash(cell) << kHashShift) \| lower_part;

	1077 }

	1078

	1079 // All the bits stored in the cell should match the provided hash.

	1080 bool IndexTable::IsHashMatch(const IndexCell& cell, uint32 hash) {

	1081 hash = small_table_ ? hash >> kHashSmallTableShift : hash >> kHashShift;

	1082 return GetHashValue(cell) == hash;

	1083 }

	1084

	1085 bool IndexTable::MisplacedHash(const IndexCell& cell, uint32 hash) {

	1086 if (!extra_bits_)

	1087 return false;

	1088

	1089 uint32 mask = (1 << extra_bits_) - 1;

	1090 hash = small_table_ ? hash >> kHashSmallTableShift : hash >> kHashShift;

	1091 return (GetHashValue(cell) & mask) != (hash & mask);

	1092 }

	1093

	1094 } // namespace disk_cache

OLD	NEW

« net/disk_cache/v3/index_table.h ('K') | « net/disk_cache/v3/index_table.h ('k') | net/disk_cache/v3/index_table_unittest.cc » ('j') | no next file with comments »