OLD | NEW |
(Empty) | |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 package memory |
| 6 |
| 7 import ( |
| 8 "bytes" |
| 9 "fmt" |
| 10 "sort" |
| 11 "strings" |
| 12 |
| 13 ds "github.com/luci/gae/service/datastore" |
| 14 "github.com/luci/gae/service/datastore/serialize" |
| 15 ) |
| 16 |
| 17 // reducedQuery contains only the pieces of the query necessary to iterate for |
| 18 // results. |
| 19 // deduplication is applied externally |
| 20 // projection / keysonly / entity retrieval is done externally |
| 21 type reducedQuery struct { |
| 22 ns string |
| 23 kind string |
| 24 |
| 25 // eqFilters indicate the set of all prefix constraints which need to be |
| 26 // fulfilled in the composite query. All of these will translate into pr
efix |
| 27 // bytes for SOME index. |
| 28 eqFilters map[string]stringSet |
| 29 |
| 30 // suffixFormat is the PRECISE listing of the suffix columns that ALL in
dexes |
| 31 // in the multi query will have. |
| 32 // |
| 33 // suffixFormat ALWAYS includes the inequality filter (if any) as the 0t
h |
| 34 // element |
| 35 // suffixFormat ALWAYS includes any additional projections (in ascending |
| 36 // order) after all user defined sort orders |
| 37 // suffixFormat ALWAYS has __key__ as the last column |
| 38 suffixFormat []ds.IndexColumn |
| 39 |
| 40 // limits of the inequality and/or full sort order. This is ONLY a suffi
x, |
| 41 // and it will be appended to the prefix during iteration. |
| 42 start []byte |
| 43 end []byte |
| 44 |
| 45 // metadata describing the total number of columns that this query requi
res to |
| 46 // execute perfectly. |
| 47 numCols int |
| 48 } |
| 49 |
| 50 type IndexDefinitionSortable struct { |
| 51 // eqFilts is the list of ACTUAL prefix columns. Note that it may contai
n |
| 52 // redundant columns! (e.g. (tag, tag) is a perfectly valid prefix, becu
ase |
| 53 // (tag=1, tag=2) is a perfectly valid query). |
| 54 eqFilts []ds.IndexColumn |
| 55 coll *memCollection |
| 56 } |
| 57 |
| 58 func (i *IndexDefinitionSortable) hasAncestor() bool { |
| 59 return len(i.eqFilts) > 0 && i.eqFilts[0].Property == "__ancestor__" |
| 60 } |
| 61 |
| 62 func (i *IndexDefinitionSortable) numEqHits(c *constraints) int { |
| 63 ret := 0 |
| 64 for _, filt := range i.eqFilts { |
| 65 if _, ok := c.constraints[filt.Property]; ok { |
| 66 ret++ |
| 67 } |
| 68 } |
| 69 return ret |
| 70 } |
| 71 |
| 72 type IndexDefinitionSortableSlice []IndexDefinitionSortable |
| 73 |
| 74 func (s IndexDefinitionSortableSlice) Len() int { return len(s) } |
| 75 func (s IndexDefinitionSortableSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] } |
| 76 func (s IndexDefinitionSortableSlice) Less(i, j int) bool { |
| 77 a, b := s[i], s[j] |
| 78 if a.coll == nil && b.coll != nil { |
| 79 return true |
| 80 } else if a.coll != nil && b.coll == nil { |
| 81 return false |
| 82 } |
| 83 |
| 84 cmp := len(a.eqFilts) - len(b.eqFilts) |
| 85 if cmp < 0 { |
| 86 return true |
| 87 } else if cmp > 0 { |
| 88 return false |
| 89 } |
| 90 for k, col := range a.eqFilts { |
| 91 ocol := b.eqFilts[k] |
| 92 if col.Direction == ds.ASCENDING && ocol.Direction == ds.DESCEND
ING { |
| 93 return true |
| 94 } else if col.Direction == ds.DESCENDING && ocol.Direction == ds
.ASCENDING { |
| 95 return false |
| 96 } |
| 97 if col.Property < ocol.Property { |
| 98 return true |
| 99 } else if col.Property > ocol.Property { |
| 100 return false |
| 101 } |
| 102 } |
| 103 return false |
| 104 } |
| 105 |
| 106 // maybeAddDefinition possibly adds a new IndexDefinitionSortable to this slice. |
| 107 // It's only added if it could be useful in servicing q, otherwise this function |
| 108 // is a noop. |
| 109 // |
| 110 // This returns true iff the proposed index is OK and depletes missingTerms to |
| 111 // empty. |
| 112 // |
| 113 // If the proposed index is PERFECT (e.g. contains enough columns to cover all |
| 114 // equality filters, and also has the correct suffix), idxs will be replaced |
| 115 // with JUST that index, and this will return true. |
| 116 func (idxs *IndexDefinitionSortableSlice) maybeAddDefinition(q *reducedQuery, s
*memStore, missingTerms stringSet, id *ds.IndexDefinition) bool { |
| 117 // Kindless queries are handled elsewhere. |
| 118 if id.Kind != q.kind { |
| 119 impossible( |
| 120 fmt.Errorf("maybeAddDefinition given index with wrong ki
nd %q v %q", id.Kind, q.kind)) |
| 121 } |
| 122 |
| 123 // If we're an ancestor query, and the index is compound, but doesn't in
clude |
| 124 // an Ancestor field, it doesn't work. Builtin indexes can be used for |
| 125 // ancestor queries (and have !Ancestor), assuming that it's only equali
ty |
| 126 // filters (plus inequality on __key__), or a single inequality. |
| 127 if q.eqFilters["__ancestor__"] != nil && !id.Ancestor && !id.Builtin() { |
| 128 impossible( |
| 129 fmt.Errorf("maybeAddDefinition given compound index with
wrong ancestor info: %s %#v", id, q)) |
| 130 } |
| 131 |
| 132 // add __ancestor__ if necessary |
| 133 sortBy := id.GetFullSortOrder() |
| 134 |
| 135 // If the index has fewer fields than we need for the suffix, it can't |
| 136 // possibly help. |
| 137 if len(sortBy) < len(q.suffixFormat) { |
| 138 return false |
| 139 } |
| 140 |
| 141 numEqFilts := len(sortBy) - len(q.suffixFormat) |
| 142 // make sure the orders are precisely the same |
| 143 for i, sb := range sortBy[numEqFilts:] { |
| 144 if q.suffixFormat[i] != sb { |
| 145 return false |
| 146 } |
| 147 } |
| 148 |
| 149 if id.Builtin() && numEqFilts == 0 { |
| 150 if len(q.eqFilters) > 1 || (len(q.eqFilters) == 1 && q.eqFilters
["__ancestor__"] == nil) { |
| 151 return false |
| 152 } |
| 153 } |
| 154 |
| 155 // Make sure the equalities section doesn't contain any properties we do
n't |
| 156 // want in our query. |
| 157 // |
| 158 // numByProp && totalEqFilts will be used to see if this is a perfect ma
tch |
| 159 // later. |
| 160 numByProp := make(map[string]int, len(q.eqFilters)) |
| 161 totalEqFilts := 0 |
| 162 |
| 163 eqFilts := sortBy[:numEqFilts] |
| 164 for _, p := range eqFilts { |
| 165 if _, ok := q.eqFilters[p.Property]; !ok { |
| 166 return false |
| 167 } |
| 168 numByProp[p.Property]++ |
| 169 totalEqFilts++ |
| 170 } |
| 171 |
| 172 // ok, we can actually use this |
| 173 |
| 174 // Grab the collection for convenience later. We don't want to invalidat
e this |
| 175 // index's potential just because the collection doesn't exist. If it's |
| 176 // a builtin and it doesn't exist, it still needs to be one of the 'poss
ible' |
| 177 // indexes... it just means that the user's query will end up with no re
sults. |
| 178 coll := s.GetCollection( |
| 179 fmt.Sprintf("idx:%s:%s", q.ns, serialize.ToBytes(*id.PrepForIdxT
able()))) |
| 180 |
| 181 // First, see if it's a perfect match. If it is, then our search is over
. |
| 182 // |
| 183 // A perfect match contains ALL the equality filter columns (or more, si
nce |
| 184 // we can use residuals to fill in the extras). |
| 185 toAdd := IndexDefinitionSortable{coll: coll} |
| 186 toAdd.eqFilts = eqFilts |
| 187 for _, sb := range toAdd.eqFilts { |
| 188 missingTerms.rm(sb.Property) |
| 189 } |
| 190 |
| 191 perfect := false |
| 192 if len(sortBy) == q.numCols { |
| 193 perfect = true |
| 194 for k, num := range numByProp { |
| 195 if num < len(q.eqFilters[k]) { |
| 196 perfect = false |
| 197 break |
| 198 } |
| 199 } |
| 200 } |
| 201 if perfect { |
| 202 *idxs = IndexDefinitionSortableSlice{toAdd} |
| 203 } else { |
| 204 *idxs = append(*idxs, toAdd) |
| 205 } |
| 206 return len(missingTerms) == 0 |
| 207 } |
| 208 |
| 209 // getRelevantIndexes retrieves the relevant indexes which could be used to |
| 210 // service q. It returns nil if it's not possible to service q with the current |
| 211 // indexes. |
| 212 func getRelevantIndexes(q *reducedQuery, s *memStore) (IndexDefinitionSortableSl
ice, error) { |
| 213 missingTerms := stringSet{} |
| 214 for k := range q.eqFilters { |
| 215 if k == "__ancestor__" { |
| 216 // ancestor is not a prefix which can be satisfied by a
single index. It |
| 217 // must be satisfied by ALL indexes (and has special log
ic for this in |
| 218 // the addDefinition logic) |
| 219 continue |
| 220 } |
| 221 missingTerms.add(k) |
| 222 } |
| 223 idxs := IndexDefinitionSortableSlice{} |
| 224 |
| 225 // First we add builtins |
| 226 // add |
| 227 // idx:KIND |
| 228 if idxs.maybeAddDefinition(q, s, missingTerms, &ds.IndexDefinition{ |
| 229 Kind: q.kind, |
| 230 }) { |
| 231 return idxs, nil |
| 232 } |
| 233 |
| 234 // add |
| 235 // idx:KIND:prop |
| 236 // idx:KIND:-prop |
| 237 props := stringSet{} |
| 238 for prop := range q.eqFilters { |
| 239 props.add(prop) |
| 240 } |
| 241 for _, col := range q.suffixFormat[:len(q.suffixFormat)-1] { |
| 242 props.add(col.Property) |
| 243 } |
| 244 for prop := range props { |
| 245 if strings.HasPrefix(prop, "__") && strings.HasSuffix(prop, "__"
) { |
| 246 continue |
| 247 } |
| 248 if idxs.maybeAddDefinition(q, s, missingTerms, &ds.IndexDefiniti
on{ |
| 249 Kind: q.kind, |
| 250 SortBy: []ds.IndexColumn{ |
| 251 {Property: prop}, |
| 252 }, |
| 253 }) { |
| 254 return idxs, nil |
| 255 } |
| 256 if idxs.maybeAddDefinition(q, s, missingTerms, &ds.IndexDefiniti
on{ |
| 257 Kind: q.kind, |
| 258 SortBy: []ds.IndexColumn{ |
| 259 {Property: prop, Direction: ds.DESCENDING}, |
| 260 }, |
| 261 }) { |
| 262 return idxs, nil |
| 263 } |
| 264 } |
| 265 |
| 266 // Try adding all compound indexes whose suffix matches. |
| 267 suffix := &ds.IndexDefinition{ |
| 268 Kind: q.kind, |
| 269 Ancestor: q.eqFilters["__ancestor__"] != nil, |
| 270 SortBy: q.suffixFormat, |
| 271 } |
| 272 walkCompIdxs(s, suffix, func(def *ds.IndexDefinition) bool { |
| 273 // keep walking until we find a perfect index. |
| 274 return !idxs.maybeAddDefinition(q, s, missingTerms, def) |
| 275 }) |
| 276 |
| 277 // this query is impossible to fulfil with the current indexes. Not all
the |
| 278 // terms (equality + projection) are satisfied. |
| 279 if len(missingTerms) < 0 || len(idxs) == 0 { |
| 280 remains := &ds.IndexDefinition{ |
| 281 Kind: q.kind, |
| 282 Ancestor: q.eqFilters["__ancestor__"] != nil, |
| 283 } |
| 284 terms := make([]string, 0, len(missingTerms)) |
| 285 for mt := range missingTerms { |
| 286 terms = append(terms, mt) |
| 287 } |
| 288 if serializationDeterministic { |
| 289 sort.Strings(terms) |
| 290 } |
| 291 for _, term := range terms { |
| 292 remains.SortBy = append(remains.SortBy, ds.IndexColumn{P
roperty: term}) |
| 293 } |
| 294 remains.SortBy = append(remains.SortBy, q.suffixFormat...) |
| 295 last := remains.SortBy[len(remains.SortBy)-1] |
| 296 if last.Direction == ds.ASCENDING { |
| 297 // this removes the __key__ column, since it's implicit. |
| 298 remains.SortBy = remains.SortBy[:len(remains.SortBy)-1] |
| 299 } |
| 300 if remains.Builtin() { |
| 301 impossible( |
| 302 fmt.Errorf("recommended missing index would be a
builtin: %s", remains)) |
| 303 } |
| 304 return nil, fmt.Errorf( |
| 305 "Your indexes are insufficient! Try adding:\n %s", rema
ins) |
| 306 } |
| 307 |
| 308 return idxs, nil |
| 309 } |
| 310 |
| 311 // generate generates a single iterDefinition for the given index. |
| 312 func generate(q *reducedQuery, idx *IndexDefinitionSortable, c *constraints) *it
erDefinition { |
| 313 def := &iterDefinition{ |
| 314 c: idx.coll, |
| 315 start: q.start, |
| 316 end: q.end, |
| 317 } |
| 318 toJoin := make([][]byte, len(idx.eqFilts)) |
| 319 for _, sb := range idx.eqFilts { |
| 320 val := c.peel(sb.Property) |
| 321 if sb.Direction == ds.DESCENDING { |
| 322 val = invert(val) |
| 323 } |
| 324 toJoin = append(toJoin, val) |
| 325 } |
| 326 def.prefix = bjoin(toJoin...) |
| 327 def.prefixLen = len(def.prefix) |
| 328 |
| 329 if q.eqFilters["__ancestor__"] != nil && !idx.hasAncestor() { |
| 330 // The query requires an ancestor, but the index doesn't explici
tly have it |
| 331 // as part of the prefix (otherwise it would have been the first
eqFilt |
| 332 // above). This happens when it's a builtin index, or if it's th
e primary |
| 333 // index (for a kindless query), or if it's the Kind index (for
a filterless |
| 334 // query). |
| 335 // |
| 336 // builtin indexes are: |
| 337 // Kind/__key__ |
| 338 // Kind/Prop/__key__ |
| 339 // Kind/Prop/-__key__ |
| 340 if len(q.suffixFormat) > 2 || q.suffixFormat[len(q.suffixFormat)
-1].Property != "__key__" { |
| 341 // This should never happen. One of the previous validat
ors would have |
| 342 // selected a different index. But just in case. |
| 343 impossible(fmt.Errorf("cannot supply an implicit ancesto
r for %#v", idx)) |
| 344 } |
| 345 |
| 346 // get the only value out of __ancestor__ |
| 347 anc := q.eqFilters["__ancestor__"].getOne() |
| 348 |
| 349 // Intentionally do NOT update prefixLen. This allows multiItera
tor to |
| 350 // correctly include the entire key in the shared iterator suffi
x, instead |
| 351 // of just the remainder. |
| 352 |
| 353 // chop the terminal null byte off the q.ancestor key... we can
accept |
| 354 // anything which is a descendant or an exact match. Removing t
he last byte |
| 355 // from the key (the terminating null) allows this trick to work
. Otherwise |
| 356 // it would be a closed range of EXACTLY this key. |
| 357 chopped := []byte(anc[:len(anc)-1]) |
| 358 if q.suffixFormat[0].Direction == ds.DESCENDING { |
| 359 chopped = invert(chopped) |
| 360 } |
| 361 def.prefix = bjoin(def.prefix, chopped) |
| 362 |
| 363 // Update start and end, since we know that if they contain anyt
hing, they |
| 364 // contain values for the __key__ field. |
| 365 if def.start != nil { |
| 366 offset := 0 |
| 367 if len(q.suffixFormat) > 1 { |
| 368 chunks, _ := parseSuffix(q.ns, q.suffixFormat, d
ef.start, 1) |
| 369 offset = len(chunks[0]) |
| 370 } |
| 371 if !bytes.HasPrefix(def.start[offset:], chopped) { |
| 372 // again, shouldn't happen, but if it does, we w
ant to know about it. |
| 373 impossible(fmt.Errorf( |
| 374 "start suffix for implied ancestor doesn
't start with ancestor! start:%v ancestor:%v", |
| 375 def.start, chopped)) |
| 376 } |
| 377 def.start = def.start[:offset+len(chopped)] |
| 378 } |
| 379 if def.end != nil { |
| 380 offset := 0 |
| 381 if len(q.suffixFormat) > 1 { |
| 382 chunks, _ := parseSuffix(q.ns, q.suffixFormat, d
ef.end, 1) |
| 383 offset = len(chunks[0]) |
| 384 } |
| 385 if !bytes.HasPrefix(def.end[offset:], chopped) { |
| 386 impossible(fmt.Errorf( |
| 387 "end suffix for implied ancestor doesn't
start with ancestor! end:%v ancestor:%v", |
| 388 def.end, chopped)) |
| 389 } |
| 390 def.end = def.end[:offset+len(chopped)] |
| 391 } |
| 392 } |
| 393 |
| 394 return def |
| 395 } |
| 396 |
| 397 type constraints struct { |
| 398 constraints map[string][][]byte |
| 399 original map[string][][]byte |
| 400 residualMapping map[string]int |
| 401 } |
| 402 |
| 403 // peel picks a constraint value for the property. It then removes this value |
| 404 // from constraints (possibly removing the entire row from constraints if it |
| 405 // was the last value). If the value wasn't available in constraints, it picks |
| 406 // the value from residuals. |
| 407 func (c *constraints) peel(prop string) []byte { |
| 408 ret := []byte(nil) |
| 409 if vals, ok := c.constraints[prop]; ok { |
| 410 ret = vals[0] |
| 411 if len(vals) == 1 { |
| 412 delete(c.constraints, prop) |
| 413 } else { |
| 414 c.constraints[prop] = vals[1:] |
| 415 } |
| 416 } else { |
| 417 row := c.original[prop] |
| 418 idx := c.residualMapping[prop] |
| 419 c.residualMapping[prop]++ |
| 420 ret = row[idx%len(row)] |
| 421 } |
| 422 return ret |
| 423 } |
| 424 |
| 425 func (c *constraints) empty() bool { |
| 426 return len(c.constraints) == 0 |
| 427 } |
| 428 |
| 429 // calculateConstraints produces a mapping of all equality filters to the values |
| 430 // that they're constrained to. It also calculates residuals, which are an |
| 431 // arbitrary value for filling index prefixes which have more equality fields |
| 432 // than are necessary. The value doesn't matter, as long as its an equality |
| 433 // constraint in the original query. |
| 434 func calculateConstraints(q *reducedQuery) *constraints { |
| 435 ret := &constraints{ |
| 436 original: make(map[string][][]byte, len(q.eqFilters)), |
| 437 constraints: make(map[string][][]byte, len(q.eqFilters)), |
| 438 residualMapping: make(map[string]int), |
| 439 } |
| 440 for prop, vals := range q.eqFilters { |
| 441 bvals := make([][]byte, 0, len(vals)) |
| 442 for val := range vals { |
| 443 bvals = append(bvals, []byte(val)) |
| 444 } |
| 445 ret.original[prop] = bvals |
| 446 if prop == "__ancestor__" { |
| 447 // exclude __ancestor__ from the constraints. |
| 448 // |
| 449 // This is because it's handled specially during index p
roposal and |
| 450 // generation. Ancestor is used by ALL indexes, and so i
ts residual value |
| 451 // in ret.original above will be sufficient. |
| 452 continue |
| 453 } |
| 454 ret.constraints[prop] = bvals |
| 455 } |
| 456 return ret |
| 457 } |
| 458 |
| 459 // getIndexes returns a set of iterator definitions. Iterating over these |
| 460 // will result in matching suffixes. |
| 461 func getIndexes(q *reducedQuery, s *memStore) ([]*iterDefinition, error) { |
| 462 relevantIdxs := IndexDefinitionSortableSlice(nil) |
| 463 if q.kind == "" { |
| 464 if coll := s.GetCollection("ents:" + q.ns); coll != nil { |
| 465 relevantIdxs = IndexDefinitionSortableSlice{{coll: coll}
} |
| 466 } |
| 467 } else { |
| 468 err := error(nil) |
| 469 relevantIdxs, err = getRelevantIndexes(q, s) |
| 470 if err != nil { |
| 471 return nil, err |
| 472 } |
| 473 } |
| 474 if len(relevantIdxs) == 0 { |
| 475 return nil, errQueryDone |
| 476 } |
| 477 |
| 478 // This sorts it so that relevantIdxs goes less filters -> more filters.
We |
| 479 // traverse this list backwards, however, so we traverse it in more filt
ers -> |
| 480 // less filters order. |
| 481 sort.Sort(relevantIdxs) |
| 482 |
| 483 constraints := calculateConstraints(q) |
| 484 |
| 485 ret := []*iterDefinition{} |
| 486 for !constraints.empty() || len(ret) == 0 { |
| 487 bestIdx := (*IndexDefinitionSortable)(nil) |
| 488 if len(ret) == 0 { |
| 489 // if ret is empty, take the biggest relevantIdx. It's g
uaranteed to have |
| 490 // the greatest number of equality filters of any index
in the list, and |
| 491 // we know that every equality filter will be pulled fro
m constraints and |
| 492 // not residual. |
| 493 // |
| 494 // This also takes care of the case when the query has n
o equality filters, |
| 495 // in which case relevantIdxs will actually only contain
one index anyway |
| 496 // :) |
| 497 bestIdx = &relevantIdxs[len(relevantIdxs)-1] |
| 498 if bestIdx.coll == nil { |
| 499 return nil, errQueryDone |
| 500 } |
| 501 } else { |
| 502 // If ret's not empty, then we need to find the best ind
ex we can. The |
| 503 // best index will be the one with the most matching equ
ality columns. |
| 504 // Since relevantIdxs is sorted primarially by the numbe
r of equality |
| 505 // columns, we walk down the list until the number of po
ssible columns is |
| 506 // worse than our best-so-far. |
| 507 // |
| 508 // Traversing the list backwards goes from more filters
-> less filters, |
| 509 // but also allows us to remove items from the list as w
e iterate over it. |
| 510 bestNumEqHits := 0 |
| 511 for i := len(relevantIdxs) - 1; i >= 0; i-- { |
| 512 idx := &relevantIdxs[i] |
| 513 if len(idx.eqFilts) < bestNumEqHits { |
| 514 // if the number of filters drops below
our best hit, it's never going |
| 515 // to get better than that. This index m
ight be helpful on a later |
| 516 // loop though, so don't remove it. |
| 517 break |
| 518 } |
| 519 numHits := 0 |
| 520 if idx.coll != nil { |
| 521 numHits = idx.numEqHits(constraints) |
| 522 } |
| 523 if numHits > bestNumEqHits { |
| 524 bestNumEqHits = numHits |
| 525 bestIdx = idx |
| 526 } else if numHits == 0 { |
| 527 // This index will never become useful a
gain, so remove it. |
| 528 relevantIdxs = append(relevantIdxs[:i],
relevantIdxs[i+1:]...) |
| 529 } |
| 530 } |
| 531 } |
| 532 if bestIdx == nil { |
| 533 // something is really wrong here... if relevantIdxs is
!nil, then we |
| 534 // should always be able to make progress in this loop. |
| 535 impossible(fmt.Errorf("deadlock: cannot fulfil query?")) |
| 536 } |
| 537 ret = append(ret, generate(q, bestIdx, constraints)) |
| 538 } |
| 539 |
| 540 return ret, nil |
| 541 } |
OLD | NEW |