Chromium Code Reviews

Unified Diff: impl/memory/datastore_index_selection.go

Issue 1302813003: impl/memory: Implement Queries (Closed) Base URL: https://github.com/luci/gae.git@add_multi_iterator
Patch Set: stringSet everywhere Created 5 years, 4 months ago
Index: impl/memory/datastore_index_selection.go
diff --git a/impl/memory/datastore_index_selection.go b/impl/memory/datastore_index_selection.go
new file mode 100644
index 0000000000000000000000000000000000000000..f0aec8240b7248b7e48c62fd74a54121da034437
--- /dev/null
+++ b/impl/memory/datastore_index_selection.go
@@ -0,0 +1,541 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package memory
+
+import (
+ "bytes"
+ "fmt"
+ "sort"
+ "strings"
+
+ ds "github.com/luci/gae/service/datastore"
+ "github.com/luci/gae/service/datastore/serialize"
+)
+
+// reducedQuery contains only the pieces of the query necessary to iterate for
+// results.
+//
+// Deduplication is applied externally.
+// Projection, keys-only and entity retrieval are also done externally.
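+//
+// For illustration (hypothetical values, not taken from a test): a query like
+// "Kind=Foo, Val=1, order by Name" might reduce to eqFilters holding the
+// serialized value for Val, and a suffixFormat of [Name (ascending), __key__].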
+type reducedQuery struct {
+ ns string
+ kind string
+
+ // eqFilters indicate the set of all prefix constraints which need to be
+ // fulfilled in the composite query. All of these will translate into prefix
+ // bytes for SOME index.
+ eqFilters map[string]stringSet
+
+ // suffixFormat is the PRECISE listing of the suffix columns that ALL indexes
+ // in the multi query will have.
+ //
+ // suffixFormat ALWAYS includes the inequality filter (if any) as the 0th
+ // element.
+ // suffixFormat ALWAYS includes any additional projections (in ascending
+ // order) after all user-defined sort orders.
+ // suffixFormat ALWAYS has __key__ as the last column.
+ suffixFormat []ds.IndexColumn
+
+ // limits of the inequality and/or full sort order. This is ONLY a suffix,
+ // and it will be appended to the prefix during iteration.
+ start []byte
+ end []byte
+
+ // metadata describing the total number of columns that this query requires to
+ // execute perfectly.
+ numCols int
+}
+
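+// IndexDefinitionSortable is a single index candidate for servicing a query:
+// the ACTUAL equality-prefix columns it would use, plus the memCollection
+// holding the index data (nil if that index data doesn't exist yet).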
+type IndexDefinitionSortable struct {
+ // eqFilts is the list of ACTUAL prefix columns. Note that it may contain
+ // redundant columns! (e.g. (tag, tag) is a perfectly valid prefix, because
+ // (tag=1, tag=2) is a perfectly valid query).
+ eqFilts []ds.IndexColumn
+ coll *memCollection
+}
+
+func (i *IndexDefinitionSortable) hasAncestor() bool {
+ return len(i.eqFilts) > 0 && i.eqFilts[0].Property == "__ancestor__"
+}
+
+func (i *IndexDefinitionSortable) numEqHits(c *constraints) int {
+ ret := 0
+ for _, filt := range i.eqFilts {
+ if _, ok := c.constraints[filt.Property]; ok {
+ ret++
+ }
+ }
+ return ret
+}
+
+type IndexDefinitionSortableSlice []IndexDefinitionSortable
+
+func (s IndexDefinitionSortableSlice) Len() int { return len(s) }
+func (s IndexDefinitionSortableSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
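+// Less sorts the slice so that indexes with no backing collection come first,
+// then by ascending number of equality columns, then column-by-column with
+// ASCENDING before DESCENDING, and finally by property name.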
+func (s IndexDefinitionSortableSlice) Less(i, j int) bool {
+ a, b := s[i], s[j]
+ if a.coll == nil && b.coll != nil {
+ return true
+ } else if a.coll != nil && b.coll == nil {
+ return false
+ }
+
+ cmp := len(a.eqFilts) - len(b.eqFilts)
+ if cmp < 0 {
+ return true
+ } else if cmp > 0 {
+ return false
+ }
+ for k, col := range a.eqFilts {
+ ocol := b.eqFilts[k]
+ if col.Direction == ds.ASCENDING && ocol.Direction == ds.DESCENDING {
+ return true
+ } else if col.Direction == ds.DESCENDING && ocol.Direction == ds.ASCENDING {
+ return false
+ }
+ if col.Property < ocol.Property {
+ return true
+ } else if col.Property > ocol.Property {
+ return false
+ }
+ }
+ return false
+}
+
+// maybeAddDefinition possibly adds a new IndexDefinitionSortable to this slice.
+// It's only added if it could be useful in servicing q; otherwise this function
+// is a no-op.
+//
+// This returns true iff the proposed index is OK and depletes missingTerms to
+// empty.
+//
+// If the proposed index is PERFECT (e.g. contains enough columns to cover all
+// equality filters, and also has the correct suffix), idxs will be replaced
+// with JUST that index, and this will return true.
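+//
+// As a rough sketch (hypothetical values): for a query with eqFilters
+// {tag: {a, b}, name: {x}}, suffixFormat [__key__] and numCols == 4, an index
+// on (tag, tag, name, __key__) would be a perfect match, while an index on
+// (tag, name, __key__) would only be added as a candidate.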
+func (idxs *IndexDefinitionSortableSlice) maybeAddDefinition(q *reducedQuery, s *memStore, missingTerms stringSet, id *ds.IndexDefinition) bool {
+ // Kindless queries are handled elsewhere.
+ if id.Kind != q.kind {
+ impossible(
+ fmt.Errorf("maybeAddDefinition given index with wrong kind %q v %q", id.Kind, q.kind))
+ }
+
+ // If this is an ancestor query and the index is compound but doesn't include
+ // an Ancestor field, it can't be used. Builtin indexes (which have !Ancestor)
+ // can still serve ancestor queries, assuming the query has only equality
+ // filters (plus an inequality on __key__), or a single inequality.
+ if q.eqFilters["__ancestor__"] != nil && !id.Ancestor && !id.Builtin() {
+ impossible(
+ fmt.Errorf("maybeAddDefinition given compound index with wrong ancestor info: %s %#v", id, q))
+ }
+
+ // GetFullSortOrder adds __ancestor__ to the sort order if necessary.
+ sortBy := id.GetFullSortOrder()
+
+ // If the index has fewer fields than we need for the suffix, it can't
+ // possibly help.
+ if len(sortBy) < len(q.suffixFormat) {
+ return false
+ }
+
+ numEqFilts := len(sortBy) - len(q.suffixFormat)
+ // make sure the orders are precisely the same
+ for i, sb := range sortBy[numEqFilts:] {
+ if q.suffixFormat[i] != sb {
+ return false
+ }
+ }
+
+ if id.Builtin() && numEqFilts == 0 {
+ if len(q.eqFilters) > 1 || (len(q.eqFilters) == 1 && q.eqFilters["__ancestor__"] == nil) {
+ return false
+ }
+ }
+
+ // Make sure the equalities section doesn't contain any properties we don't
+ // want in our query.
+ //
+ // numByProp and totalEqFilts will be used later to see if this is a perfect
+ // match.
+ numByProp := make(map[string]int, len(q.eqFilters))
+ totalEqFilts := 0
+
+ eqFilts := sortBy[:numEqFilts]
+ for _, p := range eqFilts {
+ if _, ok := q.eqFilters[p.Property]; !ok {
+ return false
+ }
+ numByProp[p.Property]++
+ totalEqFilts++
+ }
+
+ // ok, we can actually use this
+
+ // Grab the collection for convenience later. We don't want to invalidate this
+ // index's potential just because the collection doesn't exist. If it's
+ // a builtin and it doesn't exist, it still needs to be one of the 'possible'
+ // indexes... it just means that the user's query will end up with no results.
+ coll := s.GetCollection(
+ fmt.Sprintf("idx:%s:%s", q.ns, serialize.ToBytes(*id.PrepForIdxTable())))
+
+ // First, see if it's a perfect match. If it is, then our search is over.
+ //
+ // A perfect match contains ALL the equality filter columns (or more, since
+ // we can use residuals to fill in the extras).
+ toAdd := IndexDefinitionSortable{coll: coll}
+ toAdd.eqFilts = eqFilts
+ for _, sb := range toAdd.eqFilts {
+ missingTerms.rm(sb.Property)
+ }
+
+ perfect := false
+ if len(sortBy) == q.numCols {
+ perfect = true
+ for k, num := range numByProp {
+ if num < len(q.eqFilters[k]) {
+ perfect = false
+ break
+ }
+ }
+ }
+ if perfect {
+ *idxs = IndexDefinitionSortableSlice{toAdd}
+ } else {
+ *idxs = append(*idxs, toAdd)
+ }
+ return len(missingTerms) == 0
+}
+
+// getRelevantIndexes retrieves the relevant indexes which could be used to
+// service q. It returns an error (suggesting a missing index) if it's not
+// possible to service q with the current indexes.
+func getRelevantIndexes(q *reducedQuery, s *memStore) (IndexDefinitionSortableSlice, error) {
+ missingTerms := stringSet{}
+ for k := range q.eqFilters {
+ if k == "__ancestor__" {
+ // __ancestor__ is not a prefix which can be satisfied by a single index; it
+ // must be satisfied by ALL indexes (and is handled specially in
+ // maybeAddDefinition).
+ continue
+ }
+ missingTerms.add(k)
+ }
+ idxs := IndexDefinitionSortableSlice{}
+
+ // First we add builtins
+ // add
+ // idx:KIND
+ if idxs.maybeAddDefinition(q, s, missingTerms, &ds.IndexDefinition{
+ Kind: q.kind,
+ }) {
+ return idxs, nil
+ }
+
+ // add
+ // idx:KIND:prop
+ // idx:KIND:-prop
+ props := stringSet{}
+ for prop := range q.eqFilters {
+ props.add(prop)
+ }
+ for _, col := range q.suffixFormat[:len(q.suffixFormat)-1] {
+ props.add(col.Property)
+ }
+ for prop := range props {
+ if strings.HasPrefix(prop, "__") && strings.HasSuffix(prop, "__") {
+ continue
+ }
+ if idxs.maybeAddDefinition(q, s, missingTerms, &ds.IndexDefinition{
+ Kind: q.kind,
+ SortBy: []ds.IndexColumn{
+ {Property: prop},
+ },
+ }) {
+ return idxs, nil
+ }
+ if idxs.maybeAddDefinition(q, s, missingTerms, &ds.IndexDefinition{
+ Kind: q.kind,
+ SortBy: []ds.IndexColumn{
+ {Property: prop, Direction: ds.DESCENDING},
+ },
+ }) {
+ return idxs, nil
+ }
+ }
+
+ // Try adding all compound indexes whose suffix matches.
+ suffix := &ds.IndexDefinition{
+ Kind: q.kind,
+ Ancestor: q.eqFilters["__ancestor__"] != nil,
+ SortBy: q.suffixFormat,
+ }
+ walkCompIdxs(s, suffix, func(def *ds.IndexDefinition) bool {
+ // keep walking until we find a perfect index.
+ return !idxs.maybeAddDefinition(q, s, missingTerms, def)
+ })
+
+ // this query is impossible to fulfil with the current indexes. Not all the
+ // terms (equality + projection) are satisfied.
+ if len(missingTerms) > 0 || len(idxs) == 0 {
+ remains := &ds.IndexDefinition{
+ Kind: q.kind,
+ Ancestor: q.eqFilters["__ancestor__"] != nil,
+ }
+ terms := make([]string, 0, len(missingTerms))
+ for mt := range missingTerms {
+ terms = append(terms, mt)
+ }
+ if serializationDeterministic {
+ sort.Strings(terms)
+ }
+ for _, term := range terms {
+ remains.SortBy = append(remains.SortBy, ds.IndexColumn{Property: term})
+ }
+ remains.SortBy = append(remains.SortBy, q.suffixFormat...)
+ last := remains.SortBy[len(remains.SortBy)-1]
+ if last.Direction == ds.ASCENDING {
+ // this removes the __key__ column, since it's implicit.
+ remains.SortBy = remains.SortBy[:len(remains.SortBy)-1]
+ }
+ if remains.Builtin() {
+ impossible(
+ fmt.Errorf("recommended missing index would be a builtin: %s", remains))
+ }
+ return nil, fmt.Errorf(
+ "Your indexes are insufficient! Try adding:\n %s", remains)
+ }
+
+ return idxs, nil
+}
+
+// generate generates a single iterDefinition for the given index.
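+//
+// The iterator's prefix is built by peeling one serialized value from the
+// constraints for each equality column (inverted for DESCENDING columns). For
+// ancestor queries served by an index which doesn't explicitly contain
+// __ancestor__, the ancestor key (minus its terminating null byte) is appended
+// to the prefix as well.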
+func generate(q *reducedQuery, idx *IndexDefinitionSortable, c *constraints) *iterDefinition {
+ def := &iterDefinition{
+ c: idx.coll,
+ start: q.start,
+ end: q.end,
+ }
+ toJoin := make([][]byte, 0, len(idx.eqFilts))
+ for _, sb := range idx.eqFilts {
+ val := c.peel(sb.Property)
+ if sb.Direction == ds.DESCENDING {
+ val = invert(val)
+ }
+ toJoin = append(toJoin, val)
+ }
+ def.prefix = bjoin(toJoin...)
+ def.prefixLen = len(def.prefix)
+
+ if q.eqFilters["__ancestor__"] != nil && !idx.hasAncestor() {
+ // The query requires an ancestor, but the index doesn't explicitly have it
+ // as part of the prefix (otherwise it would have been the first eqFilt
+ // above). This happens when it's a builtin index, or if it's the primary
+ // index (for a kindless query), or if it's the Kind index (for a filterless
+ // query).
+ //
+ // builtin indexes are:
+ // Kind/__key__
+ // Kind/Prop/__key__
+ // Kind/Prop/-__key__
+ if len(q.suffixFormat) > 2 || q.suffixFormat[len(q.suffixFormat)-1].Property != "__key__" {
+ // This should never happen. One of the previous validators would have
+ // selected a different index. But just in case.
+ impossible(fmt.Errorf("cannot supply an implicit ancestor for %#v", idx))
+ }
+
+ // get the only value out of __ancestor__
+ anc := q.eqFilters["__ancestor__"].getOne()
+
+ // Intentionally do NOT update prefixLen. This allows multiIterator to
+ // correctly include the entire key in the shared iterator suffix, instead
+ // of just the remainder.
+
+ // chop the terminal null byte off the ancestor key... we can accept
+ // anything which is a descendant or an exact match. Removing the last byte
+ // from the key (the terminating null) allows this trick to work. Otherwise
+ // it would be a closed range of EXACTLY this key.
+ chopped := []byte(anc[:len(anc)-1])
+ if q.suffixFormat[0].Direction == ds.DESCENDING {
+ chopped = invert(chopped)
+ }
+ def.prefix = bjoin(def.prefix, chopped)
+
+ // Update start and end, since we know that if they contain anything, they
+ // contain values for the __key__ field.
+ if def.start != nil {
+ offset := 0
+ if len(q.suffixFormat) > 1 {
+ chunks, _ := parseSuffix(q.ns, q.suffixFormat, def.start, 1)
+ offset = len(chunks[0])
+ }
+ if !bytes.HasPrefix(def.start[offset:], chopped) {
+ // again, shouldn't happen, but if it does, we want to know about it.
+ impossible(fmt.Errorf(
+ "start suffix for implied ancestor doesn't start with ancestor! start:%v ancestor:%v",
+ def.start, chopped))
+ }
+ def.start = def.start[:offset+len(chopped)]
+ }
+ if def.end != nil {
+ offset := 0
+ if len(q.suffixFormat) > 1 {
+ chunks, _ := parseSuffix(q.ns, q.suffixFormat, def.end, 1)
+ offset = len(chunks[0])
+ }
+ if !bytes.HasPrefix(def.end[offset:], chopped) {
+ impossible(fmt.Errorf(
+ "end suffix for implied ancestor doesn't start with ancestor! end:%v ancestor:%v",
+ def.end, chopped))
+ }
+ def.end = def.end[:offset+len(chopped)]
+ }
+ }
+
+ return def
+}
+
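+// constraints tracks the equality values available for building index
+// prefixes: constraints holds the values not yet consumed by peel, original
+// holds the full set from the query, and residualMapping counts how many
+// residual picks have been made per property (so repeated residual picks cycle
+// through the original values).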
+type constraints struct {
+ constraints map[string][][]byte
+ original map[string][][]byte
+ residualMapping map[string]int
+}
+
+// peel picks a constraint value for the property. It then removes this value
+// from constraints (possibly removing the entire row from constraints if it
+// was the last value). If the value wasn't available in constraints, it picks
+// the value from residuals.
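+//
+// For example (hypothetical values): with constraints {"foo": [a, b]},
+// peel("foo") returns a and leaves [b]; a second call returns b and deletes
+// the "foo" row; any further calls fall back to the residuals in original,
+// cycling through its values.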
+func (c *constraints) peel(prop string) []byte {
+ ret := []byte(nil)
+ if vals, ok := c.constraints[prop]; ok {
+ ret = vals[0]
+ if len(vals) == 1 {
+ delete(c.constraints, prop)
+ } else {
+ c.constraints[prop] = vals[1:]
+ }
+ } else {
+ row := c.original[prop]
+ idx := c.residualMapping[prop]
+ c.residualMapping[prop]++
+ ret = row[idx%len(row)]
+ }
+ return ret
+}
+
+func (c *constraints) empty() bool {
+ return len(c.constraints) == 0
+}
+
+// calculateConstraints produces a mapping of all equality filters to the values
+// that they're constrained to. It also calculates residuals, which are an
+// arbitrary value for filling index prefixes which have more equality fields
+// than are necessary. The value doesn't matter, as long as it's an equality
+// constraint in the original query.
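+//
+// For example (hypothetical values): eqFilters {"foo": {a, b},
+// "__ancestor__": {k}} produces original entries for both properties, but a
+// constraints entry only for "foo"; the ancestor value is only ever supplied
+// from original as a residual.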
+func calculateConstraints(q *reducedQuery) *constraints {
+ ret := &constraints{
+ original: make(map[string][][]byte, len(q.eqFilters)),
+ constraints: make(map[string][][]byte, len(q.eqFilters)),
+ residualMapping: make(map[string]int),
+ }
+ for prop, vals := range q.eqFilters {
+ bvals := make([][]byte, 0, len(vals))
+ for val := range vals {
+ bvals = append(bvals, []byte(val))
+ }
+ ret.original[prop] = bvals
+ if prop == "__ancestor__" {
+ // exclude __ancestor__ from the constraints.
+ //
+ // This is because it's handled specially during index proposal and
+ // generation. Ancestor is used by ALL indexes, and so its residual value
+ // in ret.original above will be sufficient.
+ continue
+ }
+ ret.constraints[prop] = bvals
+ }
+ return ret
+}
+
+// getIndexes returns a set of iterator definitions. Iterating over these
+// will result in matching suffixes.
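+//
+// A rough sketch of the selection loop below (hypothetical indexes): with
+// equality filters on A, B and C, and candidate indexes over (A, B) and (C),
+// the first pass takes the (A, B) index (most equality columns) and peels A
+// and B from the constraints; the next pass picks the (C) index, which peels
+// C and empties the constraints, ending the loop.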
+func getIndexes(q *reducedQuery, s *memStore) ([]*iterDefinition, error) {
+ relevantIdxs := IndexDefinitionSortableSlice(nil)
+ if q.kind == "" {
+ if coll := s.GetCollection("ents:" + q.ns); coll != nil {
+ relevantIdxs = IndexDefinitionSortableSlice{{coll: coll}}
+ }
+ } else {
+ err := error(nil)
+ relevantIdxs, err = getRelevantIndexes(q, s)
+ if err != nil {
+ return nil, err
+ }
+ }
+ if len(relevantIdxs) == 0 {
+ return nil, errQueryDone
+ }
+
+ // This sorts relevantIdxs from fewest filters to most filters. We traverse
+ // the list backwards, however, so we visit it in most-filters to
+ // fewest-filters order.
+ sort.Sort(relevantIdxs)
+
+ constraints := calculateConstraints(q)
+
+ ret := []*iterDefinition{}
+ for !constraints.empty() || len(ret) == 0 {
+ bestIdx := (*IndexDefinitionSortable)(nil)
+ if len(ret) == 0 {
+ // if ret is empty, take the biggest relevantIdx. It's guaranteed to have
+ // the greatest number of equality filters of any index in the list, and
+ // we know that every equality filter will be pulled from constraints and
+ // not residual.
+ //
+ // This also takes care of the case when the query has no equality filters,
+ // in which case relevantIdxs will actually only contain one index anyway
+ // :)
+ bestIdx = &relevantIdxs[len(relevantIdxs)-1]
+ if bestIdx.coll == nil {
+ return nil, errQueryDone
+ }
+ } else {
+ // If ret's not empty, then we need to find the best index we can. The
+ // best index will be the one with the most matching equality columns.
+ // Since relevantIdxs is sorted primarily by the number of equality
+ // columns, we walk down the list until the number of possible columns is
+ // worse than our best-so-far.
+ //
+ // Traversing the list backwards goes from more filters -> less filters,
+ // but also allows us to remove items from the list as we iterate over it.
+ bestNumEqHits := 0
+ for i := len(relevantIdxs) - 1; i >= 0; i-- {
+ idx := &relevantIdxs[i]
+ if len(idx.eqFilts) < bestNumEqHits {
+ // if the number of filters drops below our best hit, it's never going
+ // to get better than that. This index might be helpful on a later
+ // loop though, so don't remove it.
+ break
+ }
+ numHits := 0
+ if idx.coll != nil {
+ numHits = idx.numEqHits(constraints)
+ }
+ if numHits > bestNumEqHits {
+ bestNumEqHits = numHits
+ bestIdx = idx
+ } else if numHits == 0 {
+ // This index will never become useful again, so remove it.
+ relevantIdxs = append(relevantIdxs[:i], relevantIdxs[i+1:]...)
+ }
+ }
+ }
+ if bestIdx == nil {
+ // something is really wrong here... if relevantIdxs is !nil, then we
+ // should always be able to make progress in this loop.
+ impossible(fmt.Errorf("deadlock: cannot fulfil query?"))
+ }
+ ret = append(ret, generate(q, bestIdx, constraints))
+ }
+
+ return ret, nil
+}
