Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1111)

Side by Side Diff: server/logdog/storage/bigtable/rowKey.go

Issue 1872903002: LogDog: Enable keys-only BigTable queries. (Closed) Base URL: https://github.com/luci/luci-go@logdog-archive-v2
Patch Set: Rebase Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2015 The Chromium Authors. All rights reserved. 1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 package bigtable 5 package bigtable
6 6
7 import ( 7 import (
8 "bytes" 8 "bytes"
9 "crypto/sha256" 9 "crypto/sha256"
10 "encoding/base64" 10 "encoding/base64"
11 "encoding/hex" 11 "encoding/hex"
12 "errors" 12 "errors"
13 "strings" 13 "strings"
14 "sync" 14 "sync"
15 "unicode/utf8"
16 15
17 "github.com/luci/luci-go/common/cmpbin" 16 "github.com/luci/luci-go/common/cmpbin"
18 ) 17 )
19 18
20 // rowKeyBufferPool stores a pool of allocated Buffer instances for reuse when 19 // rowKeyBufferPool stores a pool of allocated Buffer instances for reuse when
21 // constructing row keys. 20 // constructing row keys.
22 var ( 21 var (
23 // errMalformedRowKey is an error that is returned if the row key in the 22 // errMalformedRowKey is an error that is returned if the row key in the
24 // tables does not conform to our row key structure. 23 // tables does not conform to our row key structure.
25 errMalformedRowKey = errors.New("bigtable: malformed row key") 24 errMalformedRowKey = errors.New("bigtable: malformed row key")
26 25
27 // encodedPrefixSize is the size in bytes of the encoded row key prefix. All 26 // encodedPrefixSize is the size in bytes of the encoded row key prefix. All
28 // rows from the same stream path share this prefix. 27 // rows from the same stream path share this prefix.
29 » encodedPrefixSize = base64.URLEncoding.EncodedLen(sha256.Size) + len("~" ) 28 » encodedPrefixSize = base64.URLEncoding.EncodedLen(sha256.Size)
30 // maxEncodedKeySize is the maximum size in bytes of a full row key. 29 // maxEncodedKeySize is the maximum size in bytes of a full row key.
31 » maxEncodedKeySize = encodedPrefixSize + hex.EncodedLen(cmpbin.MaxIntLen6 4) 30 » maxEncodedKeySize = encodedPrefixSize + (2 * (len("~") + hex.EncodedLen( cmpbin.MaxIntLen64)))
Ryan Tseng 2016/04/13 21:38:06 The extra parens seem redundant
dnj 2016/04/13 21:44:15 Acknowledged.
32 31
33 rowKeyBufferPool = sync.Pool{ 32 rowKeyBufferPool = sync.Pool{
34 New: func() interface{} { 33 New: func() interface{} {
35 return &rowKeyBuffers{} 34 return &rowKeyBuffers{}
36 }, 35 },
37 } 36 }
38 ) 37 )
39 38
40 type rowKeyBuffers struct { 39 type rowKeyBuffers struct {
41 // binBuf is a Buffer to write binary data for encoding. 40 // binBuf is a Buffer to write binary data for encoding.
(...skipping 15 matching lines...) Expand all
57 func (rkb *rowKeyBuffers) reset() { 56 func (rkb *rowKeyBuffers) reset() {
58 if rkb.key == nil { 57 if rkb.key == nil {
59 rkb.key = make([]byte, maxEncodedKeySize) 58 rkb.key = make([]byte, maxEncodedKeySize)
60 } 59 }
61 rkb.size = 0 60 rkb.size = 0
62 } 61 }
63 62
64 func (rkb *rowKeyBuffers) appendPathPrefix(pathHash []byte) { 63 func (rkb *rowKeyBuffers) appendPathPrefix(pathHash []byte) {
65 base64.URLEncoding.Encode(rkb.remaining(), pathHash) 64 base64.URLEncoding.Encode(rkb.remaining(), pathHash)
66 rkb.size += base64.URLEncoding.EncodedLen(len(pathHash)) 65 rkb.size += base64.URLEncoding.EncodedLen(len(pathHash))
67 rkb.appendRune('~')
68 } 66 }
69 67
70 func (rkb *rowKeyBuffers) appendIndex(i int64) { 68 func (rkb *rowKeyBuffers) appendInt64(i int64) {
71 // Encode index to "cmpbin". 69 // Encode index to "cmpbin".
72 rkb.binBuf.Reset() 70 rkb.binBuf.Reset()
73 cmpbin.WriteInt(&rkb.binBuf, i) 71 cmpbin.WriteInt(&rkb.binBuf, i)
74 72
75 rkb.size += hex.Encode(rkb.remaining(), rkb.binBuf.Bytes()) 73 rkb.size += hex.Encode(rkb.remaining(), rkb.binBuf.Bytes())
76 } 74 }
77 75
78 func (rkb *rowKeyBuffers) appendRune(r rune) { 76 func (rkb *rowKeyBuffers) appendBytes(d []byte) {
79 » rkb.size += utf8.EncodeRune(rkb.remaining(), r) 77 » rkb.size += copy(rkb.remaining(), d)
80 } 78 }
81 79
82 func (rkb *rowKeyBuffers) remaining() []byte { 80 func (rkb *rowKeyBuffers) remaining() []byte {
83 return rkb.key[rkb.size:] 81 return rkb.key[rkb.size:]
84 } 82 }
85 83
86 func (rkb *rowKeyBuffers) value() string { 84 func (rkb *rowKeyBuffers) value() string {
87 return string(rkb.key[:rkb.size]) 85 return string(rkb.key[:rkb.size])
88 } 86 }
89 87
90 // rowKey is a BigTable row key. 88 // rowKey is a BigTable row key.
91 // 89 //
92 // The row key is formed from a Path and its Index. The goal: 90 // The row key is formed from a Path and its Index. The goal:
93 // - Rows with the same path should be clustered. 91 // - Rows with the same path should be clustered.
94 // - Rows with the same path should be sorted according to index. 92 // - Rows with the same path should be sorted according to index.
95 // 93 //
96 // Since BigTable rows must be valid UTF8, and since paths are effectively 94 // Since BigTable rows must be valid UTF8, and since paths are effectively
97 // unbounded, the row key will be formed by composing: 95 // unbounded, the row key will be formed by composing:
98 // [ base64(sha256(path)) ] + '~' + [ hex(cmpbin(index)) ] 96 //
97 // [ base64(sha256(path)) ] + '~' + [ hex(cmpbin(index)) ] + '~' +
98 // [ hex(cmpbin(count)) ]
99 //
100 // NOTE: There is a "legacy" period of time when row keys will NOT include a
101 // count. Since these sort before row keys with a count, row key order will be
102 // maintained. These row keys will have a count value of "0".
99 type rowKey struct { 103 type rowKey struct {
100 pathHash []byte 104 pathHash []byte
101 index int64 105 index int64
106 count int64
102 } 107 }
103 108
104 // newRowKey generates the row key matching a given entry path and index. 109 // newRowKey generates the row key matching a given entry path, index, and count.
105 func newRowKey(path string, index int64) *rowKey { 110 func newRowKey(path string, index, count int64) *rowKey {
106 pathHash := sha256.Sum256([]byte(path)) 111 pathHash := sha256.Sum256([]byte(path))
107 return &rowKey{ 112 return &rowKey{
108 pathHash: pathHash[:], 113 pathHash: pathHash[:],
109 index: index, 114 index: index,
115 count: count,
110 } 116 }
111 } 117 }
112 118
113 // decodeRowKey decodes an encoded row key into its structural components. 119 // decodeRowKey decodes an encoded row key into its structural components.
114 func decodeRowKey(v string) (*rowKey, error) { 120 func decodeRowKey(v string) (*rowKey, error) {
115 » keyParts := strings.SplitN(v, "~", 2) 121 » keyParts := strings.SplitN(v, "~", 3)
116 » if len(keyParts) != 2 { 122 » if len(keyParts) < 2 {
123 » » // TODO: Make this force 3 once "legacy mode" is disabled.
117 return nil, errMalformedRowKey 124 return nil, errMalformedRowKey
118 } 125 }
119 126
120 hashEnc, idxEnc := keyParts[0], keyParts[1] 127 hashEnc, idxEnc := keyParts[0], keyParts[1]
121 if base64.URLEncoding.DecodedLen(len(hashEnc)) < sha256.Size { 128 if base64.URLEncoding.DecodedLen(len(hashEnc)) < sha256.Size {
122 return nil, errMalformedRowKey 129 return nil, errMalformedRowKey
123 } 130 }
124 131
125 // Decode encoded path hash. 132 // Decode encoded path hash.
126 var err error 133 var err error
127 rk := rowKey{} 134 rk := rowKey{}
128 rk.pathHash, err = base64.URLEncoding.DecodeString(hashEnc) 135 rk.pathHash, err = base64.URLEncoding.DecodeString(hashEnc)
129 if err != nil { 136 if err != nil {
130 return nil, errMalformedRowKey 137 return nil, errMalformedRowKey
131 } 138 }
132 139
133 // Decode index. 140 // Decode index.
134 » idxBytes, err := hex.DecodeString(idxEnc) 141 » rk.index, err = readHexInt64(idxEnc)
135 if err != nil { 142 if err != nil {
136 » » return nil, errMalformedRowKey 143 » » return nil, err
137 } 144 }
138 145
139 » dr := bytes.NewReader(idxBytes) 146 » // If a count is available, decode that as well.
140 » index, _, err := cmpbin.ReadInt(dr) 147 » if len(keyParts) == 3 {
141 » if err != nil { 148 » » rk.count, err = readHexInt64(keyParts[2])
142 » » return nil, errMalformedRowKey 149 » » if err != nil {
150 » » » return nil, err
151 » » }
143 } 152 }
144 rk.index = index
145 153
146 // There should be no more data.
147 if dr.Len() > 0 {
148 return nil, errMalformedRowKey
149 }
150 return &rk, nil 154 return &rk, nil
151 } 155 }
152 156
153 func (rk *rowKey) String() string { 157 func (rk *rowKey) String() string {
154 return rk.encode() 158 return rk.encode()
155 } 159 }
156 160
157 // encode returns the string encoding of the row key. 161 // encode returns the string encoding of the row key.
158 func (rk *rowKey) encode() (v string) { 162 func (rk *rowKey) encode() (v string) {
159 // Write the final key to "key": (base64(HASH)~hex(INDEX)) 163 // Write the final key to "key": (base64(HASH)~hex(INDEX)[~hex(COUNT)])
160 withRowKeyBuffers(func(rkb *rowKeyBuffers) { 164 withRowKeyBuffers(func(rkb *rowKeyBuffers) {
161 rkb.appendPathPrefix(rk.pathHash) 165 rkb.appendPathPrefix(rk.pathHash)
162 » » rkb.appendIndex(rk.index) 166 » » rkb.appendBytes([]byte("~"))
167 » » rkb.appendInt64(rk.index)
168 » » if rk.count > 0 {
169 » » » rkb.appendBytes([]byte("~"))
170 » » » rkb.appendInt64(rk.count)
171 » » }
163 v = rkb.value() 172 v = rkb.value()
164 }) 173 })
165 return 174 return
166 } 175 }
167 176
168 // pathPrefix returns the encoded path prefix for the row key. 177 // pathPrefix returns the encoded path prefix for the row key.
169 func (rk *rowKey) pathPrefix() (v string) { 178 func (rk *rowKey) pathPrefix() (v string) {
170 withRowKeyBuffers(func(rkb *rowKeyBuffers) { 179 withRowKeyBuffers(func(rkb *rowKeyBuffers) {
171 rkb.appendPathPrefix(rk.pathHash) 180 rkb.appendPathPrefix(rk.pathHash)
181 rkb.appendBytes([]byte("~"))
172 v = rkb.value() 182 v = rkb.value()
173 }) 183 })
174 return 184 return
175 } 185 }
176 186
177 // pathPrefixUpperBound returns the path prefix that is higher than any path 187 // pathPrefixUpperBound returns the path prefix that is higher than any path
178 // allowed in the row key space. 188 // allowed in the row key space.
179 // 189 //
180 // This is accomplished by appending a "~" character to the path prefix, 190 // This is accomplished by appending a "~" character to the path prefix,
181 // creating something like this: 191 // creating something like this:
182 // 192 //
183 // prefix~~ 193 // prefix~~
184 // 194 //
185 // The "prefix~" is shared with all keys in "rk", but the extra "~" is larger 195 // The "prefix~" is shared with all keys in "rk", but the extra "~" is larger
186 // than any hex-encoded row index, so this key will always be larger. 196 // than any hex-encoded row index, so this key will always be larger.
187 func (rk *rowKey) pathPrefixUpperBound() (v string) { 197 func (rk *rowKey) pathPrefixUpperBound() (v string) {
188 withRowKeyBuffers(func(rkb *rowKeyBuffers) { 198 withRowKeyBuffers(func(rkb *rowKeyBuffers) {
189 rkb.appendPathPrefix(rk.pathHash) 199 rkb.appendPathPrefix(rk.pathHash)
190 » » rkb.appendRune('~') 200 » » rkb.appendBytes([]byte("~~"))
191 v = rkb.value() 201 v = rkb.value()
192 }) 202 })
193 return 203 return
194 } 204 }
195 205
196 // sharesPathWith tests if the "path" component of the row key "rk" matches 206 // sharesPathWith tests if the "path" component of the row key "rk" matches
197 // the "path" component of "o". 207 // the "path" component of "o".
198 func (rk *rowKey) sharesPathWith(o *rowKey) bool { 208 func (rk *rowKey) sharesPathWith(o *rowKey) bool {
199 return bytes.Equal(rk.pathHash, o.pathHash) 209 return bytes.Equal(rk.pathHash, o.pathHash)
200 } 210 }
211
212 func readHexInt64(v string) (int64, error) {
213 d, err := hex.DecodeString(v)
214 if err != nil {
215 return 0, errMalformedRowKey
216 }
217
218 dr := bytes.NewReader(d)
219 value, _, err := cmpbin.ReadInt(dr)
220 if err != nil {
221 return 0, errMalformedRowKey
222 }
223
224 // There should be no more data.
225 if dr.Len() > 0 {
226 return 0, errMalformedRowKey
227 }
228
229 return value, nil
230 }
OLDNEW
« no previous file with comments | « server/logdog/storage/bigtable/bigtable_test.go ('k') | server/logdog/storage/bigtable/rowKey_test.go » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698