Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 package bigtable | 5 package bigtable |
| 6 | 6 |
| 7 import ( | 7 import ( |
| 8 "bytes" | 8 "bytes" |
| 9 "crypto/sha256" | 9 "crypto/sha256" |
| 10 "encoding/base64" | 10 "encoding/base64" |
| 11 "encoding/hex" | 11 "encoding/hex" |
| 12 "errors" | 12 "errors" |
| 13 "strings" | 13 "strings" |
| 14 "sync" | 14 "sync" |
| 15 "unicode/utf8" | |
| 16 | 15 |
| 17 "github.com/luci/luci-go/common/cmpbin" | 16 "github.com/luci/luci-go/common/cmpbin" |
| 18 ) | 17 ) |
| 19 | 18 |
| 20 // rowKeyBufferPool stores a pool of allocated Buffer instances for reuse when | 19 // rowKeyBufferPool stores a pool of allocated Buffer instances for reuse when |
| 21 // constructing row keys. | 20 // constructing row keys. |
| 22 var ( | 21 var ( |
| 23 // errMalformedRowKey is an error that is returned if the row key in the | 22 // errMalformedRowKey is an error that is returned if the row key in the |
| 24 // tables does not conform to our row key structure. | 23 // tables does not conform to our row key structure. |
| 25 errMalformedRowKey = errors.New("bigtable: malformed row key") | 24 errMalformedRowKey = errors.New("bigtable: malformed row key") |
| 26 | 25 |
| 27 // encodedPrefixSize is the size in bytes of the encoded row key prefix. All | 26 // encodedPrefixSize is the size in bytes of the encoded row key prefix. All |
| 28 // rows from the same stream path share this prefix. | 27 // rows from the same stream path share this prefix. |
| 29 » encodedPrefixSize = base64.URLEncoding.EncodedLen(sha256.Size) + len("~") | 28 » encodedPrefixSize = base64.URLEncoding.EncodedLen(sha256.Size) |
| 30 // maxEncodedKeySize is the maximum size in bytes of a full row key. | 29 // maxEncodedKeySize is the maximum size in bytes of a full row key. |
| 31 » maxEncodedKeySize = encodedPrefixSize + hex.EncodedLen(cmpbin.MaxIntLen64) | 30 » maxEncodedKeySize = encodedPrefixSize + (2 * (len("~") + hex.EncodedLen(cmpbin.MaxIntLen64))) |
|
Ryan Tseng
2016/04/13 21:38:06
The extra parens seem redundant
dnj
2016/04/13 21:44:15
Acknowledged.
| |
| 32 | 31 |
| 33 rowKeyBufferPool = sync.Pool{ | 32 rowKeyBufferPool = sync.Pool{ |
| 34 New: func() interface{} { | 33 New: func() interface{} { |
| 35 return &rowKeyBuffers{} | 34 return &rowKeyBuffers{} |
| 36 }, | 35 }, |
| 37 } | 36 } |
| 38 ) | 37 ) |
| 39 | 38 |
| 40 type rowKeyBuffers struct { | 39 type rowKeyBuffers struct { |
| 41 // binBuf is a Buffer to write binary data for encoding. | 40 // binBuf is a Buffer to write binary data for encoding. |
| (...skipping 15 matching lines...) Expand all Loading... | |
| 57 func (rkb *rowKeyBuffers) reset() { | 56 func (rkb *rowKeyBuffers) reset() { |
| 58 if rkb.key == nil { | 57 if rkb.key == nil { |
| 59 rkb.key = make([]byte, maxEncodedKeySize) | 58 rkb.key = make([]byte, maxEncodedKeySize) |
| 60 } | 59 } |
| 61 rkb.size = 0 | 60 rkb.size = 0 |
| 62 } | 61 } |
| 63 | 62 |
| 64 func (rkb *rowKeyBuffers) appendPathPrefix(pathHash []byte) { | 63 func (rkb *rowKeyBuffers) appendPathPrefix(pathHash []byte) { |
| 65 base64.URLEncoding.Encode(rkb.remaining(), pathHash) | 64 base64.URLEncoding.Encode(rkb.remaining(), pathHash) |
| 66 rkb.size += base64.URLEncoding.EncodedLen(len(pathHash)) | 65 rkb.size += base64.URLEncoding.EncodedLen(len(pathHash)) |
| 67 rkb.appendRune('~') | |
| 68 } | 66 } |
| 69 | 67 |
| 70 func (rkb *rowKeyBuffers) appendIndex(i int64) { | 68 func (rkb *rowKeyBuffers) appendInt64(i int64) { |
| 71 // Encode index to "cmpbin". | 69 // Encode index to "cmpbin". |
| 72 rkb.binBuf.Reset() | 70 rkb.binBuf.Reset() |
| 73 cmpbin.WriteInt(&rkb.binBuf, i) | 71 cmpbin.WriteInt(&rkb.binBuf, i) |
| 74 | 72 |
| 75 rkb.size += hex.Encode(rkb.remaining(), rkb.binBuf.Bytes()) | 73 rkb.size += hex.Encode(rkb.remaining(), rkb.binBuf.Bytes()) |
| 76 } | 74 } |
| 77 | 75 |
| 78 func (rkb *rowKeyBuffers) appendRune(r rune) { | 76 func (rkb *rowKeyBuffers) appendBytes(d []byte) { |
| 79 » rkb.size += utf8.EncodeRune(rkb.remaining(), r) | 77 » rkb.size += copy(rkb.remaining(), d) |
| 80 } | 78 } |
| 81 | 79 |
| 82 func (rkb *rowKeyBuffers) remaining() []byte { | 80 func (rkb *rowKeyBuffers) remaining() []byte { |
| 83 return rkb.key[rkb.size:] | 81 return rkb.key[rkb.size:] |
| 84 } | 82 } |
| 85 | 83 |
| 86 func (rkb *rowKeyBuffers) value() string { | 84 func (rkb *rowKeyBuffers) value() string { |
| 87 return string(rkb.key[:rkb.size]) | 85 return string(rkb.key[:rkb.size]) |
| 88 } | 86 } |
| 89 | 87 |
| 90 // rowKey is a BigTable row key. | 88 // rowKey is a BigTable row key. |
| 91 // | 89 // |
| 92 // The row key is formed from a Path and its Index. The goal: | 90 // The row key is formed from a Path and its Index. The goal: |
| 93 // - Rows with the same path should be clustered. | 91 // - Rows with the same path should be clustered. |
| 94 // - Rows with the same path should be sorted according to index. | 92 // - Rows with the same path should be sorted according to index. |
| 95 // | 93 // |
| 96 // Since BigTable rows must be valid UTF8, and since paths are effectively | 94 // Since BigTable rows must be valid UTF8, and since paths are effectively |
| 97 // unbounded, the row key will be formed by composing: | 95 // unbounded, the row key will be formed by composing: |
| 98 // [ base64(sha256(path)) ] + '~' + [ hex(cmpbin(index)) ] | 96 // |
| 97 // [ base64(sha256(path)) ] + '~' + [ hex(cmpbin(index)) ] + '~' + | |
| 98 // [ hex(cmpbin(count)) ] | |
| 99 // | |
| 100 // NOTE: There is a "legacy" period of time when row keys will NOT include a | |
| 101 // count. Since these sort before row keys with a count, row key order will be | |
| 102 // maintained. These row keys will have a count value of "0". | |
| 99 type rowKey struct { | 103 type rowKey struct { |
| 100 pathHash []byte | 104 pathHash []byte |
| 101 index int64 | 105 index int64 |
| 106 count int64 | |
| 102 } | 107 } |
| 103 | 108 |
| 104 // newRowKey generates the row key matching a given entry path and index. | 109 // newRowKey generates the row key matching a given entry path and index. |
| 105 func newRowKey(path string, index int64) *rowKey { | 110 func newRowKey(path string, index, count int64) *rowKey { |
| 106 pathHash := sha256.Sum256([]byte(path)) | 111 pathHash := sha256.Sum256([]byte(path)) |
| 107 return &rowKey{ | 112 return &rowKey{ |
| 108 pathHash: pathHash[:], | 113 pathHash: pathHash[:], |
| 109 index: index, | 114 index: index, |
| 115 count: count, | |
| 110 } | 116 } |
| 111 } | 117 } |
| 112 | 118 |
| 113 // decodeRowKey decodes an encoded row key into its structural components. | 119 // decodeRowKey decodes an encoded row key into its structural components. |
| 114 func decodeRowKey(v string) (*rowKey, error) { | 120 func decodeRowKey(v string) (*rowKey, error) { |
| 115 » keyParts := strings.SplitN(v, "~", 2) | 121 » keyParts := strings.SplitN(v, "~", 3) |
| 116 » if len(keyParts) != 2 { | 122 » if len(keyParts) < 2 { |
| 123 » » // TODO: Make this force 3 once "legacy mode" is disabled. | |
| 117 return nil, errMalformedRowKey | 124 return nil, errMalformedRowKey |
| 118 } | 125 } |
| 119 | 126 |
| 120 hashEnc, idxEnc := keyParts[0], keyParts[1] | 127 hashEnc, idxEnc := keyParts[0], keyParts[1] |
| 121 if base64.URLEncoding.DecodedLen(len(hashEnc)) < sha256.Size { | 128 if base64.URLEncoding.DecodedLen(len(hashEnc)) < sha256.Size { |
| 122 return nil, errMalformedRowKey | 129 return nil, errMalformedRowKey |
| 123 } | 130 } |
| 124 | 131 |
| 125 // Decode encoded path hash. | 132 // Decode encoded path hash. |
| 126 var err error | 133 var err error |
| 127 rk := rowKey{} | 134 rk := rowKey{} |
| 128 rk.pathHash, err = base64.URLEncoding.DecodeString(hashEnc) | 135 rk.pathHash, err = base64.URLEncoding.DecodeString(hashEnc) |
| 129 if err != nil { | 136 if err != nil { |
| 130 return nil, errMalformedRowKey | 137 return nil, errMalformedRowKey |
| 131 } | 138 } |
| 132 | 139 |
| 133 // Decode index. | 140 // Decode index. |
| 134 » idxBytes, err := hex.DecodeString(idxEnc) | 141 » rk.index, err = readHexInt64(idxEnc) |
| 135 if err != nil { | 142 if err != nil { |
| 136 » » return nil, errMalformedRowKey | 143 » » return nil, err |
| 137 } | 144 } |
| 138 | 145 |
| 139 » dr := bytes.NewReader(idxBytes) | 146 » // If a count is available, decode that as well. |
| 140 » index, _, err := cmpbin.ReadInt(dr) | 147 » if len(keyParts) == 3 { |
| 141 » if err != nil { | 148 » » rk.count, err = readHexInt64(keyParts[2]) |
| 142 » » return nil, errMalformedRowKey | 149 » » if err != nil { |
| 150 » » » return nil, err | |
| 151 » » } | |
| 143 } | 152 } |
| 144 rk.index = index | |
| 145 | 153 |
| 146 // There should be no more data. | |
| 147 if dr.Len() > 0 { | |
| 148 return nil, errMalformedRowKey | |
| 149 } | |
| 150 return &rk, nil | 154 return &rk, nil |
| 151 } | 155 } |
| 152 | 156 |
| 153 func (rk *rowKey) String() string { | 157 func (rk *rowKey) String() string { |
| 154 return rk.encode() | 158 return rk.encode() |
| 155 } | 159 } |
| 156 | 160 |
| 157 // encode returns the encoded string form of the row key. | 161 // encode returns the encoded string form of the row key. |
| 158 func (rk *rowKey) encode() (v string) { | 162 func (rk *rowKey) encode() (v string) { |
| 159 // Write the final key to "key": (base64(HASH)~hex(INDEX)) | 163 // Write the final key to "key": (base64(HASH)~hex(INDEX)) |
| 160 withRowKeyBuffers(func(rkb *rowKeyBuffers) { | 164 withRowKeyBuffers(func(rkb *rowKeyBuffers) { |
| 161 rkb.appendPathPrefix(rk.pathHash) | 165 rkb.appendPathPrefix(rk.pathHash) |
| 162 » » rkb.appendIndex(rk.index) | 166 » » rkb.appendBytes([]byte("~")) |
| 167 » » rkb.appendInt64(rk.index) | |
| 168 » » if rk.count > 0 { | |
| 169 » » » rkb.appendBytes([]byte("~")) | |
| 170 » » » rkb.appendInt64(rk.count) | |
| 171 » » } | |
| 163 v = rkb.value() | 172 v = rkb.value() |
| 164 }) | 173 }) |
| 165 return | 174 return |
| 166 } | 175 } |
| 167 | 176 |
| 168 // pathPrefix returns the encoded path prefix for the row key. | 177 // pathPrefix returns the encoded path prefix for the row key. |
| 169 func (rk *rowKey) pathPrefix() (v string) { | 178 func (rk *rowKey) pathPrefix() (v string) { |
| 170 withRowKeyBuffers(func(rkb *rowKeyBuffers) { | 179 withRowKeyBuffers(func(rkb *rowKeyBuffers) { |
| 171 rkb.appendPathPrefix(rk.pathHash) | 180 rkb.appendPathPrefix(rk.pathHash) |
| 181 rkb.appendBytes([]byte("~")) | |
| 172 v = rkb.value() | 182 v = rkb.value() |
| 173 }) | 183 }) |
| 174 return | 184 return |
| 175 } | 185 } |
| 176 | 186 |
| 177 // pathPrefixUpperBound returns the path prefix that is higher than any path | 187 // pathPrefixUpperBound returns the path prefix that is higher than any path |
| 178 // allowed in the row key space. | 188 // allowed in the row key space. |
| 179 // | 189 // |
| 180 // This is accomplished by appending a "~" character to the path prefix, | 190 // This is accomplished by appending a "~" character to the path prefix, |
| 181 // creating something like this: | 191 // creating something like this: |
| 182 // | 192 // |
| 183 // prefix~~ | 193 // prefix~~ |
| 184 // | 194 // |
| 185 // The "prefix~" is shared with all keys in "rk", but the extra "~" is larger | 195 // The "prefix~" is shared with all keys in "rk", but the extra "~" is larger |
| 186 // than any hex-encoded row index, so this key will always be larger. | 196 // than any hex-encoded row index, so this key will always be larger. |
| 187 func (rk *rowKey) pathPrefixUpperBound() (v string) { | 197 func (rk *rowKey) pathPrefixUpperBound() (v string) { |
| 188 withRowKeyBuffers(func(rkb *rowKeyBuffers) { | 198 withRowKeyBuffers(func(rkb *rowKeyBuffers) { |
| 189 rkb.appendPathPrefix(rk.pathHash) | 199 rkb.appendPathPrefix(rk.pathHash) |
| 190 » » rkb.appendRune('~') | 200 » » rkb.appendBytes([]byte("~~")) |
| 191 v = rkb.value() | 201 v = rkb.value() |
| 192 }) | 202 }) |
| 193 return | 203 return |
| 194 } | 204 } |
| 195 | 205 |
| 196 // sharesPathWith tests if the "path" component of the row key "rk" matches | 206 // sharesPathWith tests if the "path" component of the row key "rk" matches |
| 197 // the "path" component of "o". | 207 // the "path" component of "o". |
| 198 func (rk *rowKey) sharesPathWith(o *rowKey) bool { | 208 func (rk *rowKey) sharesPathWith(o *rowKey) bool { |
| 199 return bytes.Equal(rk.pathHash, o.pathHash) | 209 return bytes.Equal(rk.pathHash, o.pathHash) |
| 200 } | 210 } |
| 211 | |
| 212 func readHexInt64(v string) (int64, error) { | |
| 213 d, err := hex.DecodeString(v) | |
| 214 if err != nil { | |
| 215 return 0, errMalformedRowKey | |
| 216 } | |
| 217 | |
| 218 dr := bytes.NewReader(d) | |
| 219 value, _, err := cmpbin.ReadInt(dr) | |
| 220 if err != nil { | |
| 221 return 0, errMalformedRowKey | |
| 222 } | |
| 223 | |
| 224 // There should be no more data. | |
| 225 if dr.Len() > 0 { | |
| 226 return 0, errMalformedRowKey | |
| 227 } | |
| 228 | |
| 229 return value, nil | |
| 230 } | |
| OLD | NEW |