server/logdog/storage/bigtable/rowKey.go - Issue 1872903002: LogDog: Enable keys-only BigTable queries.

Side by Side Diff: server/logdog/storage/bigtable/rowKey.go

Issue 1872903002: LogDog: Enable keys-only BigTable queries. (Closed) Base URL: https://github.com/luci/luci-go@logdog-archive-v2

Patch Set: Rebase Created 4 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2015 The Chromium Authors. All rights reserved.	1 // Copyright 2015 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 package bigtable	5 package bigtable

6	6

7 import (	7 import (

8 "bytes"	8 "bytes"

9 "crypto/sha256"	9 "crypto/sha256"

10 "encoding/base64"	10 "encoding/base64"

11 "encoding/hex"	11 "encoding/hex"

12 "errors"	12 "errors"

13 "strings"	13 "strings"

14 "sync"	14 "sync"

15 "unicode/utf8"

16	15

17 "github.com/luci/luci-go/common/cmpbin"	16 "github.com/luci/luci-go/common/cmpbin"

18 )	17 )

19	18

20 // rowKeyBufferPool stores a pool of allocated Buffer instances for reuse when	19 // rowKeyBufferPool stores a pool of allocated Buffer instances for reuse when

21 // constructing row keys.	20 // constructing row keys.

22 var (	21 var (

23 // errMalformedRowKey is an error that is returned if the row key in the	22 // errMalformedRowKey is an error that is returned if the row key in the

24 // tables does not conform to our row key structure.	23 // tables does not conform to our row key structure.

25 errMalformedRowKey = errors.New("bigtable: malformed row key")	24 errMalformedRowKey = errors.New("bigtable: malformed row key")

26	25

27 // encodedPrefixSize is the size in bytes of the encoded row key prefix. All	26 // encodedPrefixSize is the size in bytes of the encoded row key prefix. All

28 // rows from the same stream path share this prefix.	27 // rows from the same stream path share this prefix.

29 » encodedPrefixSize = base64.URLEncoding.EncodedLen(sha256.Size) + len("~")	28 » encodedPrefixSize = base64.URLEncoding.EncodedLen(sha256.Size)

30 // maxEncodedKeySize is the maximum size in bytes of a full row key.	29 // maxEncodedKeySize is the maximum size in bytes of a full row key.

31 » maxEncodedKeySize = encodedPrefixSize + hex.EncodedLen(cmpbin.MaxIntLen64)	30 » maxEncodedKeySize = encodedPrefixSize + (2 * (len("~") + hex.EncodedLen(cmpbin.MaxIntLen64)))
	Ryan Tseng 2016/04/13 21:38:06: The extra parens seem redundant. dnj 2016/04/13 21:44:15 (in reply to Ryan Tseng, 2016/04/13 21:38:06): Acknowledged.
32	31

33 rowKeyBufferPool = sync.Pool{	32 rowKeyBufferPool = sync.Pool{

34 New: func() interface{} {	33 New: func() interface{} {

35 return &rowKeyBuffers{}	34 return &rowKeyBuffers{}

36 },	35 },

37 }	36 }

38 )	37 )

39	38

40 type rowKeyBuffers struct {	39 type rowKeyBuffers struct {

41 // binBuf is a Buffer to write binary data for encoding.	40 // binBuf is a Buffer to write binary data for encoding.

(...skipping 15 matching lines...) Expand all Loading...
57 func (rkb *rowKeyBuffers) reset() {	56 func (rkb *rowKeyBuffers) reset() {

58 if rkb.key == nil {	57 if rkb.key == nil {

59 rkb.key = make([]byte, maxEncodedKeySize)	58 rkb.key = make([]byte, maxEncodedKeySize)

60 }	59 }

61 rkb.size = 0	60 rkb.size = 0

62 }	61 }

63	62

64 func (rkb *rowKeyBuffers) appendPathPrefix(pathHash []byte) {	63 func (rkb *rowKeyBuffers) appendPathPrefix(pathHash []byte) {

65 base64.URLEncoding.Encode(rkb.remaining(), pathHash)	64 base64.URLEncoding.Encode(rkb.remaining(), pathHash)

66 rkb.size += base64.URLEncoding.EncodedLen(len(pathHash))	65 rkb.size += base64.URLEncoding.EncodedLen(len(pathHash))

67 rkb.appendRune('~')

68 }	66 }

69	67

70 func (rkb *rowKeyBuffers) appendIndex(i int64) {	68 func (rkb *rowKeyBuffers) appendInt64(i int64) {

71 // Encode index to "cmpbin".	69 // Encode index to "cmpbin".

72 rkb.binBuf.Reset()	70 rkb.binBuf.Reset()

73 cmpbin.WriteInt(&rkb.binBuf, i)	71 cmpbin.WriteInt(&rkb.binBuf, i)

74	72

75 rkb.size += hex.Encode(rkb.remaining(), rkb.binBuf.Bytes())	73 rkb.size += hex.Encode(rkb.remaining(), rkb.binBuf.Bytes())

76 }	74 }

77	75

78 func (rkb *rowKeyBuffers) appendRune(r rune) {	76 func (rkb *rowKeyBuffers) appendBytes(d []byte) {

79 » rkb.size += utf8.EncodeRune(rkb.remaining(), r)	77 » rkb.size += copy(rkb.remaining(), d)

80 }	78 }

81	79

82 func (rkb *rowKeyBuffers) remaining() []byte {	80 func (rkb *rowKeyBuffers) remaining() []byte {

83 return rkb.key[rkb.size:]	81 return rkb.key[rkb.size:]

84 }	82 }

85	83

86 func (rkb *rowKeyBuffers) value() string {	84 func (rkb *rowKeyBuffers) value() string {

87 return string(rkb.key[:rkb.size])	85 return string(rkb.key[:rkb.size])

88 }	86 }

89	87

90 // rowKey is a BigTable row key.	88 // rowKey is a BigTable row key.

91 //	89 //

92 // The row key is formed from a Path and its Index. The goal:	90 // The row key is formed from a Path and its Index. The goal:

93 // - Rows with the same path should be clustered.	91 // - Rows with the same path should be clustered.

94 // - Rows with the same path should be sorted according to index.	92 // - Rows with the same path should be sorted according to index.

95 //	93 //

96 // Since BigTable rows must be valid UTF8, and since paths are effectively	94 // Since BigTable rows must be valid UTF8, and since paths are effectively

97 // unbounded, the row key will be formed by composing:	95 // unbounded, the row key will be formed by composing:

98 // [ base64(sha256(path)) ] + '~' + [ hex(cmpbin(index)) ]	96 //

	97 // [ base64(sha256(path)) ] + '~' + [ hex(cmpbin(index)) ] + '~' +

	98 // [ hex(cmpbin(count)) ]

	99 //

	100 // NOTE: There is a "legacy" period of time when row keys will NOT include a

	101 // count. Since these sort before row keys with a count, row key order will be

	102 // maintained. These row keys will have a count value of "0".

99 type rowKey struct {	103 type rowKey struct {

100 pathHash []byte	104 pathHash []byte

101 index int64	105 index int64

	106 count int64

102 }	107 }

103	108

104 // newRowKey generates the row key matching a given entry path and index.	109 // newRowKey generates the row key matching a given entry path and index.

105 func newRowKey(path string, index int64) *rowKey {	110 func newRowKey(path string, index, count int64) *rowKey {

106 pathHash := sha256.Sum256([]byte(path))	111 pathHash := sha256.Sum256([]byte(path))

107 return &rowKey{	112 return &rowKey{

108 pathHash: pathHash[:],	113 pathHash: pathHash[:],

109 index: index,	114 index: index,

	115 count: count,

110 }	116 }

111 }	117 }

112	118

113 // decodeRowKey decodes an encoded row key into its structural components.	119 // decodeRowKey decodes an encoded row key into its structural components.

114 func decodeRowKey(v string) (*rowKey, error) {	120 func decodeRowKey(v string) (*rowKey, error) {

115 » keyParts := strings.SplitN(v, "~", 2)	121 » keyParts := strings.SplitN(v, "~", 3)

116 » if len(keyParts) != 2 {	122 » if len(keyParts) < 2 {

	123 » » // TODO: Make this force 3 once "legacy mode" is disabled.

117 return nil, errMalformedRowKey	124 return nil, errMalformedRowKey

118 }	125 }

119	126

120 hashEnc, idxEnc := keyParts[0], keyParts[1]	127 hashEnc, idxEnc := keyParts[0], keyParts[1]

121 if base64.URLEncoding.DecodedLen(len(hashEnc)) < sha256.Size {	128 if base64.URLEncoding.DecodedLen(len(hashEnc)) < sha256.Size {

122 return nil, errMalformedRowKey	129 return nil, errMalformedRowKey

123 }	130 }

124	131

125 // Decode encoded path hash.	132 // Decode encoded path hash.

126 var err error	133 var err error

127 rk := rowKey{}	134 rk := rowKey{}

128 rk.pathHash, err = base64.URLEncoding.DecodeString(hashEnc)	135 rk.pathHash, err = base64.URLEncoding.DecodeString(hashEnc)

129 if err != nil {	136 if err != nil {

130 return nil, errMalformedRowKey	137 return nil, errMalformedRowKey

131 }	138 }

132	139

133 // Decode index.	140 // Decode index.

134 » idxBytes, err := hex.DecodeString(idxEnc)	141 » rk.index, err = readHexInt64(idxEnc)

135 if err != nil {	142 if err != nil {

136 » » return nil, errMalformedRowKey	143 » » return nil, err

137 }	144 }

138	145

139 » dr := bytes.NewReader(idxBytes)	146 » // If a count is available, decode that as well.

140 » index, _, err := cmpbin.ReadInt(dr)	147 » if len(keyParts) == 3 {

141 » if err != nil {	148 » » rk.count, err = readHexInt64(keyParts[2])

142 » » return nil, errMalformedRowKey	149 » » if err != nil {

	150 » » » return nil, err

	151 » » }

143 }	152 }

144 rk.index = index

145	153

146 // There should be no more data.

147 if dr.Len() > 0 {

148 return nil, errMalformedRowKey

149 }

150 return &rk, nil	154 return &rk, nil

151 }	155 }

152	156

153 func (rk *rowKey) String() string {	157 func (rk *rowKey) String() string {

154 return rk.encode()	158 return rk.encode()

155 }	159 }

156	160

157 // encode returns the encoded string form of the row key.	161 // encode returns the encoded string form of the row key.

158 func (rk *rowKey) encode() (v string) {	162 func (rk *rowKey) encode() (v string) {

159 // Write the final key to "key": (base64(HASH)~hex(INDEX))	163 // Write the final key to "key": (base64(HASH)~hex(INDEX))

160 withRowKeyBuffers(func(rkb *rowKeyBuffers) {	164 withRowKeyBuffers(func(rkb *rowKeyBuffers) {

161 rkb.appendPathPrefix(rk.pathHash)	165 rkb.appendPathPrefix(rk.pathHash)

162 » » rkb.appendIndex(rk.index)	166 » » rkb.appendBytes([]byte("~"))

	167 » » rkb.appendInt64(rk.index)

	168 » » if rk.count > 0 {

	169 » » » rkb.appendBytes([]byte("~"))

	170 » » » rkb.appendInt64(rk.count)

	171 » » }

163 v = rkb.value()	172 v = rkb.value()

164 })	173 })

165 return	174 return

166 }	175 }

167	176

168 // pathPrefix returns the encoded path prefix for the row key.	177 // pathPrefix returns the encoded path prefix for the row key.

169 func (rk *rowKey) pathPrefix() (v string) {	178 func (rk *rowKey) pathPrefix() (v string) {

170 withRowKeyBuffers(func(rkb *rowKeyBuffers) {	179 withRowKeyBuffers(func(rkb *rowKeyBuffers) {

171 rkb.appendPathPrefix(rk.pathHash)	180 rkb.appendPathPrefix(rk.pathHash)

	181 rkb.appendBytes([]byte("~"))

172 v = rkb.value()	182 v = rkb.value()

173 })	183 })

174 return	184 return

175 }	185 }

176	186

177 // pathPrefixUpperBound returns the path prefix that is higher than any path	187 // pathPrefixUpperBound returns the path prefix that is higher than any path

178 // allowed in the row key space.	188 // allowed in the row key space.

179 //	189 //

180 // This is accomplished by appending a "~" character to the path prefix,	190 // This is accomplished by appending a "~" character to the path prefix,

181 // creating something like this:	191 // creating something like this:

182 //	192 //

183 // prefix~~	193 // prefix~~

184 //	194 //

185 // The "prefix~" is shared with all keys in "rk", but the extra "~" is larger	195 // The "prefix~" is shared with all keys in "rk", but the extra "~" is larger

186 // than any hex-encoded row index, so this key will always be larger.	196 // than any hex-encoded row index, so this key will always be larger.

187 func (rk *rowKey) pathPrefixUpperBound() (v string) {	197 func (rk *rowKey) pathPrefixUpperBound() (v string) {

188 withRowKeyBuffers(func(rkb *rowKeyBuffers) {	198 withRowKeyBuffers(func(rkb *rowKeyBuffers) {

189 rkb.appendPathPrefix(rk.pathHash)	199 rkb.appendPathPrefix(rk.pathHash)

190 » » rkb.appendRune('~')	200 » » rkb.appendBytes([]byte("~~"))

191 v = rkb.value()	201 v = rkb.value()

192 })	202 })

193 return	203 return

194 }	204 }

195	205

196 // sharesPathWith tests if the "path" component of the row key "rk" matches	206 // sharesPathWith tests if the "path" component of the row key "rk" matches

197 // the "path" component of "o".	207 // the "path" component of "o".

198 func (rk rowKey) sharesPathWith(o rowKey) bool {	208 func (rk rowKey) sharesPathWith(o rowKey) bool {

199 return bytes.Equal(rk.pathHash, o.pathHash)	209 return bytes.Equal(rk.pathHash, o.pathHash)

200 }	210 }

	211

	212 func readHexInt64(v string) (int64, error) {

	213 d, err := hex.DecodeString(v)

	214 if err != nil {

	215 return 0, errMalformedRowKey

	216 }

	217

	218 dr := bytes.NewReader(d)

	219 value, _, err := cmpbin.ReadInt(dr)

	220 if err != nil {

	221 return 0, errMalformedRowKey

	222 }

	223

	224 // There should be no more data.

	225 if dr.Len() > 0 {

	226 return 0, errMalformedRowKey

	227 }

	228

	229 return value, nil

	230 }

OLD	NEW

« no previous file with comments | « server/logdog/storage/bigtable/bigtable_test.go ('k') | server/logdog/storage/bigtable/rowKey_test.go » ('j') | no next file with comments »