Chromium Code Reviews| Index: go/src/infra/libs/git/tree.go |
| diff --git a/go/src/infra/libs/git/tree.go b/go/src/infra/libs/git/tree.go |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..1022022018a15907e2598eec525fb74fa20afc48 |
| --- /dev/null |
| +++ b/go/src/infra/libs/git/tree.go |
| @@ -0,0 +1,343 @@ |
| +// Copyright 2014 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| +package git |
| + |
| +import ( |
| + "bytes" |
| + "fmt" |
| + "sort" |
| + "strconv" |
| + "strings" |
| + "sync" |
| + |
| + "infra/libs/infra_util" |
| +) |
| + |
| +// Mode is a typed representation of git's tree mode concept |
| +type Mode int |
|
M-A Ruel
2014/10/21 00:55:55
Mode is maybe a tad too generic. But I see that it
|
| + |
| +// Type returns a Object.Type() compatible string for this mode. |
| +func (m Mode) Type() ObjectType { |
| + switch { |
| + case m == 0040000: |
| + return TreeType |
| + case m == 0160000: |
| + return CommitType |
| + case (m & 0100000) != 0: |
| + return BlobType |
| + default: |
| + panic(fmt.Sprintf("unknown type for mode %d", m)) |
|
M-A Ruel
2014/10/21 00:55:55
fmt.Errorf()
but I don't like that input data can
|
| + } |
| +} |
| + |
| +func (m Mode) String() string { return fmt.Sprintf("%06o", m) } |
| + |
| +// Child is an entry in a Tree. It ties a Mode to an Object. |
| +type Child struct { |
| + Object Object |
| + Mode Mode |
| +} |
| + |
| +func (c Child) String() string { |
| + return fmt.Sprintf("Child(%s, %s)", c.Object, c.Mode) |
| +} |
| + |
| +// Tree is a true tree which represents a git 'tree' of objects, and can |
| +// represent those objects recursively (unlike a true git 'tree'). |
| +// |
| +// Trees are also mutable, so you can manipulate them with impunity. Note that |
| +// ID() is not continuously updated, and you must call Rehash in order to |
| +// recalculate the correct ID() after manipulating the Tree. |
| +type Tree struct { |
| + id ObjectID |
| + cachedRawString *string |
| + |
| + children map[string]*Child |
| +} |
| + |
| +func (t *Tree) String() string { |
| + return fmt.Sprintf("Tree(%s, %v)", t.id, t.children) |
| +} |
| + |
| +// ID returns the last-calculated ObjectId of this tree. Note that this ID() |
| +// may be out-of-date if you have modified this Tree with {Set,Del}Child(). |
| +func (t *Tree) ID() ObjectID { return t.id } |
|
M-A Ruel
2014/10/21 00:55:55
Why not call Rehash automatically?
iannucci
2016/05/23 21:53:43
Because if it's an empty tree, rehashing it would
|
| + |
| +// Type returns TreeType to satisfy the Object interface |
| +func (t *Tree) Type() ObjectType { return TreeType } |
| + |
| +// Complete indicates that this tree's ID() matches its contents. It can |
| +// become false if you call DelChild or SetChild on this Tree. |
| +func (t *Tree) Complete() bool { return t.cachedRawString != nil } |
| + |
| +// NumChildren returns the number of immediate children this tree has. |
| +func (t *Tree) NumChildren() int { return len(t.children) } |
| + |
| +// Intern recursively interns this tree into the given repo. |
| +// |
| +// If the tree contains a missing object which is internable (i.g. not |
| +// EmptyObject), it will also be interned. |
| +// |
| +// If the tree contains an object with an ID of NoID, this method will panic. |
| +// |
| +// If the tree contains a missing object which is not internable, this method |
| +// will panic (see InternAllowMissing to negate this condition). |
| +// |
| +// This method has the side effect of causing all internable but !Complete() |
| +// objects to become Complete(). In particular, this means that the value of |
| +// t.ID() will be correct after calling this. |
| +func (t *Tree) Intern(r *Repo) { |
| + t.intern(r, false) |
| +} |
| + |
| +// InternAllowMissing is identical to Intern except that it will not panic |
| +// on missing non-internable objects, but just treats them |
| +func (t *Tree) InternAllowMissing(r *Repo) { |
| + t.intern(r, true) |
| +} |
| + |
| +// Rehash causes an !Complete() tree to become Complete, and resets ID() to |
| +// match the Tree's current state. |
| +func (t *Tree) Rehash() { |
| + if !t.Complete() { |
| + t.RawString() |
| + } |
| +} |
| + |
| +// NewEmptyTree creates a new Tree with the given |id|, but no children (e.g. |
| +// it is !Complete()). You only want to use this if you know that you're going |
| +// to be adding children to the Tree, otherwise use EmptyObject(TreeType, id). |
| +func NewEmptyTree(id ObjectID, childrenHint int) *Tree { |
| + return &Tree{ |
| + id: id, |
| + children: make(map[string]*Child, childrenHint), |
| + } |
| +} |
| + |
| +// NewEmptyChild returns a Child containing an EmptyObject of id. |
| +func NewEmptyChild(mode Mode, id ObjectID) *Child { |
| + return &Child{ |
| + NewEmptyObject(mode.Type(), id), |
| + mode, |
| + } |
| +} |
| + |
| +// TreeFromText returns a Complete() Tree for a `git ls-tree`-formatted |
|
M-A Ruel
2014/10/21 00:55:55
Note that there could be cases where it doesn't fi
iannucci
2016/05/23 21:53:43
The thought crossed my mind, but for now I think i
|
| +// tree string. |
| +func TreeFromText(data []byte) (ret *Tree, err error) { |
| + lines := strings.Split(strings.TrimSpace(string(data)), "\n") |
| + retPtr := NewEmptyTree(NoID, len(lines)) |
| + for _, line := range lines { |
| + infoPath := strings.SplitN(line, "\t", 2) |
| + var path string |
| + if infoPath[1][0] == '"' { |
| + path, err = strconv.Unquote(infoPath[1]) |
| + if err != nil { |
| + return |
| + } |
| + } else { |
| + path = infoPath[1] |
| + } |
| + |
| + modeTypeID := strings.Fields(infoPath[0]) |
| + var mode uint64 |
| + mode, err = strconv.ParseUint(modeTypeID[0], 8, 32) |
| + if err != nil { |
| + return |
| + } |
| + |
| + retPtr.SetChild(path, NewEmptyChild(Mode(mode), MakeObjectID(modeTypeID[2]))) |
| + } |
| + retPtr.Rehash() |
| + ret = retPtr |
| + return |
| +} |
| + |
| +// TreeFromRawWithID trusts id, and will return a Complete() *Tree. Data is |
| +// formatted the same as RawString (i.e. in the raw git tree format). |
| +func TreeFromRawWithID(id ObjectID, data []byte) (ret *Tree, err error) { |
| + retPtr := NewEmptyTree(id, 16) |
| + // <mode> SP <name> NUL [20]byte |
| + buf := bytes.NewBuffer(data) |
| + nom := infra_util.Nom(buf) |
| + yoink := infra_util.Yoink(buf) |
| + for buf.Len() != 0 { |
| + var mode uint64 |
| + mode, err = strconv.ParseUint(nom(' '), 8, 64) |
| + if err != nil { |
| + return |
| + } |
| + name := nom('\x00') |
| + id := MakeObjectIDRaw(yoink(20)) |
| + retPtr.SetChild(name, NewEmptyChild(Mode(int(mode)), id)) |
| + } |
| + s := string(data) |
| + retPtr.cachedRawString = &s |
| + ret = retPtr |
| + return |
| +} |
| + |
| +// GetChild returns the Child at the slash-delemited |path| relative to this |
| +// Tree, or nil (if the child was not found). |
| +func (t *Tree) GetChild(path string) *Child { |
| + pathParts := strings.SplitN(path, "/", 2) |
| + if len(pathParts) == 1 { |
| + return t.children[path] |
| + } |
| + |
| + dir, rest := pathParts[0], pathParts[1] |
| + intermediate, exists := t.children[dir] |
| + if !exists { |
| + panic(fmt.Errorf("intermediate child %s doesn't exist", dir)) |
|
M-A Ruel
2014/10/21 00:55:55
return an error, it's not panic worthy.
|
| + } |
| + |
| + if childTree, ok := intermediate.Object.(*Tree); !ok { |
| + panic(fmt.Errorf("entry path is not a tree: %s", dir)) |
| + } else { |
| + return childTree.GetChild(rest) |
| + } |
| +} |
| + |
| +// SetChild adds |child| to this Tree at the given slash-delimited |path|. It |
| +// will create intermediate empty Trees if necessary, but will panic() if you |
| +// attempt to set the child under something which is not a tree. |
| +func (t *Tree) SetChild(path string, child *Child) { |
| + t.cachedRawString = nil |
| + pathParts := strings.SplitN(path, "/", 2) |
| + if len(pathParts) == 1 { |
| + t.children[path] = child |
| + } else { |
| + dir, rest := pathParts[0], pathParts[1] |
| + intermediate, exists := t.children[dir] |
| + if !exists { |
| + intermediate = NewEmptyChild(Mode(040000), NoID) |
| + t.children[dir] = intermediate |
| + } |
| + |
| + if childTree, ok := intermediate.Object.(*Tree); !ok { |
| + panic("entry path is not a tree: " + dir) |
| + } else { |
| + childTree.SetChild(rest, child) |
| + } |
| + } |
| +} |
| + |
| +// DelChild removes the child at the slash-delimited |path|. Will panic if |
| +// |path| does not exist, or if one of the intermediate paths is not a Tree. |
| +func (t *Tree) DelChild(path string) { |
| + t.cachedRawString = nil |
| + pathParts := strings.SplitN(path, "/", 2) |
| + if len(pathParts) == 1 { |
| + if _, has := t.children[path]; !has { |
| + panic("entry path does not exist: " + path) |
| + } |
| + delete(t.children, path) |
| + } else { |
| + dir, rest := pathParts[0], pathParts[1] |
| + child, exists := t.children[dir] |
| + if !exists { |
| + panic("entry path does not exist: " + dir) |
| + } |
| + |
| + if childTree, ok := child.Object.(*Tree); !ok { |
| + panic("etnry path is not a tree: " + dir) |
| + } else { |
| + childTree.DelChild(rest) |
| + if childTree.NumChildren() == 0 { |
| + delete(t.children, dir) |
| + } |
| + } |
| + } |
| +} |
| + |
| +// RawString returns a git hash-object compatible representation of this Tree, |
| +// according to the currently-set children (e.g. it may return something which |
| +// does not match ID). |
| +func (t *Tree) RawString() string { |
| + if t.cachedRawString == nil { |
|
M-A Ruel
2014/10/21 00:55:55
It's not necessary that it is a pointer, because i
iannucci
2016/05/23 21:53:43
Except that it /can/ be "" :).
$ git hash-object
|
| + buf := &bytes.Buffer{} |
| + |
| + entries := make(treeEntries, 0, len(t.children)) |
| + for path, child := range t.children { |
| + entries = append(entries, treeEntry{ |
| + id: child.Object.ID(), |
| + mode: child.Mode, |
| + name: path, |
| + }) |
| + } |
| + |
| + sort.Sort(entries) |
| + |
| + for _, ent := range entries { |
| + fmt.Fprintf(buf, "%o %s\x00%s", ent.mode, ent.name, ent.id.RawString()) |
| + } |
| + s := buf.String() |
| + t.id = MakeObjectIDForData(TreeType, buf.Bytes()) |
| + t.cachedRawString = &s |
| + } |
| + return *t.cachedRawString |
| +} |
| + |
| +/// Private |
|
M-A Ruel
2014/10/21 00:55:54
assuming is a file block section, add an empty lin
iannucci
2016/05/23 21:53:43
Done.
|
| +type treeEntry struct { |
| + id ObjectID |
| + mode Mode |
| + name string |
| +} |
| +type treeEntries []treeEntry |
| + |
| +func (s treeEntries) Len() int { return len(s) } |
| + |
| +// Extra trailing slash is how git sorts the tree objects too. |
| +func (s treeEntries) Less(i, j int) bool { |
| + a, b := s[i].name, s[j].name |
| + |
| + if s[i].mode.Type() == TreeType { |
| + a += "/" |
| + } |
| + if s[j].mode.Type() == TreeType { |
| + b += "/" |
| + } |
| + |
| + return a < b |
| +} |
| +func (s treeEntries) Swap(i, j int) { s[i], s[j] = s[j], s[i] } |
| + |
| +// Interns the Tree recursively. If allowMissing is true, this will not panic |
|
M-A Ruel
2014/10/21 00:55:55
// intern interns the ..
|
| +// when the repo is missing an object which is non-internable in this tree (i.e. |
| +// if you have an EmptyObject in the Tree, and the repo doesn't contain the |
| +// real object). |
| +func (t *Tree) intern(r *Repo, allowMissing bool) { |
| + if t.Complete() && r.HasObjectID(t.ID()) { |
| + return |
| + } |
| + grp := sync.WaitGroup{} |
| + for _, c := range t.children { |
| + c := c |
| + grp.Add(1) |
| + go func() { |
| + defer grp.Done() |
| + switch x := c.Object.(type) { |
| + case *Tree: |
| + x.intern(r, allowMissing) |
| + case InternableObject: |
| + r.Intern(x) |
| + case Object: |
| + if !(r.HasObjectID(x.ID()) || allowMissing) { |
| + panic(fmt.Errorf("missing non-internable object: %v", x)) |
| + } |
| + if x.ID() == NoID { |
| + panic(fmt.Errorf("object has an ID of <NoID>")) |
| + } |
| + } |
| + }() |
| + } |
| + grp.Wait() |
| + if !(t.ID() != NoID && t.NumChildren() == 0) { |
| + // Having an ID but no children is a special case. Because of how DelChild |
| + // cleans up empty trees, you can't have an empty subtree, unless you |
| + // intentionally put an empty tree there with an ID. |
| + r.Intern(t) |
|
M-A Ruel
2014/10/21 00:55:55
What about a directory containing a directory that
iannucci
2016/05/23 21:53:43
It's not a standard thing in git. You can contrive
|
| + } |
| +} |