Chromium Code Reviews| Index: go/src/infra/libs/git/tree.go |
| diff --git a/go/src/infra/libs/git/tree.go b/go/src/infra/libs/git/tree.go |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..eb29407cf41db24270a5b123213f27695c902ab2 |
| --- /dev/null |
| +++ b/go/src/infra/libs/git/tree.go |
| @@ -0,0 +1,297 @@ |
| +package git |
| + |
| +import "bytes" |
| +import "fmt" |
| +import "sort" |
| +import "strconv" |
| +import "strings" |
| +import "sync" |
| + |
| +import "infra/libs/infra_util" |
| + |
| +// Tree is a true tree which represents a git 'tree' of objects, and can |
| +// represent those objects recursively (unlike a true git 'tree'). |
| +// |
| +// Trees are also mutable, so you can manipulate them with impunity. Note that |
| +// ID() is not continuously updated, and you must call Rehash in order to |
| +// recalculate the correct ID() after manipulating the Tree. |
| +type Tree struct { |
| + id ObjectID |
| + cachedRawString *string |
|
M-A Ruel
2014/10/18 00:47:06
You could use a string (non pointer) and just do t
iannucci
2014/10/20 21:11:57
It's possible to have an empty tree. I need to dis
|
| + |
| + children map[string]*Child |
| +} |
| + |
| +func (t *Tree) String() string { |
| + return fmt.Sprintf("Tree(%s, %v)", t.id, t.children) |
| +} |
| + |
| +// ID returns the last-calculated ObjectId of this tree. Note that this ID() |
| +// may be out-of-date if you have modified this Tree with {Set,Del}Child(). |
| +func (t *Tree) ID() ObjectID { return t.id } |
| + |
| +// Type returns "tree" to satisfy the Object interface |
| +func (t *Tree) Type() string { return "tree" } |
| + |
| +// Complete indicates that this tree's ID() matches its contents. It can |
| +// become false if you call DelChild or SetChild on this Tree. |
| +func (t *Tree) Complete() bool { return t.cachedRawString != nil } |
| + |
| +// NumChildren returns the number of immediate children this tree has. |
| +func (t *Tree) NumChildren() int { return len(t.children) } |
| + |
| +// Intern recursively interns this tree into the given repo. |
| +// |
| +// If the tree contains a missing object which is internable (i.g. not |
| +// EmptyObject), it will also be interned. |
| +// |
| +// If the tree contains an object with an ID of NoID, this method will panic. |
| +// |
| +// If the tree contains a missing object which is not internable, this method |
| +// will panic (see InternAllowMissing to negate this condition). |
| +// |
| +// This method has the side effect of causing all internable but !Complete() |
| +// objects to become Complete(). In particular, this means that the value of |
| +// t.ID() will be correct after calling this. |
| +func (t *Tree) Intern(r *Repo) { |
| + t.intern(r, false) |
| +} |
| + |
| +// InternAllowMissing is identical to Intern except that it will not panic |
| +// on missing non-internable objects, but just treats them |
| +func (t *Tree) InternAllowMissing(r *Repo) { |
| + t.intern(r, true) |
| +} |
| + |
| +// Rehash causes an !Complete() tree to become Complete, and resets ID() to |
| +// match the Tree's current state. |
| +func (t *Tree) Rehash() { |
| + if !t.Complete() { |
| + t.RawString() |
| + } |
| +} |
| + |
| +// NewEmptyTree creates a new Tree with the given |id|, but no children (e.g. |
| +// it is !Complete()). You only want to use this if you know that you're going |
| +// to be adding children to the Tree, otherwise use EmptyObject("tree", id). |
| +func NewEmptyTree(id ObjectID, childrenHint int) *Tree { |
| + return &Tree{ |
| + id: id, |
| + children: make(map[string]*Child, childrenHint), |
| + } |
| +} |
| + |
| +// TreeFromText returns a Complete() Tree for a `git ls-tree`-formatted |
| +// tree string. |
| +func TreeFromText(data []byte) (ret *Tree, err error) { |
| + lines := strings.Split(strings.TrimSpace(string(data)), "\n") |
| + retPtr := NewEmptyTree(NoID, len(lines)) |
| + for _, line := range lines { |
| + infoPath := strings.SplitN(line, "\t", 2) |
| + var path string |
| + if infoPath[1][0] == '"' { |
| + path, err = strconv.Unquote(infoPath[1]) |
| + if err != nil { |
| + return |
| + } |
| + } else { |
| + path = infoPath[1] |
| + } |
| + |
| + modeTypeID := strings.Fields(infoPath[0]) |
| + var mode uint64 |
| + mode, err = strconv.ParseUint(modeTypeID[0], 8, 32) |
| + if err != nil { |
| + return |
| + } |
| + |
| + retPtr.SetChild(path, NewEmptyChild(Mode(mode), MakeObjectID(modeTypeID[2]))) |
| + } |
| + retPtr.Rehash() |
| + ret = retPtr |
| + return |
| +} |
| + |
| +// TreeFromRawWithID trusts id, and will return a Complete() *Tree. Data is |
| +// formatted the same as RawString (i.e. in the raw git tree format). |
| +func TreeFromRawWithID(id ObjectID, data []byte) (ret *Tree, err error) { |
| + retPtr := NewEmptyTree(id, 16) |
| + // <mode> SP <name> NUL [20]byte |
| + buf := bytes.NewBuffer(data) |
| + nom := infra_util.Nom(buf) |
| + yoink := infra_util.Yoink(buf) |
| + for buf.Len() != 0 { |
| + var mode uint64 |
| + mode, err = strconv.ParseUint(nom(' '), 8, 64) |
| + if err != nil { |
| + return |
| + } |
| + name := nom('\x00') |
| + id := MakeObjectIDRaw(yoink(20)) |
| + retPtr.SetChild(name, NewEmptyChild(Mode(int(mode)), id)) |
| + } |
| + s := string(data) |
| + retPtr.cachedRawString = &s |
| + ret = retPtr |
| + return |
| +} |
| + |
| +// GetChild returns the Child at the slash-delemited |path| relative to this |
| +// Tree, or nil (if the child was not found). |
| +func (t *Tree) GetChild(path string) *Child { |
| + pathParts := strings.SplitN(path, "/", 2) |
| + if len(pathParts) == 1 { |
| + return t.children[path] |
| + } |
| + |
| + dir, rest := pathParts[0], pathParts[1] |
| + intermediate, exists := t.children[dir] |
| + if !exists { |
| + panic(fmt.Errorf("intermediate child %s doesn't exist", dir)) |
| + } |
| + |
| + if childTree, ok := intermediate.Object.(*Tree); !ok { |
| + panic(fmt.Errorf("entry path is not a tree: %s", dir)) |
| + } else { |
| + return childTree.GetChild(rest) |
| + } |
| +} |
| + |
| +// SetChild adds |child| to this Tree at the given slash-delimited |path|. It |
| +// will create intermediate empty Trees if necessary, but will panic() if you |
| +// attempt to set the child under something which is not a tree. |
| +func (t *Tree) SetChild(path string, child *Child) { |
| + t.cachedRawString = nil |
| + pathParts := strings.SplitN(path, "/", 2) |
| + if len(pathParts) == 1 { |
| + t.children[path] = child |
| + } else { |
| + dir, rest := pathParts[0], pathParts[1] |
| + intermediate, exists := t.children[dir] |
| + if !exists { |
| + intermediate = NewEmptyChild(Mode(040000), NoID) |
| + t.children[dir] = intermediate |
| + } |
| + |
| + if childTree, ok := intermediate.Object.(*Tree); !ok { |
| + panic("entry path is not a tree: " + dir) |
| + } else { |
| + childTree.SetChild(rest, child) |
| + } |
| + } |
| +} |
| + |
| +// DelChild removes the child at the slash-delimited |path|. Will panic if |
| +// |path| does not exist, or if one of the intermediate paths is not a Tree. |
| +func (t *Tree) DelChild(path string) { |
| + t.cachedRawString = nil |
| + pathParts := strings.SplitN(path, "/", 2) |
| + if len(pathParts) == 1 { |
| + if _, has := t.children[path]; !has { |
| + panic("entry path does not exist: " + path) |
| + } |
| + delete(t.children, path) |
| + } else { |
| + dir, rest := pathParts[0], pathParts[1] |
| + child, exists := t.children[dir] |
| + if !exists { |
| + panic("entry path does not exist: " + dir) |
| + } |
| + |
| + if childTree, ok := child.Object.(*Tree); !ok { |
| + panic("etnry path is not a tree: " + dir) |
| + } else { |
| + childTree.DelChild(rest) |
| + if childTree.NumChildren() == 0 { |
| + delete(t.children, dir) |
| + } |
| + } |
| + } |
| +} |
| + |
| +// RawString returns a git hash-object compatible representation of this Tree, |
| +// according to the currently-set children (e.g. it may return something which |
| +// does not match ID). |
| +func (t *Tree) RawString() string { |
| + if t.cachedRawString == nil { |
| + buf := &bytes.Buffer{} |
| + |
| + entries := make(treeEntries, 0, len(t.children)) |
| + for path, child := range t.children { |
| + entries = append(entries, treeEntry{ |
| + id: child.Object.ID(), |
| + mode: child.Mode, |
| + name: path, |
| + }) |
| + } |
| + |
| + sort.Sort(entries) |
| + |
| + for _, ent := range entries { |
| + fmt.Fprintf(buf, "%o %s\x00%s", ent.mode, ent.name, ent.id.RawString()) |
| + } |
| + s := buf.String() |
| + t.id = MakeObjectIDForData("tree", buf.Bytes()) |
| + t.cachedRawString = &s |
| + } |
| + return *t.cachedRawString |
| +} |
| + |
| +/// Private |
| +type treeEntry struct { |
| + id ObjectID |
| + mode Mode |
| + name string |
| +} |
| +type treeEntries []treeEntry |
| + |
| +func (s treeEntries) Len() int { return len(s) } |
| + |
| +// Extra trailing slash is how git sorts the tree objects too. |
| +func (s treeEntries) Less(i, j int) bool { |
| + a, b := s[i].name, s[j].name |
| + |
| + if s[i].mode.Type() == "tree" { |
| + a += "/" |
| + } |
| + if s[j].mode.Type() == "tree" { |
| + b += "/" |
| + } |
| + |
| + return a < b |
| +} |
|
M-A Ruel
2014/10/18 00:47:06
Spacing
iannucci
2014/10/20 21:11:57
It's what gofmt does. What did you have in mind?
M-A Ruel
2014/10/21 00:55:53
gofmt allows both no empty line and one empty line
|
| +func (s treeEntries) Swap(i, j int) { s[i], s[j] = s[j], s[i] } |
| + |
| +func (t *Tree) intern(r *Repo, allowMissing bool) { |
|
M-A Ruel
2014/10/18 00:47:06
Can you add a short comment about what it means?
iannucci
2014/10/20 21:11:57
Done.
|
| + if t.Complete() && r.HasObjectID(t.ID()) { |
| + return |
| + } |
| + grp := sync.WaitGroup{} |
| + for _, c := range t.children { |
| + c := c |
|
M-A Ruel
2014/10/18 00:47:06
?
iannucci
2014/10/20 21:11:58
Again, avoids loop variable aliasing. See bottom o
|
| + grp.Add(1) |
| + go func() { |
| + defer grp.Done() |
| + switch x := c.Object.(type) { |
| + case *Tree: |
| + x.intern(r, allowMissing) |
| + case InternableObject: |
| + r.Intern(x) |
| + case Object: |
| + if !(r.HasObjectID(x.ID()) || allowMissing) { |
| + panic(fmt.Errorf("missing non-internable object: %v", x)) |
| + } |
| + if x.ID() == NoID { |
| + panic(fmt.Errorf("object has an ID of <NoID>")) |
| + } |
| + } |
| + }() |
| + } |
| + grp.Wait() |
| + if !(t.ID() != NoID && t.NumChildren() == 0) { |
| + // Having an ID but no children is a special case. Because of how DelChild |
| + // cleans up empty trees, you can't have an empty subtree, unless you |
| + // intentionally put an empty tree there with an ID. |
| + r.Intern(t) |
| + } |
| +} |