Chromium Code Reviews| Index: go/src/infra/libs/git/repo.go |
| diff --git a/go/src/infra/libs/git/repo.go b/go/src/infra/libs/git/repo.go |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..23029efc927219d79199eda1bde4e3399e7e3750 |
| --- /dev/null |
| +++ b/go/src/infra/libs/git/repo.go |
| @@ -0,0 +1,423 @@ |
| +// Copyright 2014 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| +package git |
| + |
| +import ( |
| + "bufio" |
| + "bytes" |
| + "fmt" |
| + "os/exec" |
| + "strconv" |
| + "strings" |
| + "sync" |
| + "sync/atomic" |
| + "unsafe" |
| + |
| + "infra/libs/infra_util" |
| +) |
| + |
| +type blobOpt bool |
| + |
| +const ( |
| + WithBlobs blobOpt = true |
| + NoBlobs = false |
|
M-A Ruel
2014/10/21 00:55:54
If the type is not exported, the consts should not
iannucci
2016/05/23 21:53:42
Why not? I don't want them constructing their own
|
| +) |
| + |
| +type fullTree bool |
| + |
| +const ( |
| + FullTree fullTree = true |
| + MissingOK = false |
| +) |
| + |
| +// Repo represents a local git repository at the path |Path|. |
| +type Repo struct { |
| + // The path on disk to the location of the git repo. |
| + Path string |
| + |
| + catFile *chan<- catFileRequest |
|
M-A Ruel
2014/10/21 00:55:54
I still don't understand why it's a pointer. Creat
|
| + catFileCheck *chan<- catFileRequest |
| +} |
| + |
| +// TreeDiff represents the difference between two Treeish objects |
| +type TreeDiff []TreeDiffEntry |
| + |
| +// TreeDiffEntry represents the before and after of one path in the repo. |
| +// Note that the Old.Name and New.Name may be different if this item was |
| +// Moved or Copied. |
| +type TreeDiffEntry struct { |
| + // Action is one of "ACDMRTUX" |
|
M-A Ruel
2014/10/21 00:55:54
Action
iannucci
2016/05/23 21:53:42
oops. done.
|
| + // U is for unmerged... if you're just comparing trees you should never see this |
| + // X is probably a bug in git... you should also never see this. |
| + // T is a type change, so if a tree turned into a blob, for example |
| + Action string |
| + |
| + // For Action types 'R' or 'C', how similar the old and new blobs are, as a |
| + // percentage from 0 to 100. |
| + Similarity int |
| + |
| + Old TreeDiffEntryHalf |
| + New TreeDiffEntryHalf |
| +} |
| + |
| +// TreeDiffEntryHalf is one entry in a TreeDiffEntry, either the Old or New half. |
| +type TreeDiffEntryHalf struct { |
| + Child |
| + Name string |
| +} |
| + |
| +func (t *TreeDiffEntryHalf) String() string { |
| + return fmt.Sprintf("%s: %s", t.Name, t.Child) |
| +} |
| + |
| +// InternableObject is an Object which may be interned into a git repo |
| +// (e.g. hash-object'd). EmptyObjects are NOT InternableObjects. |
| +type InternableObject interface { |
| + Object |
| + |
| + // A hash-object compatible string. May panic if this is an Object which is |
| + // !Complete(). Note that for Trees this does NOT imply that all of the |
| + // Tree's children are Complete(). |
| + RawString() string |
|
M-A Ruel
2014/10/21 00:55:54
As with the other part, I think []byte is a bit mo
iannucci
2016/05/23 21:53:42
but then I have to copy all that data or ppl could
|
| +} |
| + |
| +// RunInput runs a git command in the Repo, passing input on stdin, and returning |
| +// the stdout and success (i.e. did the git command return 0?) |
| +func (r *Repo) RunInput(input string, cmd ...string) (string, bool) { |
|
M-A Ruel
2014/10/21 00:55:54
That's an example of a function that could accept
iannucci
2016/05/23 21:53:42
I'm not sure why that would be helpful though, sin
|
| + ex := exec.Command("git", cmd...) |
| + ex.Dir = r.Path |
| + ex.Stdin = bytes.NewBufferString(input) |
| + out, err := ex.Output() |
| + if err == nil { |
| + return string(out), true |
| + } else if _, ok := err.(*exec.ExitError); ok { |
| + return string(out), false |
| + } else { |
| + panic(err) |
|
Vadim Sh.
2014/10/21 15:26:59
when this can happen?
|
| + } |
| +} |
| + |
| +// Run runs a git command in the Repo, with no input, returning the stdout and |
| +// success. |
| +func (r *Repo) Run(cmd ...string) (string, bool) { |
| + return r.RunInput("", cmd...) |
| +} |
| + |
| +// RunOk runs a git command in the repo with no input, and returns its success |
| +// (did it exit 0?) |
| +func (r *Repo) RunOk(cmd ...string) bool { |
| + _, ok := r.Run(cmd...) |
| + return ok |
| +} |
| + |
| +// RunOutput runs a git command in the repo with no input, and returns its |
| +// stdout |
| +func (r *Repo) RunOutput(cmd ...string) string { |
| + out, _ := r.Run(cmd...) |
| + return out |
| +} |
| + |
| +// GetObject fetches |objectish| from the Repo, blocking until the parsed |
| +// object is available. If the object is missing, GetObject will |

Vadim Sh.
2014/10/21 15:27:00
I don't see it returning a channel... Also it's bl
|
| +// return nil. |
| +func (r *Repo) GetObject(objectish string) InternableObject { |
| + r.ensureCatFileServer(&r.catFile, false) |
| + |
| + rchan := make(chan *catFileReply, 1) |
|
Vadim Sh.
2014/10/21 15:26:59
Does go have thread pool implementation?
IMHO, it
|
| + (*r.catFile) <- catFileRequest{ |
| + objectish: objectish, |
| + reply: rchan, |
| + } |
| + |
| + if rsp := <-rchan; rsp == nil { |
| + return nil |
| + } else { |
| + switch rsp.typ { |
| + case BlobType: |
| + return BlobFromRawWithID(rsp.id, rsp.data) |
| + case TreeType: |
| + if t, err := TreeFromRawWithID(rsp.id, rsp.data); err != nil { |
| + panic(err) |
| + } else { |
| + return t |
| + } |
| + case CommitType: |
| + if c, err := CommitFromRawWithID(rsp.id, rsp.data); err != nil { |
| + panic(err) |
| + } else { |
| + return c |
| + } |
| + default: |
| + panic(fmt.Errorf("unsupported object type: %s", rsp.typ)) |
| + } |
| + } |
| +} |
| + |
| +func (r *Repo) GetObjectID(id ObjectID) InternableObject { |
| + return r.GetObject(id.String()) |
| +} |
| + |
| +// HasObject will return true iff the Repo contains the objectish |
|
M-A Ruel
2014/10/21 00:55:54
objectish or ref?
iannucci
2016/05/23 21:53:42
objectish. could be a hash, ref, hash:path/to/file
|
| +func (r *Repo) HasObject(objectish string) bool { |
| + r.ensureCatFileServer(&r.catFileCheck, true) |
| + rchan := make(chan *catFileReply, 1) |
| + (*r.catFileCheck) <- catFileRequest{ |
| + objectish: objectish, |
| + reply: rchan, |
| + } |
| + return (<-rchan) != nil |
| +} |
| + |
| +func (r *Repo) HasObjectID(id ObjectID) bool { |
|
M-A Ruel
2014/10/21 00:55:54
What about just this function and not have HasObje
iannucci
2016/05/23 21:53:42
It is, but it's less powerful.
|
| + return r.HasObject(id.String()) |
| +} |
| + |
| +// GetFullTree gets a recursively-enumerated Tree. |
| +// |
| +// blobOpt: |
| +// WithBlobs - Load blobs from Repo |
| +// NoBlobs - Blobs will be EmptyObject |
| +// |
| +// fullTree: |
| +// FullTree - All entries in tree must load |
| +// MissingOK - Missing entries will remain EmptyObject (or an !Complete() Tree) |
| +func (r *Repo) GetFullTree(treeish string, b blobOpt, f fullTree) *Tree { |
|
M-A Ruel
2014/10/21 00:55:54
This function likely doesn't need to be a method.
|
| + base, ok := r.GetObject(treeish).(*Tree) |
| + if !ok { |
| + if f == FullTree { |
| + panic(fmt.Errorf("could not load object %s", treeish)) |
| + } else { |
| + return nil |
| + } |
| + } |
| + grp := sync.WaitGroup{} |
| + for p, c := range base.children { |
| + p := p |
| + c := c |
| + grp.Add(1) |
| + go func() { |
| + defer grp.Done() |
| + switch c.Mode.Type() { |
| + case TreeType: |
| + subtree := r.GetFullTreeID(c.Object.ID(), b, f) |
| + if subtree == nil { |
| + if f == FullTree { |
| + panic(fmt.Errorf("could not load tree %s", c.Object.ID())) |
| + } |
| + } else { |
| + base.children[p] = &Child{subtree, c.Mode} |
| + } |
| + case BlobType: |
| + if b == WithBlobs { |
| + rslt := r.GetObjectID(c.Object.ID()) |
| + if rslt == nil && f == FullTree { |
| + panic(fmt.Errorf("could not load object %s", c.Object.ID())) |
| + } |
| + base.children[p] = &Child{rslt, c.Mode} |
| + } |
| + } |
| + }() |
| + } |
| + grp.Wait() |
| + return base |
| +} |
| + |
| +func (r *Repo) GetFullTreeID(tree ObjectID, b blobOpt, f fullTree) *Tree { |
|
M-A Ruel
2014/10/21 00:55:54
Keep this one, remove GetFullTree()
Vadim Sh.
2014/10/21 15:26:59
At this point you gave up writing comments? :)
|
| + return r.GetFullTree(tree.String(), b, f) |
| +} |
| + |
| +func (r *Repo) GetTextDiff(left, right string) (string, error) { |
| + rslt, ok := r.Run("diff", left, right) |
|
M-A Ruel
2014/10/21 00:55:54
At the very least, you want --no-ext-diff
|
| + if !ok { |
| + return "", fmt.Errorf("cannot diff(%s, %s): %s", left, right, rslt) |
| + } |
| + return rslt, nil |
| +} |
| + |
| +// DiffTree computes the 2-tree diff (with copy/rename detection) and returns |
| +// a parsed TreeDiff of what it found. |
| +// |
| +// This diff-tree invocation is done with -t, which implies that it is recursive, |
| +// and that the actual intermediate tree objects will also be contained in the |
| +// return value. |
| +func (r *Repo) DiffTree(left, right string) (ret TreeDiff, err error) { |
| + atoi := func(s string, base int) int { |
|
M-A Ruel
2014/10/21 00:55:54
Another external function.
I think it should acce
|
| + ret, err := strconv.ParseInt(s, base, 0) |
| + if err != nil { |
| + panic(err) |
| + } |
| + return int(ret) |
| + } |
| + |
| + lines := strings.Split(strings.TrimRight( |
| + r.RunOutput("diff-tree", "-t", "-z", "-M", "-M", "-C", left, right), "\000"), |
| + "\000") |
| + |
| + infoStream := make(chan string, len(lines)) |
| + for _, line := range lines { |
| + infoStream <- line |
| + } |
| + close(infoStream) |
| + for header := range infoStream { |
| + if len(header) == 0 { |
| + break |
| + } |
| + if header[0] != ':' { |
| + return nil, fmt.Errorf("git.DiffTree: desynchronized parsing error") |
| + } |
| + info := strings.Fields(strings.TrimLeft(header, ":")) |
| + // old_mode new_mode old_id new_id action |
| + // oldPath (if action[0] in "RC") |
| + // newPath |
| + action := info[4] |
| + similarity := 0 |
| + oldPath := <-infoStream |
| + newPath := oldPath |
| + if action[0] == 'R' || action[0] == 'C' { |
| + newPath = <-infoStream |
| + similarity = atoi(action[1:], 10) |
| + } |
| + |
| + ret = append(ret, TreeDiffEntry{ |
| + Action: action, |
| + Similarity: similarity, |
| + Old: TreeDiffEntryHalf{ |
| + *NewEmptyChild(Mode(atoi(info[0], 8)), MakeObjectID(info[2])), |
| + oldPath, |
| + }, |
| + New: TreeDiffEntryHalf{ |
| + *NewEmptyChild(Mode(atoi(info[1], 8)), MakeObjectID(info[3])), |
| + newPath, |
| + }, |
| + }) |
| + } |
| + |
| + return |
| +} |
| + |
| +// Intern takes an InternableObject (Blob, Tree, Commit), and writes it into |
| +// the on-disk Repo. |
| +func (r *Repo) Intern(obj InternableObject) (ObjectID, error) { |
|
M-A Ruel
2014/10/21 00:55:54
I find the name surprising.
I would have thought
|
| + gotData := false |
| + var data string |
| + if !obj.Complete() { |
| + gotData = true |
| + data = obj.RawString() |
| + } |
| + |
| + if obj.ID() != NoID && r.HasObjectID(obj.ID()) { |
| + return obj.ID(), nil |
| + } |
| + |
| + switch obj.Type() { |
| + case CommitType, BlobType, TreeType: |
| + default: |
| + return NoID, fmt.Errorf("git.Intern: Unrecognized type %s", obj.Type()) |
| + } |
| + if !gotData { |
| + data = obj.RawString() |
| + } |
| + cmd := []string{"hash-object", "-t", string(obj.Type()), "-w", "--stdin"} |
| + out, ok := r.RunInput(data, cmd...) |
| + if !ok { |
| + return NoID, fmt.Errorf("error running %s <- %s: not ok", cmd, data) |
| + } |
| + return MakeObjectID(strings.TrimSpace(string(out))), nil |
| +} |
| + |
| +/// Private |
| + |
| +type catFileReply struct { |
| + id ObjectID |
| + typ ObjectType |
| + size int |
| + data []byte |
| +} |
| + |
| +type catFileRequest struct { |
| + objectish string |
| + reply chan<- *catFileReply |
|
M-A Ruel
2014/10/21 00:55:54
I think it's fine to reply instances, the object i
|
| +} |
| + |
| +func (r *Repo) ensureCatFileServer(ch **chan<- catFileRequest, checkOnly bool) { |
| + if *ch == nil { |
| + c := make(chan catFileRequest, 16) |
| + swapped := atomic.CompareAndSwapPointer( |
|
Vadim Sh.
2014/10/21 15:27:00
fancy
|
| + (*unsafe.Pointer)(unsafe.Pointer(ch)), |
| + nil, |
| + unsafe.Pointer(&c), |
| + ) |
| + if swapped { |
| + go r.catFileServer(c, checkOnly) |
| + } |
| + } |
| +} |
| + |
| +func (r *Repo) catFileServer(rchan chan catFileRequest, checkOnly bool) { |
| + defer func() { |
| + atomic.StorePointer((*unsafe.Pointer)(unsafe.Pointer(&r.catFile)), nil) |
| + close(rchan) |
| + |
| + if err := recover(); err != nil { |
| + fmt.Println("recovering panick'd catFileServer", err) |
|
M-A Ruel
2014/10/21 00:55:54
Why recover()?
|
| + } |
| + }() |
| + |
| + arg := "--batch" |
| + if checkOnly { |
| + arg = "--batch-check" |
| + } |
| + catFile := exec.Command("git", "cat-file", arg) |
| + catFile.Dir = r.Path |
| + in, err := catFile.StdinPipe() |
| + if err != nil { |
| + panic(err) |
| + } |
| + defer in.Close() |
| + outRaw, err := catFile.StdoutPipe() |
| + if err != nil { |
| + panic(err) |
| + } |
| + defer outRaw.Close() |
| + out := bufio.NewReader(outRaw) |
| + |
| + if err = catFile.Start(); err != nil { |
| + panic(err) |
| + } |
| + |
| + nom := infra_util.Nom(out) |
| + yoink := infra_util.Yoink(out) |
| + |
| + for req := range rchan { |
|
Vadim Sh.
2014/10/21 15:26:59
who closes rchan? When the goroutine die?
|
| + if strings.ContainsAny(req.objectish, "\n") { |
| + panic("catFile request may not contain a newline") |
| + } |
| + |
| + in.Write([]byte(req.objectish + "\n")) |
| + rsp := nom('\n') |
| + if strings.HasSuffix(rsp, " missing") { |
| + req.reply <- nil |
| + continue |
| + } |
| + |
| + parts := strings.Split(rsp, " ") |
| + objID, typ, sizeStr := parts[0], parts[1], parts[2] |
| + size, err := strconv.ParseUint(sizeStr, 10, 64) |
| + if err != nil { |
| + panic(err) |
| + } |
| + |
| + data := []byte{} |
| + if !checkOnly { |
| + data = yoink(int(size)) |
| + out.ReadByte() // drop extra newline |
| + } |
| + req.reply <- &catFileReply{ |
| + id: MakeObjectID(objID), |
| + typ: MakeObjectType(typ), |
| + data: data, |
| + size: int(size), |
| + } |
| + } |
| +} |