Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(50)

Unified Diff: go/src/infra/libs/git/repo.go

Issue 662113003: Drover's back, baby! (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git/+/master
Patch Set: Lots of fixes Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: go/src/infra/libs/git/repo.go
diff --git a/go/src/infra/libs/git/repo.go b/go/src/infra/libs/git/repo.go
new file mode 100644
index 0000000000000000000000000000000000000000..23029efc927219d79199eda1bde4e3399e7e3750
--- /dev/null
+++ b/go/src/infra/libs/git/repo.go
@@ -0,0 +1,423 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+package git
+
+import (
+ "bufio"
+ "bytes"
+ "fmt"
+ "os/exec"
+ "strconv"
+ "strings"
+ "sync"
+ "sync/atomic"
+ "unsafe"
+
+ "infra/libs/infra_util"
+)
+
+// blobOpt tells GetFullTree whether blob contents should be loaded. The type
+// is unexported; callers are expected to pass one of the constants below.
+type blobOpt bool
+
+const (
+	// WithBlobs makes GetFullTree load blobs from the Repo.
+	WithBlobs blobOpt = true
+	// NoBlobs makes GetFullTree leave blobs as EmptyObject. It carries an
+	// explicit type so that it is a blobOpt like WithBlobs rather than an
+	// untyped boolean constant.
+	NoBlobs blobOpt = false
M-A Ruel 2014/10/21 00:55:54 If the type is not exported, the consts should not
iannucci 2016/05/23 21:53:42 Why not? I don't want them constructing their own
+)
+
+// fullTree tells GetFullTree how to treat entries that cannot be loaded. The
+// type is unexported; callers are expected to pass one of the constants below.
+type fullTree bool
+
+const (
+	// FullTree requires every entry in the tree to load.
+	FullTree fullTree = true
+	// MissingOK lets missing entries remain EmptyObject (or an !Complete()
+	// Tree). It carries an explicit type so that it is a fullTree like FullTree
+	// rather than an untyped boolean constant.
+	MissingOK fullTree = false
+)
+
+// Repo represents a local git repository at the path |Path|.
+type Repo struct {
+ // The path on disk to the location of the git repo.
+ Path string
+
+ // catFile points at the request channel GetObject sends on. It starts out
+ // nil; ensureCatFileServer fills it in when it launches the
+ // `git cat-file --batch` server goroutine.
+ catFile *chan<- catFileRequest
M-A Ruel 2014/10/21 00:55:54 I still don't understand why it's a pointer. Creat
+ // catFileCheck points at the request channel HasObject sends on. It is
+ // filled in the same way, for the `git cat-file --batch-check` server.
+ catFileCheck *chan<- catFileRequest
+}
+
+// TreeDiff represents the difference between two Treeish objects as a list of
+// per-path entries. It is the type returned by Repo.DiffTree.
+type TreeDiff []TreeDiffEntry
+
+// TreeDiffEntry represents the before and after of one path in the repo.
+// Note that the Old.Name and New.Name may be different if this item was
+// Moved or Copied.
+type TreeDiffEntry struct {
+ // Action is one of "ACDMRTUX"
M-A Ruel 2014/10/21 00:55:54 Action
iannucci 2016/05/23 21:53:42 oops. done.
+ // U is for unmerged... if you're just comparing trees you should never see this
+ // X is probably a bug in git... you should also never see this.
+ // T is a type change, so if a tree turned into a blob, for example
+ //
+ // DiffTree stores the whole status token here, so for 'R' and 'C' the
+ // letter is followed by the similarity score digits.
+ Action string
+
+ // For Action types 'R' or 'C', what percentage (from 0-100) are the old and
+ // new blobs similar. DiffTree leaves this at 0 for every other Action.
+ Similarity int
+
+ // Old and New are the two sides of the change.
+ Old TreeDiffEntryHalf
+ New TreeDiffEntryHalf
+}
+
+// TreeDiffEntryHalf is one entry in a TreeDiffEntry, either the Old or New half.
+type TreeDiffEntryHalf struct {
+ // Child is embedded; DiffTree builds it from the mode and object ID that
+ // diff-tree reports for this side.
+ Child
+ // Name is the path diff-tree reports for this side of the entry.
+ Name string
+}
+
+// String renders the half as "<Name>: <Child>", formatting both parts with %s.
+func (t *TreeDiffEntryHalf) String() string {
+ return fmt.Sprintf("%s: %s", t.Name, t.Child)
+}
+
+// InternableObject is an Object which may be interned into a git repo
+// (e.g. hash-object'd). EmptyObjects are NOT InternableObjects.
+type InternableObject interface {
+ Object
+
+ // RawString returns a hash-object compatible string; Repo.Intern passes it
+ // to `git hash-object --stdin`. May panic if this is an Object which is
+ // !Complete(). Note that for Trees this does NOT imply that all of the
+ // Tree's children are Complete().
+ RawString() string
M-A Ruel 2014/10/21 00:55:54 As with the other part, I think []byte is a bit mo
iannucci 2016/05/23 21:53:42 but then I have to copy all that data or ppl could
+}
+
+// RunInput executes `git <cmd...>` with the Repo's path as working directory,
+// feeding |input| to the command's stdin.
+//
+// It returns the captured stdout together with true when git exits 0, or
+// false when git exits with a non-zero status. Any other failure reported by
+// exec (an error that is not an *exec.ExitError) results in a panic.
+func (r *Repo) RunInput(input string, cmd ...string) (string, bool) {
M-A Ruel 2014/10/21 00:55:54 That's an example of a function that could accept
iannucci 2016/05/23 21:53:42 I'm not sure why that would be helpful though, sin
+	git := exec.Command("git", cmd...)
+	git.Dir = r.Path
+	git.Stdin = bytes.NewBufferString(input)
+
+	stdout, err := git.Output()
+	switch err.(type) {
+	case nil:
+		return string(stdout), true
+	case *exec.ExitError:
+		return string(stdout), false
+	default:
+		panic(err)
Vadim Sh. 2014/10/21 15:26:59 when this can happen?
+	}
+}
+
+// Run runs a git command in the Repo, with no input, returning the stdout and
+// success.
+func (r *Repo) Run(cmd ...string) (string, bool) {
+ return r.RunInput("", cmd...)
+}
+
+// RunOk executes a git command in the Repo with empty stdin, discards its
+// stdout, and reports whether it succeeded (exited 0).
+func (r *Repo) RunOk(cmd ...string) bool {
+	_, succeeded := r.RunInput("", cmd...)
+	return succeeded
+}
+
+// RunOutput executes a git command in the Repo with empty stdin and returns
+// its stdout. Whether the command succeeded is not reported.
+func (r *Repo) RunOutput(cmd ...string) string {
+	stdout, _ := r.RunInput("", cmd...)
+	return stdout
+}
+
+// GetObject fetches |objectish| from the Repo through the Repo's
+// `git cat-file --batch` server and blocks until the reply arrives.
Vadim Sh. 2014/10/21 15:27:00 I don't see it returning a channel... Also it's bl
+//
+// It returns nil if the server reports the object as missing. It panics if
+// the object has an unsupported type, or if a tree or commit cannot be parsed
+// from the raw data.
+func (r *Repo) GetObject(objectish string) InternableObject {
+	r.ensureCatFileServer(&r.catFile, false)
+
+	// Buffered so the server can deliver the reply without waiting for us.
+	rchan := make(chan *catFileReply, 1)
Vadim Sh. 2014/10/21 15:26:59 Does go have thread pool implementation? IMHO, it
+	(*r.catFile) <- catFileRequest{
+		objectish: objectish,
+		reply:     rchan,
+	}
+
+	rsp := <-rchan
+	if rsp == nil {
+		return nil
+	}
+
+	switch rsp.typ {
+	case BlobType:
+		return BlobFromRawWithID(rsp.id, rsp.data)
+	case TreeType:
+		t, err := TreeFromRawWithID(rsp.id, rsp.data)
+		if err != nil {
+			panic(err)
+		}
+		return t
+	case CommitType:
+		c, err := CommitFromRawWithID(rsp.id, rsp.data)
+		if err != nil {
+			panic(err)
+		}
+		return c
+	default:
+		panic(fmt.Errorf("unsupported object type: %s", rsp.typ))
+	}
+}
+
+// GetObjectID is GetObject for callers that hold an ObjectID: the ID's
+// String() form is used as the objectish.
+func (r *Repo) GetObjectID(id ObjectID) InternableObject {
+	objectish := id.String()
+	return r.GetObject(objectish)
+}
+
+// HasObject reports whether the Repo contains |objectish|. The question is
+// put to the Repo's `git cat-file --batch-check` server, and the call blocks
+// until the server answers.
M-A Ruel 2014/10/21 00:55:54 objectish or ref?
iannucci 2016/05/23 21:53:42 objectish. could be a hash, ref, hash:path/to/file
+func (r *Repo) HasObject(objectish string) bool {
+	r.ensureCatFileServer(&r.catFileCheck, true)
+
+	answer := make(chan *catFileReply, 1)
+	request := catFileRequest{objectish: objectish, reply: answer}
+	(*r.catFileCheck) <- request
+
+	// The server answers nil for a missing object.
+	found := <-answer
+	return found != nil
+}
+
+// HasObjectID is HasObject for callers that hold an ObjectID: the ID's
+// String() form is used as the objectish.
+func (r *Repo) HasObjectID(id ObjectID) bool {
M-A Ruel 2014/10/21 00:55:54 What about just this function and not have HasObje
iannucci 2016/05/23 21:53:42 It is, but it's less powerful.
+	objectish := id.String()
+	return r.HasObject(objectish)
+}
+
+// GetFullTree gets a recursively-enumerated Tree.
+//
+// blobOpt:
+//   WithBlobs - Load blobs from Repo
+//   NoBlobs   - Blobs will be EmptyObject
+//
+// fullTree:
+//   FullTree  - All entries in tree must load
+//   MissingOK - Missing entries will remain EmptyObject (or an !Complete() Tree)
+//
+// With FullTree the function panics if |treeish| or any entry below it cannot
+// be loaded; with MissingOK it returns nil if |treeish| itself is not a
+// loadable tree.
+//
+// Children are loaded concurrently, one goroutine per entry. The goroutines
+// never touch base.children themselves: each one records its replacement, and
+// the replacements are applied on the calling goroutine once all workers have
+// finished. A panic raised in a worker is captured and re-raised on the
+// calling goroutine, so callers are able to recover from it.
+func (r *Repo) GetFullTree(treeish string, b blobOpt, f fullTree) *Tree {
M-A Ruel 2014/10/21 00:55:54 This function likely doesn't need to be a method.
+	base, ok := r.GetObject(treeish).(*Tree)
+	if !ok {
+		if f == FullTree {
+			panic(fmt.Errorf("could not load object %s", treeish))
+		}
+		return nil
+	}
+
+	var (
+		grp     sync.WaitGroup
+		mu      sync.Mutex // guards updates and failure
+		updates []func()
+		failure interface{}
+	)
+	for p, c := range base.children {
+		p, c := p, c
+		grp.Add(1)
+		go func() {
+			defer grp.Done()
+			defer func() {
+				// Keep the first panic so it can be re-raised by the caller's
+				// goroutine instead of killing the process from a worker.
+				if e := recover(); e != nil {
+					mu.Lock()
+					if failure == nil {
+						failure = e
+					}
+					mu.Unlock()
+				}
+			}()
+
+			var replacement *Child
+			switch c.Mode.Type() {
+			case TreeType:
+				subtree := r.GetFullTreeID(c.Object.ID(), b, f)
+				if subtree == nil {
+					if f == FullTree {
+						panic(fmt.Errorf("could not load tree %s", c.Object.ID()))
+					}
+					return
+				}
+				replacement = &Child{subtree, c.Mode}
+			case BlobType:
+				if b != WithBlobs {
+					return
+				}
+				rslt := r.GetObjectID(c.Object.ID())
+				if rslt == nil {
+					if f == FullTree {
+						panic(fmt.Errorf("could not load object %s", c.Object.ID()))
+					}
+					// MissingOK: leave the existing entry untouched rather than
+					// replacing it with a nil object.
+					return
+				}
+				replacement = &Child{rslt, c.Mode}
+			default:
+				return
+			}
+
+			mu.Lock()
+			updates = append(updates, func() { base.children[p] = replacement })
+			mu.Unlock()
+		}()
+	}
+	grp.Wait()
+
+	if failure != nil {
+		panic(failure)
+	}
+	for _, apply := range updates {
+		apply()
+	}
+	return base
+}
+
+// GetFullTreeID is GetFullTree for callers that hold the tree's ObjectID: the
+// ID's String() form is used as the treeish, and |b| and |f| are passed
+// through unchanged.
+func (r *Repo) GetFullTreeID(tree ObjectID, b blobOpt, f fullTree) *Tree {
M-A Ruel 2014/10/21 00:55:54 Keep this one, remove GetFullTree()
Vadim Sh. 2014/10/21 15:26:59 At this point you gave up writing comments? :)
+	treeish := tree.String()
+	return r.GetFullTree(treeish, b, f)
+}
+
+// GetTextDiff runs `git diff` between |left| and |right| in the Repo and
+// returns its stdout.
+//
+// --no-ext-diff is passed so that an external diff driver configured by the
+// user cannot replace git's own diff output.
+//
+// If git exits non-zero, an empty string and an error are returned; the error
+// message includes whatever git wrote to stdout.
+func (r *Repo) GetTextDiff(left, right string) (string, error) {
+	rslt, ok := r.Run("diff", "--no-ext-diff", left, right)
M-A Ruel 2014/10/21 00:55:54 At the very least, you want --no-ext-diff
+	if !ok {
+		return "", fmt.Errorf("cannot diff(%s, %s): %s", left, right, rslt)
+	}
+	return rslt, nil
+}
+
+// DiffTree computes the 2-tree diff (with copy/rename detection) and returns
+// a parsed TreeDiff of what it found.
+//
+// This diff-tree invocation is done with -t, which implies that it is recursive,
+// and that the actual intermediate tree objects will also be contianed in the
+// return value.
+func (r *Repo) DiffTree(left, right string) (ret TreeDiff, err error) {
+ atoi := func(s string, base int) int {
M-A Ruel 2014/10/21 00:55:54 Another external function. I think it should acce
+ ret, err := strconv.ParseInt(s, base, 0)
+ if err != nil {
+ panic(err)
+ }
+ return int(ret)
+ }
+
+ lines := strings.Split(strings.TrimRight(
+ r.RunOutput("diff-tree", "-t", "-z", "-M", "-M", "-C", left, right), "\000"),
+ "\000")
+
+ infoStream := make(chan string, len(lines))
+ for _, line := range lines {
+ infoStream <- line
+ }
+ close(infoStream)
+ for header := range infoStream {
+ if len(header) == 0 {
+ break
+ }
+ if header[0] != ':' {
+ return nil, fmt.Errorf("git.DiffTree: desynchronized parsing error")
+ }
+ info := strings.Fields(strings.TrimLeft(header, ":"))
+ // old_mode new_mode old_id new_id action
+ // oldPath (if action[0] in "RC")
+ // newPath
+ action := info[4]
+ similarity := 0
+ oldPath := <-infoStream
+ newPath := oldPath
+ if action[0] == 'R' || action[0] == 'C' {
+ newPath = <-infoStream
+ similarity = atoi(action[1:], 10)
+ }
+
+ ret = append(ret, TreeDiffEntry{
+ Action: action,
+ Similarity: similarity,
+ Old: TreeDiffEntryHalf{
+ *NewEmptyChild(Mode(atoi(info[0], 8)), MakeObjectID(info[2])),
+ oldPath,
+ },
+ New: TreeDiffEntryHalf{
+ *NewEmptyChild(Mode(atoi(info[1], 8)), MakeObjectID(info[3])),
+ newPath,
+ },
+ })
+ }
+
+ return
+}
+
+// Intern takes an InternableObject (Blob, Tree, Commit), and writes it into
+// the on-disk Repo with `git hash-object -w --stdin`, returning the ID that
+// git prints.
+//
+// If the object already has an ID and the Repo contains that ID, the ID is
+// returned without writing anything. An error is returned for an object type
+// other than commit, blob or tree, or if hash-object fails.
+func (r *Repo) Intern(obj InternableObject) (ObjectID, error) {
M-A Ruel 2014/10/21 00:55:54 I find the name surprising. I would have thought
+ gotData := false
+ var data string
+ // NOTE(review): RawString is documented as possibly panicking for
+ // !Complete() objects, so this branch presumably exists to fail early on
+ // incomplete objects, before the HasObjectID shortcut below -- confirm.
+ if !obj.Complete() {
+ gotData = true
+ data = obj.RawString()
+ }
+
+ // Nothing to write if the object is already present under its ID.
+ if obj.ID() != NoID && r.HasObjectID(obj.ID()) {
+ return obj.ID(), nil
+ }
+
+ // Only these three types are accepted; anything else is an error.
+ switch obj.Type() {
+ case CommitType, BlobType, TreeType:
+ default:
+ return NoID, fmt.Errorf("git.Intern: Unrecognized type %s", obj.Type())
+ }
+ // Serialize now unless the !Complete() branch above already did.
+ if !gotData {
+ data = obj.RawString()
+ }
+ cmd := []string{"hash-object", "-t", string(obj.Type()), "-w", "--stdin"}
+ out, ok := r.RunInput(data, cmd...)
+ if !ok {
+ return NoID, fmt.Errorf("error running %s <- %s: not ok", cmd, data)
+ }
+ // hash-object's stdout is the new object's ID; strip surrounding whitespace.
+ return MakeObjectID(strings.TrimSpace(string(out))), nil
+}
+
+/// Private
+
+// catFileReply is the answer catFileServer sends for one catFileRequest. For
+// an object that git reports as missing the server sends a nil *catFileReply
+// instead.
+type catFileReply struct {
+ // id, typ and size are parsed from the three space-separated fields of the
+ // cat-file response line.
+ id ObjectID
+ typ ObjectType
+ size int
+ // data holds the object contents; it is empty when the server runs in
+ // check-only mode.
+ data []byte
+}
+
+// catFileRequest asks catFileServer to look up one objectish.
+type catFileRequest struct {
+ // objectish is written, followed by a newline, to git cat-file's stdin.
+ // catFileServer panics if it contains a newline itself.
+ objectish string
+ // reply receives the answer: a *catFileReply, or nil if the object is
+ // missing.
+ reply chan<- *catFileReply
M-A Ruel 2014/10/21 00:55:54 I think it's fine to reply instances, the object i
+}
+
+// ensureCatFileServer starts a catFileServer goroutine for the slot |*ch| if
+// that slot is currently nil. In this file the slot is &r.catFile (with
+// checkOnly false) or &r.catFileCheck (with checkOnly true). checkOnly is
+// passed through to catFileServer.
+func (r *Repo) ensureCatFileServer(ch **chan<- catFileRequest, checkOnly bool) {
+ // NOTE(review): this first read of *ch is a plain, non-atomic load.
+ if *ch == nil {
+ c := make(chan catFileRequest, 16)
+ // Publish &c only if the slot is still nil. A caller that loses the swap
+ // drops its freshly made channel and starts no server.
+ swapped := atomic.CompareAndSwapPointer(
Vadim Sh. 2014/10/21 15:27:00 fancy
+ (*unsafe.Pointer)(unsafe.Pointer(ch)),
+ nil,
+ unsafe.Pointer(&c),
+ )
+ if swapped {
+ go r.catFileServer(c, checkOnly)
+ }
+ }
+}
+
+// catFileServer runs `git cat-file --batch` (`--batch-check` when checkOnly is
+// set) in the Repo and answers the requests arriving on rchan one at a time.
+// The loop only ends when the server panics, e.g. on a request containing a
+// newline, a write failure, or an unparsable response.
+//
+// On the way out the server:
+//   - recovers and prints the panic;
+//   - clears the Repo slot it serves (r.catFileCheck when checkOnly is set,
+//     r.catFile otherwise), so a later ensureCatFileServer call starts a
+//     fresh server;
+//   - closes rchan;
+//   - answers nil to the request it was handling and to every request still
+//     buffered in rchan, so those callers do not wait forever;
+//   - reaps the git process, if it was started.
+func (r *Repo) catFileServer(rchan chan catFileRequest, checkOnly bool) {
+	var (
+		// inflight is the reply channel of the request being handled; it is
+		// reset to nil just before that request is answered.
+		inflight chan<- *catFileReply
+		// started is set once the git process is running.
+		started *exec.Cmd
+	)
+
+	defer func() {
+		if err := recover(); err != nil {
+			fmt.Println("recovering panick'd catFileServer", err)
M-A Ruel 2014/10/21 00:55:54 Why recover()?
+		}
+
+		slot := &r.catFile
+		if checkOnly {
+			slot = &r.catFileCheck
+		}
+		atomic.StorePointer((*unsafe.Pointer)(unsafe.Pointer(slot)), nil)
+		close(rchan)
+
+		// Reply channels are created with a buffer of one by the callers in
+		// this file, so these sends do not block.
+		if inflight != nil {
+			inflight <- nil
+		}
+		for req := range rchan {
+			req.reply <- nil
+		}
+
+		// Both pipes have been closed by the defers below at this point, so
+		// git can exit. The exit status is irrelevant during shutdown.
+		if started != nil {
+			_ = started.Wait()
+		}
+	}()
+
+	arg := "--batch"
+	if checkOnly {
+		arg = "--batch-check"
+	}
+	catFile := exec.Command("git", "cat-file", arg)
+	catFile.Dir = r.Path
+	in, err := catFile.StdinPipe()
+	if err != nil {
+		panic(err)
+	}
+	defer in.Close()
+	outRaw, err := catFile.StdoutPipe()
+	if err != nil {
+		panic(err)
+	}
+	defer outRaw.Close()
+	out := bufio.NewReader(outRaw)
+
+	if err = catFile.Start(); err != nil {
+		panic(err)
+	}
+	started = catFile
+
+	nom := infra_util.Nom(out)
+	yoink := infra_util.Yoink(out)
+
+	for req := range rchan {
Vadim Sh. 2014/10/21 15:26:59 who closes rchan? When the goroutine die?
+		inflight = req.reply
+		if strings.ContainsAny(req.objectish, "\n") {
+			panic("catFile request may not contain a newline")
+		}
+
+		if _, err := in.Write([]byte(req.objectish + "\n")); err != nil {
+			panic(err)
+		}
+		rsp := nom('\n')
+		if strings.HasSuffix(rsp, " missing") {
+			inflight = nil
+			req.reply <- nil
+			continue
+		}
+
+		// Expected response line: "<id> <type> <size>".
+		parts := strings.Split(rsp, " ")
+		if len(parts) < 3 {
+			panic(fmt.Errorf("catFile: unexpected response %q", rsp))
+		}
+		objID, typ, sizeStr := parts[0], parts[1], parts[2]
+		size, err := strconv.ParseUint(sizeStr, 10, 64)
+		if err != nil {
+			panic(err)
+		}
+
+		data := []byte{}
+		if !checkOnly {
+			data = yoink(int(size))
+			out.ReadByte() // drop extra newline
+		}
+		inflight = nil
+		req.reply <- &catFileReply{
+			id:   MakeObjectID(objID),
+			typ:  MakeObjectType(typ),
+			data: data,
+			size: int(size),
+		}
+	}
+}

Powered by Google App Engine
This is Rietveld 408576698