Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(58)

Side by Side Diff: client/archiver/directory.go

Issue 2981243002: isolate: (refactor) extract blacklisting into common package (Closed)
Patch Set: address review comments Created 3 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | client/archiver/directory_test.go » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2015 The LUCI Authors. 1 // Copyright 2015 The LUCI Authors.
2 // 2 //
3 // Licensed under the Apache License, Version 2.0 (the "License"); 3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License. 4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at 5 // You may obtain a copy of the License at
6 // 6 //
7 // http://www.apache.org/licenses/LICENSE-2.0 7 // http://www.apache.org/licenses/LICENSE-2.0
8 // 8 //
9 // Unless required by applicable law or agreed to in writing, software 9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS, 10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and 12 // See the License for the specific language governing permissions and
13 // limitations under the License. 13 // limitations under the License.
14 14
15 package archiver 15 package archiver
16 16
17 import ( 17 import (
18 "bytes" 18 "bytes"
19 "encoding/json" 19 "encoding/json"
20 "fmt" 20 "fmt"
21 "log" 21 "log"
22 "os" 22 "os"
23 "path/filepath" 23 "path/filepath"
24 24
25 "github.com/luci/luci-go/client/internal/common"
25 "github.com/luci/luci-go/common/isolated" 26 "github.com/luci/luci-go/common/isolated"
26 "github.com/luci/luci-go/common/isolatedclient" 27 "github.com/luci/luci-go/common/isolatedclient"
27 "github.com/luci/luci-go/common/runtime/tracer" 28 "github.com/luci/luci-go/common/runtime/tracer"
28 ) 29 )
29 30
30 type walkItem struct { 31 type walkItem struct {
31 fullPath string 32 fullPath string
32 relPath string 33 relPath string
33 info os.FileInfo 34 info os.FileInfo
34 err error
35 } 35 }
36 36
37 // walk() enumerates a directory tree synchronously and sends the items to 37 // walk() enumerates a directory tree synchronously and sends the items to
38 // channel c. 38 // channel c.
39 // 39 func walk(root string, fsView common.FilesystemView, c chan<- *walkItem) {
40 // blacklist is a list of globs of files to ignore. Each blacklist glob is
41 // relative to root.
42 func walk(root string, blacklist []string, c chan<- *walkItem) {
43 // TODO(maruel): Walk() sorts the file names list, which is not needed here 40 // TODO(maruel): Walk() sorts the file names list, which is not needed here
44 // and slows things down. Options: 41 // and slows things down. Options:
45 // #1 Use os.File.Readdir() directly. It's in the stdlib and works fine, but 42 // #1 Use os.File.Readdir() directly. It's in the stdlib and works fine, but
46 // it's not the most efficient implementation. On posix it does a lstat() 43 // it's not the most efficient implementation. On posix it does a lstat()
47 // call, on Windows it does a Win32FileAttributeData. 44 // call, on Windows it does a Win32FileAttributeData.
48 // #2 Use raw syscalls. 45 // #2 Use raw syscalls.
49 // - On POSIX, use syscall.ReadDirent(). See src/os/dir_unix.go. 46 // - On POSIX, use syscall.ReadDirent(). See src/os/dir_unix.go.
50 // - On Windows, use syscall.FindFirstFile(), syscall.FindNextFile(), 47 // - On Windows, use syscall.FindFirstFile(), syscall.FindNextFile(),
51 // syscall.FindClose() directly. See src/os/file_windows.go. For odd 48 // syscall.FindClose() directly. See src/os/file_windows.go. For odd
52 // reasons, Windows does not have a batched version to reduce the number 49 // reasons, Windows does not have a batched version to reduce the number
53 // of kernel calls. It's as if they didn't care about performance. 50 // of kernel calls. It's as if they didn't care about performance.
54 // 51 //
55 // In practice, #2 may not be needed, the performance of #1 may be good 52 // In practice, #2 may not be needed, the performance of #1 may be good
56 // enough relative to the other performance costs. This needs to be perf 53 // enough relative to the other performance costs. This needs to be perf
57 // tested at 100k+ files scale on Windows and OSX. 54 // tested at 100k+ files scale on Windows and OSX.
58 // 55 //
59 // TODO(maruel): Cache directory enumeration. In particular cases (Chromium), 56 // TODO(maruel): Cache directory enumeration. In particular cases (Chromium),
60 // the same directory may be enumerated multiple times. Caching the content 57 // the same directory may be enumerated multiple times. Caching the content
61 // may be worth. This needs to be perf tested. 58 // may be worth. This needs to be perf tested.
62 59
63 total := 0 60 total := 0
64 end := tracer.Span(root, "walk:"+filepath.Base(root), nil) 61 end := tracer.Span(root, "walk:"+filepath.Base(root), nil)
65 defer func() { end(tracer.Args{"root": root, "total": total}) }() 62 defer func() { end(tracer.Args{"root": root, "total": total}) }()
66 » // Check patterns upfront, so it has consistent behavior w.r.t. bad glob 63
67 » // patterns. 64 » err := filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
68 » for _, b := range blacklist {
69 » » if _, err := filepath.Match(b, b); err != nil {
70 » » » c <- &walkItem{err: fmt.Errorf("bad blacklist pattern \"%s\"", b)}
71 » » » return
72 » » }
73 » }
74 » err := filepath.Walk(root, func(p string, info os.FileInfo, err error) error {
75 total++ 65 total++
66
76 if err != nil { 67 if err != nil {
77 » » » return fmt.Errorf("walk(%q): %v", p, err) 68 » » » return fmt.Errorf("walk(%q): %v", path, err)
78 } 69 }
79 70
80 » » relPath, err := filepath.Rel(root, p) 71 » » relPath, err := fsView.RelativePath(path)
81 if err != nil { 72 if err != nil {
82 » » » return fmt.Errorf("walk: calculating relative path(%q): %v", p, err) 73 » » » return fmt.Errorf("walk(%q): %v", path, err)
83 } 74 }
84 75
85 » » // filepath.Rel is documented to call filepath.Clean on its result before returning it, 76 » » if relPath == "" { // empty string indicates skip.
86 » » // which results in "." for an empty relative path. 77 » » » return returnSkip(info)
87 » » if relPath == "." {
88 » » » return nil // Root directory.
89 } 78 }
90 79
91 » » for _, b := range blacklist { 80 » » if !info.IsDir() {
92 » » » matched, _ := filepath.Match(b, relPath) 81 » » » c <- &walkItem{fullPath: path, relPath: relPath, info: info}
93 » » » if !matched {
94 » » » » // Also check at the base file name.
95 » » » » matched, _ = filepath.Match(b, filepath.Base(relPath))
96 » » » }
97 » » » if matched {
98 » » » » // Must not return io.SkipDir for file, filepath.walk() handles this
99 » » » » // badly.
100 » » » » if info.IsDir() {
101 » » » » » return filepath.SkipDir
102 » » » » }
103 » » » » return nil
104 » » » }
105 } 82 }
106 if info.IsDir() {
107 return nil
108 }
109 c <- &walkItem{fullPath: p, relPath: relPath, info: info}
110 return nil 83 return nil
111 }) 84 })
85
112 if err != nil { 86 if err != nil {
113 // No point continuing if an error occurred during walk. 87 // No point continuing if an error occurred during walk.
114 log.Fatalf("Unable to walk %q: %v", root, err) 88 log.Fatalf("Unable to walk %q: %v", root, err)
115 } 89 }
90
91 }
92
93 // returnSkip returns the return value expected from a filepath.WalkFunc in the case where no more processing of file should occur.
94 func returnSkip(file os.FileInfo) error {
95 if file.IsDir() {
96 // Must not return io.SkipDir for file, filepath.walk() handles this badly.
97 return filepath.SkipDir
98 }
99 return nil
116 } 100 }
117 101
118 // PushDirectory walks a directory at root and creates a .isolated file. 102 // PushDirectory walks a directory at root and creates a .isolated file.
119 // 103 //
120 // It walks the directories synchronously, then returns a *Item to signal when 104 // It walks the directories synchronously, then returns a *Item to signal when
121 // the background work is completed. The Item is signaled once all files are 105 // the background work is completed. The Item is signaled once all files are
122 // hashed. In particular, the *Item is signaled before server side cache 106 // hashed. In particular, the *Item is signaled before server side cache
123 // lookups and upload is completed. Use archiver.Close() to wait for 107 // lookups and upload is completed. Use archiver.Close() to wait for
124 // completion. 108 // completion.
125 // 109 //
126 // relDir is a relative directory to offset relative paths against in the 110 // relDir is a relative directory to offset relative paths against in the
127 // generated .isolated file. 111 // generated .isolated file.
128 // 112 //
129 // blacklist is a list of globs of files to ignore. 113 // blacklist is a list of globs of files to ignore.
130 func PushDirectory(a *Archiver, root string, relDir string, blacklist []string) *Item { 114 func PushDirectory(a *Archiver, root string, relDir string, blacklist []string) *Item {
131 total := 0 115 total := 0
132 end := tracer.Span(a, "PushDirectory", tracer.Args{"path": relDir, "root": root}) 116 end := tracer.Span(a, "PushDirectory", tracer.Args{"path": relDir, "root": root})
133 defer func() { end(tracer.Args{"total": total}) }() 117 defer func() { end(tracer.Args{"total": total}) }()
134 c := make(chan *walkItem) 118 c := make(chan *walkItem)
119
120 displayName := filepath.Base(root) + ".isolated"
121 s := &Item{DisplayName: displayName}
122 fsView, err := common.NewFilesystemView(root, blacklist)
123 if err != nil {
124 s.SetErr(err)
125 return s
126 }
127
135 go func() { 128 go func() {
136 » » walk(root, blacklist, c) 129 » » walk(root, fsView, c)
137 close(c) 130 close(c)
138 }() 131 }()
139 132
140 displayName := filepath.Base(root) + ".isolated"
141 i := isolated.Isolated{ 133 i := isolated.Isolated{
142 Algo: "sha-1", 134 Algo: "sha-1",
143 Files: map[string]isolated.File{}, 135 Files: map[string]isolated.File{},
144 Version: isolated.IsolatedFormatVersion, 136 Version: isolated.IsolatedFormatVersion,
145 } 137 }
146 items := []*Item{} 138 items := []*Item{}
147 s := &Item{DisplayName: displayName}
148 for item := range c { 139 for item := range c {
149 if s.Error() != nil { 140 if s.Error() != nil {
150 // Empty the queue. 141 // Empty the queue.
151 continue 142 continue
152 } 143 }
153 if item.err != nil {
154 s.SetErr(item.err)
155 continue
156 }
157 total++ 144 total++
158 if relDir != "" { 145 if relDir != "" {
159 item.relPath = filepath.Join(relDir, item.relPath) 146 item.relPath = filepath.Join(relDir, item.relPath)
160 } 147 }
161 mode := item.info.Mode() 148 mode := item.info.Mode()
162 if mode&os.ModeSymlink == os.ModeSymlink { 149 if mode&os.ModeSymlink == os.ModeSymlink {
163 l, err := os.Readlink(item.fullPath) 150 l, err := os.Readlink(item.fullPath)
164 if err != nil { 151 if err != nil {
165 s.SetErr(fmt.Errorf("readlink(%s): %s", item.fullPath, err)) 152 s.SetErr(fmt.Errorf("readlink(%s): %s", item.fullPath, err))
166 continue 153 continue
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
203 } 190 }
204 } 191 }
205 } 192 }
206 } 193 }
207 if err != nil { 194 if err != nil {
208 s.SetErr(err) 195 s.SetErr(err)
209 } 196 }
210 }() 197 }()
211 return s 198 return s
212 } 199 }
OLDNEW
« no previous file with comments | « no previous file | client/archiver/directory_test.go » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698