Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(668)

Side by Side Diff: common/dirwalk/walk_nostat.go

Issue 2054763004: luci-go/common/dirwalk: Code for walking a directory tree efficiently Base URL: https://github.com/luci/luci-go@master
Patch Set: Fixes. Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2016 The LUCI Authors. All rights reserved.
2 // Use of this source code is governed under the Apache License, Version 2.0
3 // that can be found in the LICENSE file.
4
5 // This function works strangely for performance reasons.
6 //
7 // File systems have been heavily optimised for doing a directory walk in inode
8 // order. It can be an order of magnitude faster to walk the directory in this
9 // order so we do. *However*, we want out output to be in sorted so it is
10 // deterministic.
11 //
12 // Calling `stat` is also one of the most expensive things you can do (it is
13 // roughly equivalent to reading 64/128k of data). Hence, if you have a lot of
14 // small files then just reading their contents directly is more efficient.
15 // Rather then doing the stat, we assume everything is a file and just try to
16 // read a chunk. If the file is smaller than the block size, we know that we
17 // have the entire contents. Otherwise we know the file is bigger and can
18 // decide to do the stat. If the name turned out to be a directory, then we
19 // will get an error.
20
21 package dirwalk
22
23 import (
24 "io"
25 "os"
26 "path/filepath"
27 "sort"
28 )
29
30 type SmallFile struct {
31 name string
32 data []byte
33 }
34 type SmallFileByName []SmallFile
35
36 func (a SmallFileByName) Len() int { return len(a) }
37 func (a SmallFileByName) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
38 func (a SmallFileByName) Less(i, j int) bool { return a[i].name < a[j].name }
39
40 /*
41 type LargeFile struct {
42 name string
43 }
44 type LargeFileByName []LargeFile
45 func (a LargeFileByName) Len() int { return len(a) }
46 func (a LargeFileByName) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
47 func (a LargeFileByName) Less(i, j int) bool { return a[i].name < a[j].name }
48 */
49
50 type EntryError struct {
51 name string
52 err error
53 }
54 type EntryErrorByName []EntryError
55
56 func (a EntryErrorByName) Len() int { return len(a) }
57 func (a EntryErrorByName) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
58 func (a EntryErrorByName) Less(i, j int) bool { return a[i].name < a[j].name }
59
60 func walkNoStatInternal(base string, files []string, smallfile_limit int64, obs WalkObserver) {
61 var errors []EntryError
62 var smallfiles []SmallFile
63 var largefiles []string
64 var dirs []EntryError
65
66 for _, name := range files {
67 fname := filepath.Join(base, name)
68 file, err := os.Open(fname)
69
70 if err != nil {
71 errors = append(errors, EntryError{fname, err})
72 continue
73 }
74
75 block := make([]byte, smallfile_limit)
76 count, err := file.Read(block)
77 if err != io.EOF && err != nil {
78 // Its probably a directory
79 dirs = append(dirs, EntryError{fname, err})
80 continue
81 }
82
83 // This file was bigger than the block size, stat it
84 if int64(count) == smallfile_limit {
85 /*
86 stat, err := file.Stat()
87 if err != nil {
88 errors = append(errors, EntryError{fname , err})
89 continue
90 }
91 */
92 largefiles = append(largefiles, fname) //LargeFile{name: fname, stat: &stat})
93
94 // This file was smaller than the block size
95 } else {
96 smallfiles = append(smallfiles, SmallFile{name: fname, d ata: block[:count]})
97 }
98 file.Close()
99 }
100
101 sort.Sort(SmallFileByName(smallfiles))
M-A Ruel 2016/09/23 01:48:17 Why pass the file names in order?
102 for _, f := range smallfiles {
103 obs.SmallFile(f.name, f.data)
104 }
105
106 sort.Strings(largefiles)
107 for _, fname := range largefiles {
108 obs.LargeFile(fname)
109 }
110
111 sort.Sort(EntryErrorByName(dirs))
112 for _, d := range dirs {
113 file, err := os.Open(d.name)
114 if err != nil {
115 obs.Error(d.name, d.err)
116 continue
117 }
118
119 names, err := file.Readdirnames(0)
120 if err != nil {
121 obs.Error(d.name, d.err)
122 continue
123 }
124 walkNoStatInternal(d.name, names, smallfile_limit, obs)
125 }
126 }
127
128 func WalkNoStat(root string, smallfile_limit int64, obs WalkObserver) {
129 paths := []string{root}
130 walkNoStatInternal("", paths, smallfile_limit, obs)
131 obs.Finished()
132 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698