OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2016 The LUCI Authors. All rights reserved. | |
2 // Use of this source code is governed under the Apache License, Version 2.0 | |
3 // that can be found in the LICENSE file. | |
4 | |
5 // This function works strangely for performance reasons. | |
6 // | |
7 // File systems have been heavily optimised for doing a directory walk in inode | |
8 // order. It can be an order of magnitude faster to walk the directory in this | |
9 // order so we do. *However*, we want out output to be in sorted so it is | |
10 // deterministic. | |
11 // | |
12 // Calling `stat` is also one of the most expensive things you can do (it is | |
13 // roughly equivalent to reading 64/128k of data). Hence, if you have a lot of | |
14 // small files then just reading their contents directly is more efficient. | |
15 // Rather then doing the stat, we assume everything is a file and just try to | |
16 // read a chunk. If the file is smaller than the block size, we know that we | |
17 // have the entire contents. Otherwise we know the file is bigger and can | |
18 // decide to do the stat. If the name turned out to be a directory, then we | |
19 // will get an error. | |
20 | |
21 package dirwalk | |
22 | |
23 import ( | |
24 "io" | |
25 "os" | |
26 "path/filepath" | |
27 "sort" | |
28 ) | |
29 | |
30 type SmallFile struct { | |
31 name string | |
32 data []byte | |
33 } | |
34 type SmallFileByName []SmallFile | |
35 | |
36 func (a SmallFileByName) Len() int { return len(a) } | |
37 func (a SmallFileByName) Swap(i, j int) { a[i], a[j] = a[j], a[i] } | |
38 func (a SmallFileByName) Less(i, j int) bool { return a[i].name < a[j].name } | |
39 | |
40 /* | |
41 type LargeFile struct { | |
42 name string | |
43 } | |
44 type LargeFileByName []LargeFile | |
45 func (a LargeFileByName) Len() int { return len(a) } | |
46 func (a LargeFileByName) Swap(i, j int) { a[i], a[j] = a[j], a[i] } | |
47 func (a LargeFileByName) Less(i, j int) bool { return a[i].name < a[j].name } | |
48 */ | |
49 | |
50 type EntryError struct { | |
51 name string | |
52 err error | |
53 } | |
54 type EntryErrorByName []EntryError | |
55 | |
56 func (a EntryErrorByName) Len() int { return len(a) } | |
57 func (a EntryErrorByName) Swap(i, j int) { a[i], a[j] = a[j], a[i] } | |
58 func (a EntryErrorByName) Less(i, j int) bool { return a[i].name < a[j].name } | |
59 | |
60 func walkNoStatInternal(base string, files []string, smallfile_limit int64, obs WalkObserver) { | |
61 var errors []EntryError | |
62 var smallfiles []SmallFile | |
63 var largefiles []string | |
64 var dirs []EntryError | |
65 | |
66 for _, name := range files { | |
67 fname := filepath.Join(base, name) | |
68 file, err := os.Open(fname) | |
69 | |
70 if err != nil { | |
71 errors = append(errors, EntryError{fname, err}) | |
72 continue | |
73 } | |
74 | |
75 block := make([]byte, smallfile_limit) | |
76 count, err := file.Read(block) | |
77 if err != io.EOF && err != nil { | |
78 // Its probably a directory | |
79 dirs = append(dirs, EntryError{fname, err}) | |
80 continue | |
81 } | |
82 | |
83 // This file was bigger than the block size, stat it | |
84 if int64(count) == smallfile_limit { | |
85 /* | |
86 stat, err := file.Stat() | |
87 if err != nil { | |
88 errors = append(errors, EntryError{fname , err}) | |
89 continue | |
90 } | |
91 */ | |
92 largefiles = append(largefiles, fname) //LargeFile{name: fname, stat: &stat}) | |
93 | |
94 // This file was smaller than the block size | |
95 } else { | |
96 smallfiles = append(smallfiles, SmallFile{name: fname, d ata: block[:count]}) | |
97 } | |
98 file.Close() | |
99 } | |
100 | |
101 sort.Sort(SmallFileByName(smallfiles)) | |
M-A Ruel
2016/09/23 01:48:17
Why pass the file names in order?
| |
102 for _, f := range smallfiles { | |
103 obs.SmallFile(f.name, f.data) | |
104 } | |
105 | |
106 sort.Strings(largefiles) | |
107 for _, fname := range largefiles { | |
108 obs.LargeFile(fname) | |
109 } | |
110 | |
111 sort.Sort(EntryErrorByName(dirs)) | |
112 for _, d := range dirs { | |
113 file, err := os.Open(d.name) | |
114 if err != nil { | |
115 obs.Error(d.name, d.err) | |
116 continue | |
117 } | |
118 | |
119 names, err := file.Readdirnames(0) | |
120 if err != nil { | |
121 obs.Error(d.name, d.err) | |
122 continue | |
123 } | |
124 walkNoStatInternal(d.name, names, smallfile_limit, obs) | |
125 } | |
126 } | |
127 | |
128 func WalkNoStat(root string, smallfile_limit int64, obs WalkObserver) { | |
129 paths := []string{root} | |
130 walkNoStatInternal("", paths, smallfile_limit, obs) | |
131 obs.Finished() | |
132 } | |
OLD | NEW |