OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2016 The LUCI Authors. All rights reserved. | |
2 // Use of this source code is governed under the Apache License, Version 2.0 | |
3 // that can be found in the LICENSE file. | |
4 | |
5 /** | |
M-A Ruel
2016/09/15 14:31:03
same
mithro
2016/09/20 12:41:45
I assume you mean regarding the comment style?
Do
| |
6 | |
7 This function works strangely for performance reasons. | |
8 | |
9 File systems have been heavily optimised for doing a directory walk in inode | |
10 order. It can be an order of magnitude faster to walk the directory in this | |
11 order so we do. *However*, we want our output to be sorted so that it is | |
12 deterministic. | |
13 | |
14 Calling `stat` is also one of the most expensive things you can do (it is | |
15 roughly equivalent to reading 64/128k of data). Hence, if you have a lot of | |
16 small files then just reading their contents directly is more efficient. Rather | |
17 than doing the stat, we assume everything is a file and just try to read a | |
18 chunk. If the file is smaller than the block size, we know that we have the | |
19 entire contents. Otherwise we know the file is bigger and can decide to do the | |
20 stat. If the name turned out to be a directory, then we will get an error. | |
21 | |
22 **/ | |
23 package dirwalk | |
24 | |
25 import ( | |
26 "io" | |
27 "os" | |
28 "path/filepath" | |
29 "sort" | |
30 ) | |
31 | |
// SmallFile is a file whose content was short enough to be captured by the
// initial read: name is the path it was opened with and data is the content
// that read returned.
type SmallFile struct {
	name string
	data []byte
}

// SmallFileByName implements sort.Interface, ordering SmallFile values by
// ascending name so they can be reported in a deterministic order.
type SmallFileByName []SmallFile

// Compile-time check that SmallFileByName can be handed to sort.Sort.
var _ sort.Interface = SmallFileByName(nil)

// Len returns the number of files in the slice.
func (a SmallFileByName) Len() int { return len(a) }

// Swap exchanges the files at indexes i and j.
func (a SmallFileByName) Swap(i, j int) { a[i], a[j] = a[j], a[i] }

// Less reports whether the name at index i sorts before the name at index j
// (plain byte-wise string comparison).
func (a SmallFileByName) Less(i, j int) bool { return a[i].name < a[j].name }
41 | |
42 /* | |
43 type LargeFile struct { | |
44 name string | |
45 } | |
46 type LargeFileByName []LargeFile | |
47 func (a LargeFileByName) Len() int { return len(a) } | |
48 func (a LargeFileByName) Swap(i, j int) { a[i], a[j] = a[j], a[i] } | |
49 func (a LargeFileByName) Less(i, j int) bool { return a[i].name < a[j].name } | |
50 */ | |
51 | |
// EntryError pairs the path of a directory entry with the error that was
// returned while opening or reading it.
type EntryError struct {
	name string
	err  error
}

// EntryErrorByName implements sort.Interface, ordering EntryError values by
// ascending name so they can be processed in a deterministic order.
type EntryErrorByName []EntryError

// Compile-time check that EntryErrorByName can be handed to sort.Sort.
var _ sort.Interface = EntryErrorByName(nil)

// Len returns the number of entries in the slice.
func (a EntryErrorByName) Len() int { return len(a) }

// Swap exchanges the entries at indexes i and j.
func (a EntryErrorByName) Swap(i, j int) { a[i], a[j] = a[j], a[i] }

// Less reports whether the name at index i sorts before the name at index j
// (plain byte-wise string comparison).
func (a EntryErrorByName) Less(i, j int) bool { return a[i].name < a[j].name }
61 | |
62 func walkNoStatInternal(base string, files []string, smallfile_limit int64, obs WalkObserver) { | |
63 var errors []EntryError | |
64 var smallfiles []SmallFile | |
65 var largefiles []string | |
66 var dirs []EntryError | |
67 | |
68 for _, name := range files { | |
69 fname := filepath.Join(base, name) | |
70 file, err := os.Open(fname) | |
71 | |
72 if err != nil { | |
73 errors = append(errors, EntryError{fname, err}) | |
74 continue | |
75 } | |
76 | |
77 block := make([]byte, smallfile_limit) | |
78 count, err := file.Read(block) | |
79 if err != io.EOF && err != nil { | |
80 // Its probably a directory | |
81 dirs = append(dirs, EntryError{fname, err}) | |
82 continue | |
83 } | |
84 | |
85 // This file was bigger than the block size, stat it | |
86 if int64(count) == smallfile_limit { | |
87 /* | |
88 stat, err := file.Stat() | |
89 if err != nil { | |
90 errors = append(errors, EntryError{fname , err}) | |
91 continue | |
92 } | |
93 */ | |
94 largefiles = append(largefiles, fname) //LargeFile{name: fname, stat: &stat}) | |
95 | |
96 // This file was smaller than the block size | |
97 } else { | |
98 smallfiles = append(smallfiles, SmallFile{name: fname, d ata: block[:count]}) | |
99 } | |
100 file.Close() | |
101 } | |
102 | |
103 sort.Sort(SmallFileByName(smallfiles)) | |
104 for _, f := range smallfiles { | |
105 obs.SmallFile(f.name, f.data) | |
106 } | |
107 | |
108 sort.Strings(largefiles) | |
109 for _, fname := range largefiles { | |
110 obs.LargeFile(fname) | |
111 } | |
112 | |
113 sort.Sort(EntryErrorByName(dirs)) | |
114 for _, d := range dirs { | |
115 file, err := os.Open(d.name) | |
116 if err != nil { | |
117 obs.Error(d.name, d.err) | |
118 continue | |
119 } | |
120 | |
121 names, err := file.Readdirnames(0) | |
122 if err != nil { | |
123 obs.Error(d.name, d.err) | |
124 continue | |
125 } | |
126 walkNoStatInternal(d.name, names, smallfile_limit, obs) | |
127 } | |
128 } | |
129 | |
130 func WalkNoStat(root string, smallfile_limit int64, obs WalkObserver) { | |
131 paths := []string{root} | |
132 walkNoStatInternal("", paths, smallfile_limit, obs) | |
133 obs.Finished() | |
134 } | |
OLD | NEW |