Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(129)

Side by Side Diff: common/dirwalk/walknostat.go

Issue 2054763004: luci-go/common/dirwalk: Code for walking a directory tree efficiently Base URL: https://github.com/luci/luci-go@master
Patch Set: Small updates. Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2016 The LUCI Authors. All rights reserved.
2 // Use of this source code is governed under the Apache License, Version 2.0
3 // that can be found in the LICENSE file.
4
5 /**
M-A Ruel 2016/09/15 14:31:03 same
mithro 2016/09/20 12:41:45 I assume you mean regarding the comment style? Do
6
7 This function works strangely for performance reasons.
8
9 File systems have been heavily optimised for doing a directory walk in inode
10 order. It can be an order of magnitude faster to walk the directory in this
11 order so we do. *However*, we want our output to be sorted so it is
12 deterministic.
13
14 Calling `stat` is also one of the most expensive things you can do (it is
15 roughly equivalent to reading 64/128k of data). Hence, if you have a lot of
16 small files then just reading their contents directly is more efficient. Rather
17 than doing the stat, we assume everything is a file and just try to read a
18 chunk. If the file is smaller than the block size, we know that we have the
19 entire contents. Otherwise we know the file is bigger and can decide to do the
20 stat. If the name turned out to be a directory, then we will get an error.
21
22 **/
23 package dirwalk
24
25 import (
26 "io"
27 "os"
28 "path/filepath"
29 "sort"
30 )
31
// SmallFile pairs the path of a file with the bytes that were read from it.
type SmallFile struct {
	name string
	data []byte
}

// SmallFileByName implements sort.Interface, ordering SmallFile values by
// ascending name.
type SmallFileByName []SmallFile

// Len reports the number of entries.
func (a SmallFileByName) Len() int { return len(a) }

// Swap exchanges the entries at positions i and j.
func (a SmallFileByName) Swap(i, j int) { a[i], a[j] = a[j], a[i] }

// Less reports whether the name at position i sorts before the name at j.
func (a SmallFileByName) Less(i, j int) bool { return a[i].name < a[j].name }
41
42 /*
43 type LargeFile struct {
44 name string
45 }
46 type LargeFileByName []LargeFile
47 func (a LargeFileByName) Len() int { return len(a) }
48 func (a LargeFileByName) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
49 func (a LargeFileByName) Less(i, j int) bool { return a[i].name < a[j].name }
50 */
51
// EntryError pairs the path of a directory entry with an error that was
// returned while processing it.
type EntryError struct {
	name string
	err  error
}

// EntryErrorByName implements sort.Interface, ordering EntryError values by
// ascending name.
type EntryErrorByName []EntryError

// Len reports the number of entries.
func (a EntryErrorByName) Len() int { return len(a) }

// Swap exchanges the entries at positions i and j.
func (a EntryErrorByName) Swap(i, j int) { a[i], a[j] = a[j], a[i] }

// Less reports whether the name at position i sorts before the name at j.
func (a EntryErrorByName) Less(i, j int) bool { return a[i].name < a[j].name }
61
62 func walkNoStatInternal(base string, files []string, smallfile_limit int64, obs WalkObserver) {
63 var errors []EntryError
64 var smallfiles []SmallFile
65 var largefiles []string
66 var dirs []EntryError
67
68 for _, name := range files {
69 fname := filepath.Join(base, name)
70 file, err := os.Open(fname)
71
72 if err != nil {
73 errors = append(errors, EntryError{fname, err})
74 continue
75 }
76
77 block := make([]byte, smallfile_limit)
78 count, err := file.Read(block)
79 if err != io.EOF && err != nil {
80 // Its probably a directory
81 dirs = append(dirs, EntryError{fname, err})
82 continue
83 }
84
85 // This file was bigger than the block size, stat it
86 if int64(count) == smallfile_limit {
87 /*
88 stat, err := file.Stat()
89 if err != nil {
90 errors = append(errors, EntryError{fname , err})
91 continue
92 }
93 */
94 largefiles = append(largefiles, fname) //LargeFile{name: fname, stat: &stat})
95
96 // This file was smaller than the block size
97 } else {
98 smallfiles = append(smallfiles, SmallFile{name: fname, d ata: block[:count]})
99 }
100 file.Close()
101 }
102
103 sort.Sort(SmallFileByName(smallfiles))
104 for _, f := range smallfiles {
105 obs.SmallFile(f.name, f.data)
106 }
107
108 sort.Strings(largefiles)
109 for _, fname := range largefiles {
110 obs.LargeFile(fname)
111 }
112
113 sort.Sort(EntryErrorByName(dirs))
114 for _, d := range dirs {
115 file, err := os.Open(d.name)
116 if err != nil {
117 obs.Error(d.name, d.err)
118 continue
119 }
120
121 names, err := file.Readdirnames(0)
122 if err != nil {
123 obs.Error(d.name, d.err)
124 continue
125 }
126 walkNoStatInternal(d.name, names, smallfile_limit, obs)
127 }
128 }
129
130 func WalkNoStat(root string, smallfile_limit int64, obs WalkObserver) {
131 paths := []string{root}
132 walkNoStatInternal("", paths, smallfile_limit, obs)
133 obs.Finished()
134 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698