Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright 2016 The LUCI Authors. All rights reserved. | |
| 2 // Use of this source code is governed under the Apache License, Version 2.0 | |
| 3 // that can be found in the LICENSE file. | |
| 4 | |
| 5 /** | |
|
M-A Ruel
2016/09/15 14:31:03
same
mithro
2016/09/20 12:41:45
I assume you mean regarding the comment style?
Do
| |
| 6 | |
| 7 This function works strangely for performance reasons. | |
| 8 | |
| 9 File systems have been heavily optimised for doing a directory walk in inode | |
| 10 order. It can be an order of magnitude faster to walk the directory in this | |
| 11 order so we do. *However*, we want our output to be sorted so it is | |
| 12 deterministic. | |
| 13 | |
| 14 Calling `stat` is also one of the most expensive things you can do (it is | |
| 15 roughly equivalent to reading 64/128k of data). Hence, if you have a lot of | |
| 16 small files then just reading their contents directly is more efficient. Rather | |
| 17 than doing the stat, we assume everything is a file and just try to read a | |
| 18 chunk. If the file is smaller than the block size, we know that we have the | |
| 19 entire contents. Otherwise we know the file is bigger and can decide to do the | |
| 20 stat. If the name turned out to be a directory, then we will get an error. | |
| 21 | |
| 22 **/ | |
| 23 package dirwalk | |
| 24 | |
| 25 import ( | |
| 26 "io" | |
| 27 "os" | |
| 28 "path/filepath" | |
| 29 "sort" | |
| 30 ) | |
| 31 | |
| 32 type SmallFile struct { | |
| 33 name string | |
| 34 data []byte | |
| 35 } | |
| 36 type SmallFileByName []SmallFile | |
| 37 | |
| 38 func (a SmallFileByName) Len() int { return len(a) } | |
| 39 func (a SmallFileByName) Swap(i, j int) { a[i], a[j] = a[j], a[i] } | |
| 40 func (a SmallFileByName) Less(i, j int) bool { return a[i].name < a[j].name } | |
| 41 | |
| 42 /* | |
| 43 type LargeFile struct { | |
| 44 name string | |
| 45 } | |
| 46 type LargeFileByName []LargeFile | |
| 47 func (a LargeFileByName) Len() int { return len(a) } | |
| 48 func (a LargeFileByName) Swap(i, j int) { a[i], a[j] = a[j], a[i] } | |
| 49 func (a LargeFileByName) Less(i, j int) bool { return a[i].name < a[j].name } | |
| 50 */ | |
| 51 | |
| 52 type EntryError struct { | |
| 53 name string | |
| 54 err error | |
| 55 } | |
| 56 type EntryErrorByName []EntryError | |
| 57 | |
| 58 func (a EntryErrorByName) Len() int { return len(a) } | |
| 59 func (a EntryErrorByName) Swap(i, j int) { a[i], a[j] = a[j], a[i] } | |
| 60 func (a EntryErrorByName) Less(i, j int) bool { return a[i].name < a[j].name } | |
| 61 | |
| 62 func walkNoStatInternal(base string, files []string, smallfile_limit int64, obs WalkObserver) { | |
| 63 var errors []EntryError | |
| 64 var smallfiles []SmallFile | |
| 65 var largefiles []string | |
| 66 var dirs []EntryError | |
| 67 | |
| 68 for _, name := range files { | |
| 69 fname := filepath.Join(base, name) | |
| 70 file, err := os.Open(fname) | |
| 71 | |
| 72 if err != nil { | |
| 73 errors = append(errors, EntryError{fname, err}) | |
| 74 continue | |
| 75 } | |
| 76 | |
| 77 block := make([]byte, smallfile_limit) | |
| 78 count, err := file.Read(block) | |
| 79 if err != io.EOF && err != nil { | |
| 80 // It's probably a directory | |
| 81 dirs = append(dirs, EntryError{fname, err}) | |
| 82 continue | |
| 83 } | |
| 84 | |
| 85 // This file filled the whole block, so it is at least the block size; treat it as large | |
| 86 if int64(count) == smallfile_limit { | |
| 87 /* | |
| 88 stat, err := file.Stat() | |
| 89 if err != nil { | |
| 90 errors = append(errors, EntryError{fname , err}) | |
| 91 continue | |
| 92 } | |
| 93 */ | |
| 94 largefiles = append(largefiles, fname) //LargeFile{name: fname, stat: &stat}) | |
| 95 | |
| 96 // This file was smaller than the block size | |
| 97 } else { | |
| 98 smallfiles = append(smallfiles, SmallFile{name: fname, data: block[:count]}) | |
| 99 } | |
| 100 file.Close() | |
| 101 } | |
| 102 | |
| 103 sort.Sort(SmallFileByName(smallfiles)) | |
| 104 for _, f := range smallfiles { | |
| 105 obs.SmallFile(f.name, f.data) | |
| 106 } | |
| 107 | |
| 108 sort.Strings(largefiles) | |
| 109 for _, fname := range largefiles { | |
| 110 obs.LargeFile(fname) | |
| 111 } | |
| 112 | |
| 113 sort.Sort(EntryErrorByName(dirs)) | |
| 114 for _, d := range dirs { | |
| 115 file, err := os.Open(d.name) | |
| 116 if err != nil { | |
| 117 obs.Error(d.name, d.err) | |
| 118 continue | |
| 119 } | |
| 120 | |
| 121 names, err := file.Readdirnames(0) | |
| 122 if err != nil { | |
| 123 obs.Error(d.name, d.err) | |
| 124 continue | |
| 125 } | |
| 126 walkNoStatInternal(d.name, names, smallfile_limit, obs) | |
| 127 } | |
| 128 } | |
| 129 | |
| 130 func WalkNoStat(root string, smallfile_limit int64, obs WalkObserver) { | |
| 131 paths := []string{root} | |
| 132 walkNoStatInternal("", paths, smallfile_limit, obs) | |
| 133 obs.Finished() | |
| 134 } | |
| OLD | NEW |