Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(365)

Side by Side Diff: common/dirtools/walknostat.go

Issue 2014243002: WIP: Archive command which is much faster (Closed) Base URL: https://github.com/luci/luci-go@master
Patch Set: Fixes. Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« client/cmd/isolate/fastarchive.go ('K') | « common/dirtools/walker.go ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright 2016 The LUCI Authors. All rights reserved.
2 // Use of this source code is governed under the Apache License, Version 2.0
3 // that can be found in the LICENSE file.
4
5 /**
6
7 This function works strangely for performance reasons; I'll try to explain below.
8
9 File systems have been heavily optimised for doing a directory walk in inode
10 order. It can be an order of magnitude faster to walk the directory this way.
11 *However*, we want our output to be in sorted order so it is deterministic.
12
13 Calling `stat` on a file is one of the most expensive things you can do. It is
14 equivalent to reading 64/128k of data. Hence, if you have a lot of small files
15 then just reading their contents directly is more efficient.
16
17 **/
18 package dirtools
19
20 import (
21 "io"
22 "os"
23 "path/filepath"
24 "sort"
25 )
26
// SmallFile pairs a file path with the bytes that were read from that file.
type SmallFile struct {
	name string // path of the file
	data []byte // contents read from the file
}

// SmallFileByName implements sort.Interface, ordering SmallFile values by
// their name field in ascending order.
type SmallFileByName []SmallFile

// Len reports how many entries the slice holds.
func (s SmallFileByName) Len() int {
	return len(s)
}

// Swap exchanges the entries at positions i and j.
func (s SmallFileByName) Swap(i, j int) {
	s[j], s[i] = s[i], s[j]
}

// Less reports whether the name at position i sorts before the name at
// position j.
func (s SmallFileByName) Less(i, j int) bool {
	return s[j].name > s[i].name
}
36
37 /*
38 type LargeFile struct {
39 name string
40 }
41 type LargeFileByName []LargeFile
42 func (a LargeFileByName) Len() int { return len(a) }
43 func (a LargeFileByName) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
44 func (a LargeFileByName) Less(i, j int) bool { return a[i].name < a[j].name }
45 */
46
// EntryError associates a path with an error that was encountered while
// processing that path.
type EntryError struct {
	name string // path of the entry
	err  error  // error recorded for the entry
}

// EntryErrorByName implements sort.Interface, ordering EntryError values by
// their name field in ascending order.
type EntryErrorByName []EntryError

// Len reports how many entries the slice holds.
func (e EntryErrorByName) Len() int {
	return len(e)
}

// Swap exchanges the entries at positions i and j.
func (e EntryErrorByName) Swap(i, j int) {
	e[j], e[i] = e[i], e[j]
}

// Less reports whether the name at position i sorts before the name at
// position j.
func (e EntryErrorByName) Less(i, j int) bool {
	return e[j].name > e[i].name
}
56
57 func walkNoStatInternal(base string, files []string, smallfile_limit int64, obs WalkObserver) {
58 var errors []EntryError
59 var smallfiles []SmallFile
60 var largefiles []string
61 var dirs []EntryError
62
63 for _, name := range files {
64 fname := filepath.Join(base, name)
65 file, err := os.Open(fname)
66
67 if err != nil {
68 errors = append(errors, EntryError{fname, err})
69 continue
70 }
71
72 block := make([]byte, smallfile_limit)
73 count, err := file.Read(block)
74 if err != io.EOF && err != nil {
75 // Its probably a directory
76 dirs = append(dirs, EntryError{fname, err})
77 continue
78 }
79
80 // This file was bigger than the block size, stat it
81 if int64(count) == smallfile_limit {
82 /*
83 stat, err := file.Stat()
84 if err != nil {
85 errors = append(errors, EntryError{fname , err})
86 continue
87 }
88 */
89 largefiles = append(largefiles, fname) //LargeFile{name: fname, stat: &stat})
90
91 // This file was smaller than the block size
92 } else {
93 smallfiles = append(smallfiles, SmallFile{name: fname, d ata: block[:count]})
94 }
95 file.Close()
96 }
97
98 sort.Sort(SmallFileByName(smallfiles))
99 for _, f := range smallfiles {
100 obs.SmallFile(f.name, f.data)
101 }
102
103 sort.Strings(largefiles)
104 for _, fname := range largefiles {
105 obs.LargeFile(fname)
106 }
107
108 sort.Sort(EntryErrorByName(dirs))
109 for _, d := range dirs {
110 file, err := os.Open(d.name)
111 if err != nil {
112 obs.Error(d.name, d.err)
113 continue
114 }
115
116 names, err := file.Readdirnames(0)
117 if err != nil {
118 obs.Error(d.name, d.err)
119 continue
120 }
121 walkNoStatInternal(d.name, names, smallfile_limit, obs)
122 }
123 }
124
125 func WalkNoStat(root string, smallfile_limit int64, obs WalkObserver) {
126 paths := []string{root}
127 walkNoStatInternal("", paths, smallfile_limit, obs)
128 }
OLDNEW
« client/cmd/isolate/fastarchive.go ('K') | « common/dirtools/walker.go ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698