Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(480)

Side by Side Diff: net/tools/ct_mapper/dump-ct.go

Issue 1238413004: Framework for iterating over certificates in CT database from Chromium code. (not for review) (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Make samples page work Created 3 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « net/tools/ct_mapper/ct_mapper_main.cc ('k') | net/tools/ct_mapper/dump-ct.sh » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
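1 // This tool maps over a certificate database file (as written and read by
2 // github.com/agl/certificatetransparency) and dumps its X.509 entries into
3 // two files, entries.bin and extra_certs.bin, inside an output directory.
4 //
5 // Every certificate is stored as <byte count (4 bytes, big endian)>
6 // <certificate DER>. In entries.bin each leaf certificate is followed by a
7 // big-endian uint16 count and that many uint16 IDs into extra_certs.bin.
8 //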
9 // To run this program you will need to do a setup like this:
10 //
11 // export GOPATH=<enter path here...>
12 // mkdir $GOPATH
13 // go get github.com/agl/certificatetransparency
14 // mkdir $GOPATH/mycttools
15 // ln -s $(realpath dump-ct.go) $GOPATH/mycttools
16 // go run $GOPATH/mycttools/dump-ct.go <input CT-DB file> <output dir>
17
18 package main
19
20 import (
21 "crypto/sha1"
22 "encoding/binary"
23 "fmt"
24 ct "github.com/agl/certificatetransparency"
25 "os"
26 "path"
27 "sync"
28 "time"
29 )
30
// EntryValue is one record queued for the entries file: a leaf certificate
// plus the IDs of the extra certificates that accompany it.
//
// The field order is significant: addEntry constructs this struct with a
// positional (unkeyed) literal.
type EntryValue struct {
	// cert holds the leaf certificate bytes taken from the CT entry.
	cert []byte

	// extraCertsIds holds one 16-bit ID per extra certificate of the entry,
	// in the same order as the entry's extra certificates.
	extraCertsIds []uint16
}
35
// ExtraCertsValue is one batch queued for the extra-certificates file.
type ExtraCertsValue struct {
	// extraCerts lists the certificates to append to the file, in order.
	// addEntry fills it with the extra certificates it has not seen before.
	extraCerts [][]byte
}
39
40 type State struct {
41 // This lock must be held when modifying values in State.
42 lock *sync.Mutex
43
44 // A map from the hash of a certificate to its position (starting from
45 // 0). There are a small number of extra certificates (intermediates) on the
46 // order of thousands. It is much more efficient to just store them once,
47 // since they are duplicated many times in the paths, and can be compressed
48 // to a 16-bit integer.
49 extraCertHashToIdMap map[string]uint16
50
51 // The number of entries (certificate paths) visited.
52 numEntries int64
53
54 startTime time.Time
55
56 extraCertsWriterChannel chan ExtraCertsValue
57 entryWriterChannel chan EntryValue
58 }
59
// createFile creates (or truncates) the file at path and returns it.
//
// On failure it prints a diagnostic to stderr and terminates the process with
// exit status 1, so callers always receive a usable file.
func createFile(path string) *os.File {
	out, err := os.Create(path)
	if err == nil {
		return out
	}

	fmt.Fprintf(os.Stderr, "Failed to open output file %s: %s\n", path, err)
	os.Exit(1)
	return nil // Not reached; os.Exit does not return.
}
68
69 func initState(state *State) {
70 state.lock = new(sync.Mutex)
71 state.extraCertHashToIdMap = make(map[string]uint16)
72 state.numEntries = 0
73 state.startTime = time.Now()
74 state.extraCertsWriterChannel = make(chan ExtraCertsValue, 100)
75 state.entryWriterChannel = make(chan EntryValue, 100)
76 }
77
78 func closeState(state *State) {
79 close(state.extraCertsWriterChannel)
80 close(state.entryWriterChannel)
81 }
82
// calculateHashes returns the SHA-1 digest of every element of dataArray.
//
// The result has the same length and order as dataArray. Each digest is the
// raw 20-byte hash stored in a string so that it can serve as a map key.
func calculateHashes(dataArray [][]byte) []string {
	digests := make([]string, 0, len(dataArray))
	for idx := range dataArray {
		sum := sha1.Sum(dataArray[idx])
		digests = append(digests, string(sum[:]))
	}
	return digests
}
91
92 func addEntry(state *State, entry *ct.Entry) {
93 if entry.Type != ct.X509Entry {
94 return
95 }
96
97 // The array of extra certificates.
98 extraCerts := entry.ExtraCerts
99
100 // Calculate hashes for each extra cert (outside the lock)
101 extraCertsHashes := calculateHashes(extraCerts)
102
103 extraCertsIds := make([]uint16, len(extraCerts))
104 newExtraCerts := make([][]byte, 0, len(extraCerts))
105
106 // --------------------
107 // WITHIN LOCK
108 // --------------------
109 state.lock.Lock()
110
111 state.numEntries += 1
112
113 // Fill extraCertsIds[] with the ID for each extraCert. If it is a
114 // newly seen extra cert, then add it to newExtraCerts.
115 for i, key := range extraCertsHashes {
116 id, found := state.extraCertHashToIdMap[key]
117 if !found {
118 id = uint16(len(state.extraCertHashToIdMap))
119 state.extraCertHashToIdMap[key] = id
120 newExtraCerts = append(newExtraCerts, extraCerts[i])
121 }
122 extraCertsIds[i] = id
123 }
124
125 state.extraCertsWriterChannel <- ExtraCertsValue{newExtraCerts}
126 state.entryWriterChannel <- EntryValue{entry.X509Cert, extraCertsIds}
127
128 numEntriesProcessed := state.numEntries
129
130 // --------------------
131 state.lock.Unlock()
132 // --------------------
133
134 // Print status update.
135 if numEntriesProcessed % 50000 == 0 {
136 elapsedMinutes := float64(time.Since(state.startTime)) / float64(time.Minute )
137 fraction := float64(numEntriesProcessed) / float64(10100000)
138 remainingMinutes := (float64(1) - fraction) * (elapsedMinutes / fraction)
139 fmt.Fprintf(os.Stderr, "[%.0f%%] Processed %d entries in %0.1f minutes (%0.1 f minutes remaining)\n", fraction * float64(100), numEntriesProcessed, elapsedMi nutes, remainingMinutes)
140 }
141 }
142
// appendCertToFile writes one length-prefixed certificate to outFile: a
// 4-byte big-endian byte count followed by the certificate bytes themselves.
//
// If either write fails, a diagnostic is printed to stderr and the process
// exits with status 1.
func appendCertToFile(certBytes []byte, outFile *os.File) {
	// Encode the length prefix by hand; it is the same 4 bytes that
	// binary.Write would emit for a uint32.
	var lengthPrefix [4]byte
	binary.BigEndian.PutUint32(lengthPrefix[:], uint32(len(certBytes)))

	if _, err := outFile.Write(lengthPrefix[:]); err != nil {
		fmt.Fprintf(os.Stderr, "binary.Write failed: %s\n", err)
		os.Exit(1)
	}

	_, err := outFile.Write(certBytes)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Failed to write to output file: %s\n", err)
		os.Exit(1)
	}
}
158
159 func entriesFileWriterWorker(filepath string, channel chan EntryValue) {
160 file := createFile(filepath)
161 defer file.Close()
162
163 for value := range channel {
164 // Write the leaf cert.
165 appendCertToFile(value.cert, file)
166
167 // Write the number of extra certs.
168 err := binary.Write(file, binary.BigEndian, uint16(len(value.extraCertsIds)) )
169 if err != nil {
170 fmt.Fprintf(os.Stderr, "binary.Write failed: %s\n", err)
171 os.Exit(1)
172 }
173
174 // Write the ids of the extra certs.
175 err = binary.Write(file, binary.BigEndian, value.extraCertsIds)
176 if err != nil {
177 fmt.Fprintf(os.Stderr, "binary.Write failed: %s\n", err)
178 os.Exit(1)
179 }
180 }
181 }
182
183 func extraCertsFileWriterWorker(filepath string, channel chan ExtraCertsValue) {
184 file := createFile(filepath)
185 defer file.Close()
186
187 for value := range channel {
188 for _, cert := range value.extraCerts {
189 appendCertToFile(cert, file)
190 }
191 }
192 }
193
194 func main() {
195 if len(os.Args) != 3 {
196 fmt.Fprintf(os.Stderr, "Usage: %s <input CT-DB file> <output dir >\n", os.Args[0])
197 os.Exit(1)
198 }
199 inputDbFileName := os.Args[1]
200 outputPath := os.Args[2]
201
202 // Open the input database.
203 in, err := os.Open(inputDbFileName)
204 if err != nil {
205 fmt.Fprintf(os.Stderr, "Failed to open entries file: %s\n", err)
206 os.Exit(1)
207 }
208 defer in.Close()
209
210 inEntriesFile := ct.EntriesFile{in}
211
212 // Create the output directory and files.
213 err = os.Mkdir(outputPath, 0755)
214 if err != nil {
215 fmt.Fprintf(os.Stderr, "Couldn't create directory %s: %s\n", outputPath, err )
216 os.Exit(1)
217 }
218
219 // Create state shared by entries workers.
220 var state State
221 initState(&state)
222 defer closeState(&state)
223
224 // Create workers responsible for writing the files.
225 go entriesFileWriterWorker(path.Join(outputPath, "entries.bin"), state.entryWr iterChannel)
226 go extraCertsFileWriterWorker(path.Join(outputPath, "extra_certs.bin"), state. extraCertsWriterChannel)
227
228 // Create a worker that writes the extra certs
229
230 fmt.Fprintf(os.Stderr, "Dumping entries... (Time estimate is based on assumpti on there are 10 million entries in the CT database).\n")
231
232 inEntriesFile.Map(func(ent *ct.EntryAndPosition, err error) {
233 if err != nil {
234 return
235 }
236 addEntry(&state, ent.Entry)
237 })
238 }
OLDNEW
« no previous file with comments | « net/tools/ct_mapper/ct_mapper_main.cc ('k') | net/tools/ct_mapper/dump-ct.sh » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698