| OLD | NEW |
| (Empty) | |
| 1 // This tool maps over a certificate database file (as written and read by |
| 2 // github.com/agl/certificatetransparency), and dumps the X.509 entries to two |
| 3 // files: entries.bin (leaf certificates plus the IDs of their extra |
| 4 // certificates) and extra_certs.bin (each distinct extra certificate once). |
| 5 // In both files a certificate is written simply as |
| 6 // <byte count (4 bytes, big endian)> |
| 7 // <certificate DER> |
| 8 // |
| 9 // To run this program you will need to do a setup like this: |
| 10 // |
| 11 // export GOPATH=<enter path here...> |
| 12 // mkdir $GOPATH |
| 13 // go get github.com/agl/certificatetransparency |
| 14 // mkdir $GOPATH/mycttools |
| 15 // ln -s $(realpath dump-all-ct-certs-to-file.go) $GOPATH/mycttools |
| 16 // go run $GOPATH/mycttools/dump-all-ct-certs-to-file.go <input CT-DB file> <output dir> |
| 17 |
| 18 package main |
| 19 |
| 20 import ( |
| 21 "crypto/sha1" |
| 22 "encoding/binary" |
| 23 "fmt" |
| 24 ct "github.com/agl/certificatetransparency" |
| 25 "os" |
| 26 "path" |
| 27 "sync" |
| 28 "time" |
| 29 ) |
| 30 |
// EntryValue is the unit of work handed to the entries file writer: one leaf
// certificate together with the IDs of the extra certificates in its path.
//
// The field order matters: values are built with unkeyed composite literals.
type EntryValue struct {
	// cert is the leaf certificate, written to the entries file as-is.
	cert []byte
	// extraCertsIds holds one 16-bit ID per extra certificate of the entry.
	extraCertsIds []uint16
}
| 35 |
// ExtraCertsValue is the unit of work handed to the extra-certs file writer:
// a batch of extra certificates to be appended to the file in slice order.
type ExtraCertsValue struct {
	// extraCerts lists the certificates of the batch.
	extraCerts [][]byte
}
| 39 |
| 40 type State struct { |
| 41 // This lock must be held when modifying values in State. |
| 42 lock *sync.Mutex |
| 43 |
| 44 // A map from the hash of a certificate to its position (starting from |
| 45 // 0). There are a small number of extra certificates (intermediates) on the |
| 46 // order of thousands. It is much more efficient to just store them once, |
| 47 // since they are duplicated many times in the paths, and can be compressed |
| 48 // to a 16-bit integer. |
| 49 extraCertHashToIdMap map[string]uint16 |
| 50 |
| 51 // The number of entries (certificate paths) visited. |
| 52 numEntries int64 |
| 53 |
| 54 startTime time.Time |
| 55 |
| 56 extraCertsWriterChannel chan ExtraCertsValue |
| 57 entryWriterChannel chan EntryValue |
| 58 } |
| 59 |
// createFile creates (or truncates) the file at path and returns it opened
// for writing. If the file cannot be created, an error is printed to stderr
// and the process exits with status 1, so callers never see a failure.
func createFile(path string) *os.File {
	out, createErr := os.Create(path)
	if createErr != nil {
		fmt.Fprintf(os.Stderr, "Failed to open output file %s: %s\n", path, createErr)
		os.Exit(1)
	}
	return out
}
| 68 |
| 69 func initState(state *State) { |
| 70 state.lock = new(sync.Mutex) |
| 71 state.extraCertHashToIdMap = make(map[string]uint16) |
| 72 state.numEntries = 0 |
| 73 state.startTime = time.Now() |
| 74 state.extraCertsWriterChannel = make(chan ExtraCertsValue, 100) |
| 75 state.entryWriterChannel = make(chan EntryValue, 100) |
| 76 } |
| 77 |
| 78 func closeState(state *State) { |
| 79 close(state.extraCertsWriterChannel) |
| 80 close(state.entryWriterChannel) |
| 81 } |
| 82 |
// calculateHashes returns the SHA-1 digest of every element of dataArray, in
// the same order. Each digest is the raw 20 bytes stored in a string (not
// hex), which makes it directly usable as a map key.
func calculateHashes(dataArray [][]byte) []string {
	digests := make([]string, 0, len(dataArray))
	for idx := range dataArray {
		sum := sha1.Sum(dataArray[idx])
		digests = append(digests, string(sum[:]))
	}
	return digests
}
| 91 |
| 92 func addEntry(state *State, entry *ct.Entry) { |
| 93 if entry.Type != ct.X509Entry { |
| 94 return |
| 95 } |
| 96 |
| 97 // The array of extra certificates. |
| 98 extraCerts := entry.ExtraCerts |
| 99 |
| 100 // Calculate hashes for each extra cert (outside the lock) |
| 101 extraCertsHashes := calculateHashes(extraCerts) |
| 102 |
| 103 extraCertsIds := make([]uint16, len(extraCerts)) |
| 104 newExtraCerts := make([][]byte, 0, len(extraCerts)) |
| 105 |
| 106 // -------------------- |
| 107 // WITHIN LOCK |
| 108 // -------------------- |
| 109 state.lock.Lock() |
| 110 |
| 111 state.numEntries += 1 |
| 112 |
| 113 // Fill extraCertsIds[] with the ID for each extraCert. If it is a |
| 114 // newly seen extra cert, then add it to newExtraCerts. |
| 115 for i, key := range extraCertsHashes { |
| 116 id, found := state.extraCertHashToIdMap[key] |
| 117 if !found { |
| 118 id = uint16(len(state.extraCertHashToIdMap)) |
| 119 state.extraCertHashToIdMap[key] = id |
| 120 newExtraCerts = append(newExtraCerts, extraCerts[i]) |
| 121 } |
| 122 extraCertsIds[i] = id |
| 123 } |
| 124 |
| 125 state.extraCertsWriterChannel <- ExtraCertsValue{newExtraCerts} |
| 126 state.entryWriterChannel <- EntryValue{entry.X509Cert, extraCertsIds} |
| 127 |
| 128 numEntriesProcessed := state.numEntries |
| 129 |
| 130 // -------------------- |
| 131 state.lock.Unlock() |
| 132 // -------------------- |
| 133 |
| 134 // Print status update. |
| 135 if numEntriesProcessed % 50000 == 0 { |
| 136 elapsedMinutes := float64(time.Since(state.startTime)) / float64(time.Minute
) |
| 137 fraction := float64(numEntriesProcessed) / float64(10100000) |
| 138 remainingMinutes := (float64(1) - fraction) * (elapsedMinutes / fraction) |
| 139 fmt.Fprintf(os.Stderr, "[%.0f%%] Processed %d entries in %0.1f minutes (%0.1
f minutes remaining)\n", fraction * float64(100), numEntriesProcessed, elapsedMi
nutes, remainingMinutes) |
| 140 } |
| 141 } |
| 142 |
// appendCertToFile writes one length-prefixed record to outFile: the length
// of certBytes as a 4-byte big-endian integer, followed by certBytes itself.
// If either write fails, an error is printed to stderr and the process exits
// with status 1.
func appendCertToFile(certBytes []byte, outFile *os.File) {
	// Length prefix first.
	if lenErr := binary.Write(outFile, binary.BigEndian, uint32(len(certBytes))); lenErr != nil {
		fmt.Fprintf(os.Stderr, "binary.Write failed: %s\n", lenErr)
		os.Exit(1)
	}

	// Then the certificate bytes.
	_, bodyErr := outFile.Write(certBytes)
	if bodyErr != nil {
		fmt.Fprintf(os.Stderr, "Failed to write to output file: %s\n", bodyErr)
		os.Exit(1)
	}
}
| 158 |
| 159 func entriesFileWriterWorker(filepath string, channel chan EntryValue) { |
| 160 file := createFile(filepath) |
| 161 defer file.Close() |
| 162 |
| 163 for value := range channel { |
| 164 // Write the leaf cert. |
| 165 appendCertToFile(value.cert, file) |
| 166 |
| 167 // Write the number of extra certs. |
| 168 err := binary.Write(file, binary.BigEndian, uint16(len(value.extraCertsIds))
) |
| 169 if err != nil { |
| 170 fmt.Fprintf(os.Stderr, "binary.Write failed: %s\n", err) |
| 171 os.Exit(1) |
| 172 } |
| 173 |
| 174 // Write the ids of the extra certs. |
| 175 err = binary.Write(file, binary.BigEndian, value.extraCertsIds) |
| 176 if err != nil { |
| 177 fmt.Fprintf(os.Stderr, "binary.Write failed: %s\n", err) |
| 178 os.Exit(1) |
| 179 } |
| 180 } |
| 181 } |
| 182 |
| 183 func extraCertsFileWriterWorker(filepath string, channel chan ExtraCertsValue) { |
| 184 file := createFile(filepath) |
| 185 defer file.Close() |
| 186 |
| 187 for value := range channel { |
| 188 for _, cert := range value.extraCerts { |
| 189 appendCertToFile(cert, file) |
| 190 } |
| 191 } |
| 192 } |
| 193 |
| 194 func main() { |
| 195 if len(os.Args) != 3 { |
| 196 fmt.Fprintf(os.Stderr, "Usage: %s <input CT-DB file> <output dir
>\n", os.Args[0]) |
| 197 os.Exit(1) |
| 198 } |
| 199 inputDbFileName := os.Args[1] |
| 200 outputPath := os.Args[2] |
| 201 |
| 202 // Open the input database. |
| 203 in, err := os.Open(inputDbFileName) |
| 204 if err != nil { |
| 205 fmt.Fprintf(os.Stderr, "Failed to open entries file: %s\n", err) |
| 206 os.Exit(1) |
| 207 } |
| 208 defer in.Close() |
| 209 |
| 210 inEntriesFile := ct.EntriesFile{in} |
| 211 |
| 212 // Create the output directory and files. |
| 213 err = os.Mkdir(outputPath, 0755) |
| 214 if err != nil { |
| 215 fmt.Fprintf(os.Stderr, "Couldn't create directory %s: %s\n", outputPath, err
) |
| 216 os.Exit(1) |
| 217 } |
| 218 |
| 219 // Create state shared by entries workers. |
| 220 var state State |
| 221 initState(&state) |
| 222 defer closeState(&state) |
| 223 |
| 224 // Create workers responsible for writing the files. |
| 225 go entriesFileWriterWorker(path.Join(outputPath, "entries.bin"), state.entryWr
iterChannel) |
| 226 go extraCertsFileWriterWorker(path.Join(outputPath, "extra_certs.bin"), state.
extraCertsWriterChannel) |
| 227 |
| 228 // Create a worker that writes the extra certs |
| 229 |
| 230 fmt.Fprintf(os.Stderr, "Dumping entries... (Time estimate is based on assumpti
on there are 10 million entries in the CT database).\n") |
| 231 |
| 232 inEntriesFile.Map(func(ent *ct.EntryAndPosition, err error) { |
| 233 if err != nil { |
| 234 return |
| 235 } |
| 236 addEntry(&state, ent.Entry) |
| 237 }) |
| 238 } |
| OLD | NEW |