OLD | NEW |
(Empty) | |
| 1 # -*- coding: utf-8 -*- |
| 2 # copyright 2003-2011 LOGILAB S.A. (Paris, FRANCE), all rights reserved. |
| 3 # contact http://www.logilab.fr/ -- mailto:contact@logilab.fr |
| 4 # |
| 5 # This file is part of logilab-common. |
| 6 # |
| 7 # logilab-common is free software: you can redistribute it and/or modify it unde
r |
| 8 # the terms of the GNU Lesser General Public License as published by the Free |
| 9 # Software Foundation, either version 2.1 of the License, or (at your option) an
y |
| 10 # later version. |
| 11 # |
| 12 # logilab-common is distributed in the hope that it will be useful, but WITHOUT |
| 13 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| 14 # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more |
| 15 # details. |
| 16 # |
| 17 # You should have received a copy of the GNU Lesser General Public License along |
| 18 # with logilab-common. If not, see <http://www.gnu.org/licenses/>. |
| 19 """This is a DBF reader which reads Visual Fox Pro DBF format with Memo field |
| 20 |
| 21 Usage: |
| 22 |
| 23 >>> rec = readDbf('test.dbf') |
| 24 >>> for line in rec: |
 | 25 >>> print(line['name']) |
| 26 |
| 27 |
| 28 :date: 13/07/2007 |
| 29 |
| 30 http://www.physics.ox.ac.uk/users/santoso/Software.Repository.html |
| 31 page says code is "available as is without any warranty or support". |
| 32 """ |
| 33 from __future__ import print_function |
| 34 |
| 35 import struct |
| 36 import os, os.path |
| 37 import sys |
| 38 import csv |
| 39 import tempfile |
| 40 |
| 41 from six.moves import range |
| 42 |
class Dbase:
    """Reader for Visual FoxPro / dBase DBF files with optional memo support.

    Typical use::

        db = Dbase()
        db.open('table.dbf')
        for i in range(db.get_numrecords()):
            row = db.get_record_with_names(i)
        db.close()

    Field values are returned as raw ``bytes``; field names (dict keys) are
    decoded to text.  Memo-typed fields (M/G/B/P) are resolved through the
    companion ``.dbt``/``.fpt`` file when one exists next to the data file.
    """

    def __init__(self):
        self.fdb = None            # open file object for the .dbf data file
        self.fmemo = None          # open file object for the memo file, if any
        self.db_data = None        # raw record bytes read from the .dbf file
        self.memo_data = None      # raw bytes of the whole memo file
        self.fields = None         # {1-based index: field descriptor dict}
        self.num_records = 0       # record count from the .dbf header
        self.header = None         # parsed .dbf header fields
        self.memo_file = ''        # path of the memo file ('' when absent)
        self.memo_header = None    # parsed memo-file header fields
        self.memo_block_size = 0   # memo block size (512 when header says 0)
        self.memo_header_len = 0   # offset of the first memo block

    def _drop_after_NULL(self, txt):
        """Return the prefix of *txt* (bytes) up to the first NUL byte.

        Field names in the descriptor area are NUL-padded to 11 bytes.
        """
        return txt.split(b'\x00', 1)[0]

    def _reverse_endian(self, num):
        """Decode *num* (4 little-endian bytes) as an unsigned integer.

        Returns 0 for empty input, i.e. an absent memo reference.
        (The original unpack/pack/unpack round-trip was an identity; a single
        little-endian unpack yields the same value.)
        """
        if not len(num):
            return 0
        return struct.unpack('<L', num)[0]

    def _assign_ids(self, lst, ids):
        """Zip the parallel sequences *lst* (values) and *ids* (names) into a dict."""
        return dict(zip(ids, lst))

    def open(self, db_name):
        """Open the .dbf file *db_name*, parse its header and field descriptors.

        Also opens the companion memo file (``.dbt`` or ``.fpt``) when one
        exists beside the data file.  Raises IOError when the file is too
        small to be a dbf file.
        """
        filesize = os.path.getsize(db_name)
        if filesize <= 68:
            raise IOError('The file is not large enough to be a dbf file')

        self.fdb = open(db_name, 'rb')

        # A memo file shares the data file's basename: '<name>.dbt' (replace
        # last letter) or '<name>.fpt' (replace 'dbf' extension).
        self.memo_file = ''
        if os.path.isfile(db_name[0:-1] + 't'):
            self.memo_file = db_name[0:-1] + 't'
        elif os.path.isfile(db_name[0:-3] + 'fpt'):
            self.memo_file = db_name[0:-3] + 'fpt'

        if self.memo_file:
            # Memo header: bytes 6-7 hold the block size, big-endian.
            self.fmemo = open(self.memo_file, 'rb')
            self.memo_data = self.fmemo.read()
            self.memo_header = self._assign_ids(
                struct.unpack('>6x1H', self.memo_data[:8]), ['Block size'])
            block_size = self.memo_header['Block size']
            if not block_size:
                block_size = 512
            self.memo_block_size = block_size
            self.memo_header_len = block_size

        # .dbf header: version byte, last-update date, record count and sizes.
        data = self.fdb.read(32)
        self.header = self._assign_ids(
            struct.unpack('<B 3B L 2H 20x', data),
            ['id', 'Year', 'Month', 'Day', '# of Records',
             'Header Size', 'Record Size'])
        self.header['id'] = hex(self.header['id'])

        self.num_records = self.header['# of Records']
        # Field descriptors: 32 bytes each, terminated by a 0x0d byte.
        data = self.fdb.read(self.header['Header Size'] - 34)
        self.fields = {}
        x = 0
        header_pattern = '<11s c 4x B B 14x'
        ids = ['Field Name', 'Field Type', 'Field Length', 'Field Precision']
        pattern_len = 32
        for offset in range(0, len(data), 32):
            # Slice comparison (not ord()) so this works on py2 str and py3 bytes.
            if data[offset:offset + 1] == b'\x0d':
                break
            x += 1
            data_subset = data[offset: offset + pattern_len]
            if len(data_subset) < pattern_len:
                data_subset += b' ' * (pattern_len - len(data_subset))
            self.fields[x] = self._assign_ids(
                struct.unpack(header_pattern, data_subset), ids)
            # Expose field names as text so rows can be keyed by plain strings.
            self.fields[x]['Field Name'] = self._drop_after_NULL(
                self.fields[x]['Field Name']).decode('ascii')

        # Skip the last descriptor bytes, the 0x0d terminator and the first
        # record's deletion flag; records then line up on 'Record Size' strides.
        self.fdb.read(3)
        if self.header['# of Records']:
            data_size = (self.header['# of Records'] * self.header['Record Size']) - 1
            self.db_data = self.fdb.read(data_size)
        else:
            self.db_data = b''
        # Build one struct format covering a whole record, field by field.
        self.row_format = '<'
        self.row_ids = []
        self.row_len = 0
        for key in self.fields:
            field = self.fields[key]
            self.row_format += '%ds ' % (field['Field Length'])
            self.row_ids.append(field['Field Name'])
            self.row_len += field['Field Length']

    def close(self):
        """Close the data file and the memo file, when open."""
        if self.fdb:
            self.fdb.close()
        if self.fmemo:
            self.fmemo.close()

    def get_numrecords(self):
        """Return the number of records announced by the .dbf header."""
        return self.num_records

    def get_record_with_names(self, rec_no):
        """Return record *rec_no* (0-based) as a dict keyed by field name.

        Memo-typed fields (M/G/B/P) are replaced by the referenced memo
        content.  Raises Exception when rec_no is outside [0, num_records).
        """
        # Fixed off-by-one: rec_no == num_records must be rejected too,
        # otherwise we unpack past the end of db_data.
        if rec_no < 0 or rec_no >= self.num_records:
            raise Exception('Unable to extract data outside the range')

        offset = self.header['Record Size'] * rec_no
        data = self.db_data[offset:offset + self.row_len]
        record = self._assign_ids(struct.unpack(self.row_format, data), self.row_ids)

        if self.memo_file:
            for key in self.fields:
                field = self.fields[key]
                f_type = field['Field Type']
                f_name = field['Field Name']
                c_data = record[f_name]

                if f_type in (b'M', b'G', b'B', b'P'):
                    # The field stores a 1-based memo block reference.
                    c_data = self._reverse_endian(c_data)
                    if c_data:
                        record[f_name] = self.read_memo(c_data - 1).strip()
                else:
                    record[f_name] = c_data.strip()
        return record

    def read_memo_record(self, num, in_length):
        """
        Read the record of given number. The second parameter is the length of
        the record to read. It can be undefined, meaning read the whole record,
        and it can be negative, meaning at most the length
        """
        # A negative length means "at most one block".
        if in_length < 0:
            in_length = -self.memo_block_size

        offset = self.memo_header_len + num * self.memo_block_size
        self.fmemo.seek(offset)
        if in_length < 0:
            in_length = -in_length
        if in_length == 0:
            return b''
        return self.fmemo.read(in_length)

    def read_memo(self, num):
        """Return the memo content stored at block *num* (0-based), as bytes."""
        buffer = self.read_memo_record(num, -1)
        if len(buffer) <= 0:
            return b''
        # Bytes 4-7 of the first block hold the memo length (big-endian);
        # add 8 to account for the block header that precedes the payload.
        length = struct.unpack('>L', buffer[4:4 + 4])[0] + 8

        block_size = self.memo_block_size
        if length < block_size:
            return buffer[8:length]
        # Memo spans several blocks: fetch the remainder in one read.
        rest_length = length - block_size
        rest_data = self.read_memo_record(num + 1, rest_length)
        if len(rest_data) <= 0:
            return b''
        return buffer[8:] + rest_data
| 209 |
def readDbf(filename):
    """
    Read the DBF file specified by the filename and
    return the records as a list of dictionary.

    :param: filename File name of the DBF
    :return: List of rows
    """
    db = Dbase()
    db.open(filename)
    try:
        # Materialize every record; try/finally ensures the data and memo
        # file handles are closed even when a record fails to parse
        # (the original leaked them on error).
        return [db.get_record_with_names(i) for i in range(db.get_numrecords())]
    finally:
        db.close()
| 227 |
if __name__ == '__main__':
    # Manual smoke test: dump genus and species from a sample table.
    for line in readDbf('dbf/sptable.dbf'):
        print('%s %s' % (line['GENUS'].strip(), line['SPECIES'].strip()))
OLD | NEW |