OLD | NEW |
1 # copyright 2003-2012 LOGILAB S.A. (Paris, FRANCE), all rights reserved. | 1 # copyright 2003-2011 LOGILAB S.A. (Paris, FRANCE), all rights reserved. |
2 # contact http://www.logilab.fr/ -- mailto:contact@logilab.fr | 2 # contact http://www.logilab.fr/ -- mailto:contact@logilab.fr |
3 # | 3 # |
4 # This file is part of logilab-common. | 4 # This file is part of logilab-common. |
5 # | 5 # |
6 # logilab-common is free software: you can redistribute it and/or modify it under | 6 # logilab-common is free software: you can redistribute it and/or modify it under |
7 # the terms of the GNU Lesser General Public License as published by the Free | 7 # the terms of the GNU Lesser General Public License as published by the Free |
8 # Software Foundation, either version 2.1 of the License, or (at your option) any | 8 # Software Foundation, either version 2.1 of the License, or (at your option) any |
9 # later version. | 9 # later version. |
10 # | 10 # |
11 # logilab-common is distributed in the hope that it will be useful, but WITHOUT | 11 # logilab-common is distributed in the hope that it will be useful, but WITHOUT |
12 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | 12 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
13 # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more | 13 # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more |
14 # details. | 14 # details. |
15 # | 15 # |
16 # You should have received a copy of the GNU Lesser General Public License along | 16 # You should have received a copy of the GNU Lesser General Public License along |
17 # with logilab-common. If not, see <http://www.gnu.org/licenses/>. | 17 # with logilab-common. If not, see <http://www.gnu.org/licenses/>. |
18 """Unicode email support (extends email from stdlib)""" | 18 """Unicode email support (extends email from stdlib). |
19 | 19 |
| 20 |
| 21 |
| 22 |
| 23 """ |
20 __docformat__ = "restructuredtext en" | 24 __docformat__ = "restructuredtext en" |
21 | 25 |
22 import email | 26 import email |
23 from encodings import search_function | 27 from encodings import search_function |
24 import sys | 28 import sys |
25 if sys.version_info >= (2, 5): | 29 if sys.version_info >= (2, 5): |
26 from email.utils import parseaddr, parsedate | 30 from email.utils import parseaddr, parsedate |
27 from email.header import decode_header | 31 from email.header import decode_header |
28 else: | 32 else: |
29 from email.Utils import parseaddr, parsedate | 33 from email.Utils import parseaddr, parsedate |
30 from email.Header import decode_header | 34 from email.Header import decode_header |
31 | 35 |
32 from datetime import datetime | 36 from datetime import datetime |
33 | 37 |
34 try: | 38 try: |
35 from mx.DateTime import DateTime | 39 from mx.DateTime import DateTime |
36 except ImportError: | 40 except ImportError: |
37 DateTime = datetime | 41 DateTime = datetime |
38 | 42 |
39 import logilab.common as lgc | 43 import logilab.common as lgc |
40 | 44 |
41 | 45 |
42 def decode_QP(string): | 46 def decode_QP(string): |
43 parts = [] | 47 parts = [] |
44 for decoded, charset in decode_header(string): | 48 for decoded, charset in decode_header(string): |
45 if not charset : | 49 if not charset : |
46 charset = 'iso-8859-15' | 50 charset = 'iso-8859-15' |
47 parts.append(decoded.decode(charset, 'replace')) | 51 parts.append(unicode(decoded, charset, 'replace')) |
48 | 52 |
49 if sys.version_info < (3, 3): | 53 return u' '.join(parts) |
50 # decoding was non-RFC compliant wrt to whitespace handling | |
51 # see http://bugs.python.org/issue1079 | |
52 return u' '.join(parts) | |
53 return u''.join(parts) | |
54 | 54 |
55 def message_from_file(fd): | 55 def message_from_file(fd): |
56 try: | 56 try: |
57 return UMessage(email.message_from_file(fd)) | 57 return UMessage(email.message_from_file(fd)) |
58 except email.Errors.MessageParseError: | 58 except email.Errors.MessageParseError: |
59 return '' | 59 return '' |
60 | 60 |
61 def message_from_string(string): | 61 def message_from_string(string): |
62 try: | 62 try: |
63 return UMessage(email.message_from_string(string)) | 63 return UMessage(email.message_from_string(string)) |
64 except email.Errors.MessageParseError: | 64 except email.Errors.MessageParseError: |
65 return '' | 65 return '' |
66 | 66 |
67 class UMessage: | 67 class UMessage: |
68 """Encapsulates an email.Message instance and returns only unicode objects. | 68 """Encapsulates an email.Message instance and returns only unicode objects. |
69 """ | 69 """ |
70 | 70 |
71 def __init__(self, message): | 71 def __init__(self, message): |
72 self.message = message | 72 self.message = message |
73 | 73 |
74 # email.Message interface ################################################# | 74 # email.Message interface ################################################# |
75 | 75 |
76 def get(self, header, default=None): | 76 def get(self, header, default=None): |
77 value = self.message.get(header, default) | 77 value = self.message.get(header, default) |
78 if value: | 78 if value: |
79 return decode_QP(value) | 79 return decode_QP(value) |
80 return value | 80 return value |
81 | 81 |
82 def __getitem__(self, header): | |
83 return self.get(header) | |
84 | |
85 def get_all(self, header, default=()): | 82 def get_all(self, header, default=()): |
86 return [decode_QP(val) for val in self.message.get_all(header, default) | 83 return [decode_QP(val) for val in self.message.get_all(header, default) |
87 if val is not None] | 84 if val is not None] |
88 | 85 |
| 86 def get_payload(self, index=None, decode=False): |
| 87 message = self.message |
| 88 if index is None: |
| 89 payload = message.get_payload(index, decode) |
| 90 if isinstance(payload, list): |
| 91 return [UMessage(msg) for msg in payload] |
| 92 if message.get_content_maintype() != 'text': |
| 93 return payload |
| 94 |
| 95 charset = message.get_content_charset() or 'iso-8859-1' |
| 96 if search_function(charset) is None: |
| 97 charset = 'iso-8859-1' |
| 98 return unicode(payload or '', charset, "replace") |
| 99 else: |
| 100 payload = UMessage(message.get_payload(index, decode)) |
| 101 return payload |
| 102 |
89 def is_multipart(self): | 103 def is_multipart(self): |
90 return self.message.is_multipart() | 104 return self.message.is_multipart() |
91 | 105 |
92 def get_boundary(self): | 106 def get_boundary(self): |
93 return self.message.get_boundary() | 107 return self.message.get_boundary() |
94 | 108 |
95 def walk(self): | 109 def walk(self): |
96 for part in self.message.walk(): | 110 for part in self.message.walk(): |
97 yield UMessage(part) | 111 yield UMessage(part) |
98 | 112 |
99 if sys.version_info < (3, 0): | 113 def get_content_maintype(self): |
| 114 return unicode(self.message.get_content_maintype()) |
100 | 115 |
101 def get_payload(self, index=None, decode=False): | 116 def get_content_type(self): |
102 message = self.message | 117 return unicode(self.message.get_content_type()) |
103 if index is None: | |
104 payload = message.get_payload(index, decode) | |
105 if isinstance(payload, list): | |
106 return [UMessage(msg) for msg in payload] | |
107 if message.get_content_maintype() != 'text': | |
108 return payload | |
109 | 118 |
110 charset = message.get_content_charset() or 'iso-8859-1' | 119 def get_filename(self, failobj=None): |
111 if search_function(charset) is None: | 120 value = self.message.get_filename(failobj) |
112 charset = 'iso-8859-1' | 121 if value is failobj: |
113 return unicode(payload or '', charset, "replace") | 122 return value |
114 else: | 123 try: |
115 payload = UMessage(message.get_payload(index, decode)) | 124 return unicode(value) |
116 return payload | 125 except UnicodeDecodeError: |
117 | 126 return u'error decoding filename' |
118 def get_content_maintype(self): | |
119 return unicode(self.message.get_content_maintype()) | |
120 | |
121 def get_content_type(self): | |
122 return unicode(self.message.get_content_type()) | |
123 | |
124 def get_filename(self, failobj=None): | |
125 value = self.message.get_filename(failobj) | |
126 if value is failobj: | |
127 return value | |
128 try: | |
129 return unicode(value) | |
130 except UnicodeDecodeError: | |
131 return u'error decoding filename' | |
132 | |
133 else: | |
134 | |
135 def get_payload(self, index=None, decode=False): | |
136 message = self.message | |
137 if index is None: | |
138 payload = message.get_payload(index, decode) | |
139 if isinstance(payload, list): | |
140 return [UMessage(msg) for msg in payload] | |
141 return payload | |
142 else: | |
143 payload = UMessage(message.get_payload(index, decode)) | |
144 return payload | |
145 | |
146 def get_content_maintype(self): | |
147 return self.message.get_content_maintype() | |
148 | |
149 def get_content_type(self): | |
150 return self.message.get_content_type() | |
151 | |
152 def get_filename(self, failobj=None): | |
153 return self.message.get_filename(failobj) | |
154 | 127 |
155 # other convenience methods ############################################### | 128 # other convenience methods ############################################### |
156 | 129 |
157 def headers(self): | 130 def headers(self): |
158 """return an unicode string containing all the message's headers""" | 131 """return an unicode string containing all the message's headers""" |
159 values = [] | 132 values = [] |
160 for header in self.message.keys(): | 133 for header in self.message.keys(): |
161 values.append(u'%s: %s' % (header, self.get(header))) | 134 values.append(u'%s: %s' % (header, self.get(header))) |
162 return '\n'.join(values) | 135 return '\n'.join(values) |
163 | 136 |
(...skipping 21 matching lines...) Expand all Loading... |
185 pass | 158 pass |
186 if value is not None: | 159 if value is not None: |
187 datetuple = parsedate(value) | 160 datetuple = parsedate(value) |
188 if datetuple: | 161 if datetuple: |
189 if lgc.USE_MX_DATETIME: | 162 if lgc.USE_MX_DATETIME: |
190 return DateTime(*datetuple[:6]) | 163 return DateTime(*datetuple[:6]) |
191 return datetime(*datetuple[:6]) | 164 return datetime(*datetuple[:6]) |
192 elif not return_str: | 165 elif not return_str: |
193 return None | 166 return None |
194 return value | 167 return value |
OLD | NEW |