1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 """ This module contains general functions for processing mail locally
22 e.g. for filtering and classification.
23 """
24
25 import re
26 import subprocess
27 import tempfile
28 import os
29 from email.generator import Generator
30 from cStringIO import StringIO
31
32 from ProcImap.ImapMessage import ImapMessage
33
35 """ This class wraps around a file containing email addresses.
36 It is intended to help with Whitelisting, Blacklisting, etc.
37 """
def __init__(self, filename, inmemory=False, regexes=False):
    """ Initialize AddressListFile.

        filename -- path of the text file holding one entry per line.
        inmemory -- if True, the file is read once, here, and its entries
                    are kept in self._data; otherwise only the path is
                    remembered and self._data stays empty.
        regexes  -- if True (and inmemory is True), every entry is
                    compiled as a regular expression; otherwise entries
                    are stored as plain strings.

        Entries are stripped of surrounding whitespace and blank lines
        are dropped, in both modes.

        Raises whatever open() raises if inmemory is True and the file
        cannot be read, and re.error if an entry is not a valid regex.
    """
    self._cache = {}
    self._data = []
    self.filename = filename
    self._inmemory = inmemory
    self._use_regexes = regexes
    if not self._inmemory:
        return
    # The with-block closes the file even if reading fails half-way.
    with open(filename) as infile:
        entries = [line.strip() for line in infile]
    # A blank entry must never be stored: the empty regex matches every
    # string, and '' is a substring of every string.
    entries = [entry for entry in entries if entry]
    if self._use_regexes:
        entries = [re.compile(entry) for entry in entries]
    self._data = entries
61 """ Return True if there is a line in the represented file that is
62 contained in lookupstring. E.g., if you search for
63 'someone@gmail.com' and the file contains a line '@gmail.com',
64 True is returned.
65 If regexes are used, return True if there is a regex in the file
66 that matches the lookupstring completely.
67 """
68 if lookupstring is None:
69 return False
70 if self._cache.has_key(lookupstring):
71 return self._cache[lookupstring]
72 if self._use_regexes:
73 if self._inmemory:
74 for regex in self._data:
75 if regex.match(lookupstring):
76 self._cache[lookupstring] = True
77 return True
78 else:
79 infile = open(self.filename)
80 for line in infile:
81 regex = re.compile(line.strip())
82 if regex.match(lookupstring):
83 self._cache[lookupstring] = True
84 return True
85 infile.close()
86 else:
87 if self._inmemory:
88 for in_file_string in self._data:
89 if in_file_string in lookupstring:
90 self._cache[lookupstring] = True
91 return True
92 else:
93 infile = open(self.filename)
94 for line in infile:
95 line = line.strip()
96 if line in lookupstring:
97 self._cache[lookupstring] = True
98 return True
99 infile.close()
100 self._cache[lookupstring] = False
101 return False
def add(self, line):
    """ Append line as a new entry to self.filename.

        A newline is appended if line does not already end with one.
        Empty or whitespace-only input is ignored: a blank line in the
        file is a substring of every string, so the on-disk lookup would
        report a match for everything.

        Only the file is changed; self._data and self._cache are not
        touched by this method.
    """
    if not line.strip():
        return
    # The with-block closes the file even if a write fails.
    with open(self.filename, "a") as outfile:
        outfile.write(line)
        if not line.endswith("\n"):
            outfile.write("\n")
109
110
111
113 """ This class wraps around a file containing email address
114 replacements.
115 The text file contains lines such as
116 noreply@couchsurfing.com :: Couchsurfing <noreply@couchsurfing.com>
117 The intention is to replace 'noreply@couchsurfing.com'
118 with 'Couchsurfing <noreply@couchsurfing.com>'.
119 You can use this to make the from-line look nice in your email
120 reader, if people send you crippled from-lines.
121 """
def __init__(self, filename, inmemory=False, regexes=False, partial=False):
    """ Initialize the replacement table.

        filename -- path of the text file; every useful line has the
                    form 'original :: replacement'.
        inmemory -- if True, the file is parsed once, here, into
                    self._data; otherwise only the path is remembered.
        regexes  -- if True, self._data is a list of
                    (compiled regex, replacement) tuples in file order;
                    otherwise it is a dict mapping original to
                    replacement.
        partial  -- stored as self._partial; lookup uses it to allow
                    substring matches on in-memory plain-string data.

        Both sides of '::' are stripped of surrounding whitespace. Only
        the first '::' on a line separates the two parts. Lines without
        '::' (e.g. blank lines) are skipped.

        Raises whatever open() raises if inmemory is True and the file
        cannot be read, and re.error for an invalid regex.
    """
    self._cache = {}
    self.filename = filename
    self._inmemory = inmemory
    self._use_regexes = regexes
    self._partial = partial
    self._data = [] if regexes else {}
    if not self._inmemory:
        return
    # The with-block closes the file even if parsing fails half-way.
    with open(filename) as infile:
        for line in infile:
            original, separator, replacement = line.partition("::")
            if not separator:
                # Blank or malformed line: there is no pair to store.
                continue
            original = original.strip()
            replacement = replacement.strip()
            if self._use_regexes:
                self._data.append((re.compile(original), replacement))
            else:
                self._data[original] = replacement
def lookup(self, searchstring):
    """ Return the replacement for searchstring. If no replacement is
        found, return searchstring itself. None is passed through as None.

        How an entry 'original :: replacement' is matched:
          * regexes: the first entry whose pattern matches at the start
            of searchstring (re.match semantics) wins.
          * plain strings held in memory: an exact match on original is
            tried first; if that fails and self._partial is set, the
            first entry whose original is a substring of searchstring
            wins.
          * plain strings read from disk: the first line whose original
            is a substring of searchstring wins; self._partial is not
            consulted in this mode.

        When reading from disk, lines without '::' are skipped. Every
        answer is remembered in self._cache, keyed by searchstring.
    """
    if searchstring is None:
        return None
    if searchstring in self._cache:
        return self._cache[searchstring]

    result = searchstring
    if self._inmemory:
        if self._use_regexes:
            for regex, replacement in self._data:
                if regex.match(searchstring):
                    result = replacement
                    break
        elif searchstring in self._data:
            result = self._data[searchstring]
        elif self._partial:
            for original, replacement in self._data.items():
                if original in searchstring:
                    result = replacement
                    break
    else:
        # The with-block closes the file on every exit path, including
        # the early break on a match.
        with open(self.filename) as infile:
            for line in infile:
                original, separator, replacement = line.partition("::")
                if not separator:
                    # Blank or malformed line: no pair to test.
                    continue
                original = original.strip()
                if self._use_regexes:
                    # Match against the part before '::' only, never
                    # against the whole line.
                    matched = re.match(original, searchstring)
                else:
                    matched = original in searchstring
                if matched:
                    result = replacement.strip()
                    break
    self._cache[searchstring] = result
    return result
def add(self, line):
    """ Append line as a new entry to self.filename.

        A newline is appended if line does not already end with one.
        Empty or whitespace-only input is ignored, since a blank line
        carries no 'original :: replacement' pair.

        Only the file is changed; self._data and self._cache are not
        touched by this method.
    """
    if not line.strip():
        return
    # The with-block closes the file even if a write fails.
    with open(self.filename, "a") as outfile:
        outfile.write(line)
        if not line.endswith("\n"):
            outfile.write("\n")
208
209
210
212 """ Pipe the message through a shell command:
213 cat message | command > message
214 message is assumed to be an instance of ImapMessage
215 Returns modified message as instance of ImapMessage
216 """
217 p = subprocess.Popen([command], shell=True,
218 stdin=subprocess.PIPE, stdout=subprocess.PIPE, close_fds=True)
219 (child_stdout, child_stdin) = (p.stdout, p.stdin)
220
221 memoryfile = StringIO()
222 generator = Generator(memoryfile, mangle_from_=False, maxheaderlen=60)
223 generator.flatten(message)
224 child_stdin.write(memoryfile.getvalue())
225 child_stdin.close()
226 modified_message = ImapMessage(child_stdout)
227 child_stdout.close()
228 modified_message.set_imapflags(message.get_imapflags())
229 modified_message.internaldate = message.internaldate
230 if hasattr(message, 'myflags'):
231 modified_message.myflags = message.myflags
232 if hasattr(message, 'mailbox'):
233 modified_message.mailbox = message.mailbox
234 return modified_message
235
237 """ This takes a string or unicode string in unknown encoding, tries to
238 guess the encoding and to replace Latin-1 characters with something
239 equivalent in 7-bit ASCII. Decoding an unknown string is based on
240 heuristics. This function may return complete garbage.
241 The function returns a plain ASCII string, making a best effort to
242 convert Latin-1 characters into ASCII equivalents. It does not just
243 strip out the Latin-1 characters. All characters in the standard 7-bit
244 ASCII range are preserved. In the 8th bit range all the Latin-1
245 accented letters are converted to unaccented equivalents. Most symbol
246 characters are converted to something meaningful. Anything not
247 converted is deleted.
248
249 Adapted from
250 http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/251871
251 """
252 xlate = {
253
254 u'\N{ACUTE ACCENT}' : ( "", 0),
255 u'\N{BROKEN BAR}' : ( '|', 0),
256 u'\N{CEDILLA}' : ( '', 0),
257 u'\N{CENT SIGN}' : ( ' cent', 0),
258 u'\N{COPYRIGHT SIGN}' : ( '(c)', 1),
259 u'\N{CURRENCY SIGN}' : ( '', 0),
260 u'\N{DEGREE SIGN}' : ( '', 1),
261 u'\N{DIAERESIS}' : ( '', 0),
262 u'\N{DIVISION SIGN}' : ( '/', 1),
263 u'\N{FEMININE ORDINAL INDICATOR}' : ( '', 0),
264 u'\N{INVERTED EXCLAMATION MARK}' : ( '!', 1),
265 u'\N{INVERTED QUESTION MARK}' : ( '?', 1),
266 u'\N{LATIN CAPITAL LETTER A WITH ACUTE}' : ( 'A', 1),
267 u'\N{LATIN CAPITAL LETTER A WITH CIRCUMFLEX}' : ( 'A', 1),
268 u'\N{LATIN CAPITAL LETTER A WITH DIAERESIS}' : ( 'Ae', 1),
269 u'\N{LATIN CAPITAL LETTER A WITH GRAVE}' : ( 'A', 1),
270 u'\N{LATIN CAPITAL LETTER A WITH RING ABOVE}' : ( 'A', 1),
271 u'\N{LATIN CAPITAL LETTER A WITH TILDE}' : ( 'A', 1),
272 u'\N{LATIN CAPITAL LETTER AE}' : ( 'Ae', 2),
273 u'\N{LATIN CAPITAL LETTER C WITH CEDILLA}' : ( 'C', 1),
274 u'\N{LATIN CAPITAL LETTER E WITH ACUTE}' : ( 'E', 1),
275 u'\N{LATIN CAPITAL LETTER E WITH CIRCUMFLEX}' : ( 'E', 1),
276 u'\N{LATIN CAPITAL LETTER E WITH DIAERESIS}' : ( 'E', 1),
277 u'\N{LATIN CAPITAL LETTER E WITH GRAVE}' : ( 'E', 1),
278 u'\N{LATIN CAPITAL LETTER ETH}' : ( 'Th', 1),
279 u'\N{LATIN CAPITAL LETTER I WITH ACUTE}' : ( 'I', 1),
280 u'\N{LATIN CAPITAL LETTER I WITH CIRCUMFLEX}' : ( 'I', 1),
281 u'\N{LATIN CAPITAL LETTER I WITH DIAERESIS}' : ( 'I', 1),
282 u'\N{LATIN CAPITAL LETTER I WITH GRAVE}' : ( 'I', 1),
283 u'\N{LATIN CAPITAL LETTER N WITH TILDE}' : ( 'N', 1),
284 u'\N{LATIN CAPITAL LETTER O WITH ACUTE}' : ( 'O', 1),
285 u'\N{LATIN CAPITAL LETTER O WITH CIRCUMFLEX}' : ( 'O', 1),
286 u'\N{LATIN CAPITAL LETTER O WITH DIAERESIS}' : ( 'Oe', 2),
287 u'\N{LATIN CAPITAL LETTER O WITH GRAVE}' : ( 'O', 1),
288 u'\N{LATIN CAPITAL LETTER O WITH STROKE}' : ( 'O', 1),
289 u'\N{LATIN CAPITAL LETTER O WITH TILDE}' : ( 'O', 1),
290 u'\N{LATIN CAPITAL LETTER THORN}' : ( 'th', 1),
291 u'\N{LATIN CAPITAL LETTER U WITH ACUTE}' : ( 'U', 1),
292 u'\N{LATIN CAPITAL LETTER U WITH CIRCUMFLEX}' : ( 'U', 1),
293 u'\N{LATIN CAPITAL LETTER U WITH DIAERESIS}' : ( 'Ue', 2),
294 u'\N{LATIN CAPITAL LETTER U WITH GRAVE}' : ( 'U', 1),
295 u'\N{LATIN CAPITAL LETTER Y WITH ACUTE}' : ( 'Y', 1),
296 u'\N{LATIN SMALL LETTER A WITH ACUTE}' : ( 'a', 1),
297 u'\N{LATIN SMALL LETTER A WITH CIRCUMFLEX}' : ( 'a', 1),
298 u'\N{LATIN SMALL LETTER A WITH DIAERESIS}' : ( 'ae', 2),
299 u'\N{LATIN SMALL LETTER A WITH GRAVE}' : ( 'a', 1),
300 u'\N{LATIN SMALL LETTER A WITH RING ABOVE}' : ( 'a', 1),
301 u'\N{LATIN SMALL LETTER A WITH TILDE}' : ( 'a', 1),
302 u'\N{LATIN SMALL LETTER AE}' : ( 'ae', 3),
303 u'\N{LATIN SMALL LETTER C WITH CEDILLA}' : ( 'c', 1),
304 u'\N{LATIN SMALL LETTER E WITH ACUTE}' : ( 'e', 1),
305 u'\N{LATIN SMALL LETTER E WITH CIRCUMFLEX}' : ( 'e', 1),
306 u'\N{LATIN SMALL LETTER E WITH DIAERESIS}' : ( 'e', 1),
307 u'\N{LATIN SMALL LETTER E WITH GRAVE}' : ( 'e', 1),
308 u'\N{LATIN SMALL LETTER ETH}' : ( 'th', 1),
309 u'\N{LATIN SMALL LETTER I WITH ACUTE}' : ( 'i', 1),
310 u'\N{LATIN SMALL LETTER I WITH CIRCUMFLEX}' : ( 'i', 1),
311 u'\N{LATIN SMALL LETTER I WITH DIAERESIS}' : ( 'i', 1),
312 u'\N{LATIN SMALL LETTER I WITH GRAVE}' : ( 'i', 1),
313 u'\N{LATIN SMALL LETTER N WITH TILDE}' : ( 'n', 1),
314 u'\N{LATIN SMALL LETTER O WITH ACUTE}' : ( 'o', 1),
315 u'\N{LATIN SMALL LETTER O WITH CIRCUMFLEX}' : ( 'o', 1),
316 u'\N{LATIN SMALL LETTER O WITH DIAERESIS}' : ( 'oe', 2),
317 u'\N{LATIN SMALL LETTER O WITH GRAVE}' : ( 'o', 1),
318 u'\N{LATIN SMALL LETTER O WITH STROKE}' : ( 'o', 1),
319 u'\N{LATIN SMALL LETTER O WITH TILDE}' : ( 'o', 1),
320 u'\N{LATIN SMALL LETTER SHARP S}' : ( 'ss', 2),
321 u'\N{LATIN SMALL LETTER THORN}' : ( 'th', 0),
322 u'\N{LATIN SMALL LETTER U WITH ACUTE}' : ( 'u', 1),
323 u'\N{LATIN SMALL LETTER U WITH CIRCUMFLEX}' : ( 'u', 1),
324 u'\N{LATIN SMALL LETTER U WITH DIAERESIS}' : ( 'ue', 2),
325 u'\N{LATIN SMALL LETTER U WITH GRAVE}' : ( 'u', 1),
326 u'\N{LATIN SMALL LETTER Y WITH ACUTE}' : ( 'y', 1),
327 u'\N{LATIN SMALL LETTER Y WITH DIAERESIS}' : ( 'y', 1),
328 u'\N{LEFT-POINTING DOUBLE ANGLE QUOTATION MARK}' : ( '"', 0),
329 u'\N{MACRON}' : ( '', 0),
330 u'\N{MASCULINE ORDINAL INDICATOR}' : ( '', 0),
331 u'\N{MICRO SIGN}' : ( 'micro', 0),
332 u'\N{MIDDLE DOT}' : ( '*', 0),
333 u'\N{MULTIPLICATION SIGN}' : ( '*', 0),
334 u'\N{NOT SIGN}' : ( 'not', 0),
335 u'\N{PILCROW SIGN}' : ( '', 0),
336 u'\N{PLUS-MINUS SIGN}' : ( '+/-', 0),
337 u'\N{POUND SIGN}' : ( ' pound', 0),
338 u'\N{REGISTERED SIGN}' : ( '(R)', 0),
339 u'\N{RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK}' : ( '"', 0),
340 u'\N{SECTION SIGN}' : ( '', 0),
341 u'\N{SOFT HYPHEN}' : ( '-', 0),
342 u'\N{SUPERSCRIPT ONE}' : ( '1', 0),
343 u'\N{SUPERSCRIPT THREE}' : ( '3', 0),
344 u'\N{SUPERSCRIPT TWO}' : ( '2', 0),
345 u'\N{VULGAR FRACTION ONE HALF}' : ( '{1/2}', 0),
346 u'\N{VULGAR FRACTION ONE QUARTER}' : ( '{1/4}', 0),
347 u'\N{VULGAR FRACTION THREE QUARTERS}' : ( '{3/4}', 0),
348 u'\N{YEN SIGN}' : ('yen', 0)
349 }
350 try:
351 unistring = unicode(inputstring, 'ascii')
352 return inputstring
353 except UnicodeDecodeError:
354 pass
355 if isinstance(inputstring, unicode):
356 unistring = inputstring
357 else:
358
359 encodings = ['utf8', 'latin_1', 'cp037', 'cp437' , 'cp850', 'cp852',
360 'cp863', 'cp865', 'cp1140', 'cp1250', 'cp1252',
361 'iso8859_15', 'mac_latin2', 'utf_16']
362 found_encoding = 'ascii'
363 alphabet = u"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ :!,"
364 max_score = 0.0
365 for encoding in encodings:
366
367
368
369 try:
370 unistring = unicode(inputstring, encoding)
371 successcount = 0
372 totalcount = 0
373 for character in unistring:
374 totalcount += 1
375 if xlate.has_key(character):
376
377
378 weight = xlate[character][1]
379 successcount += weight
380 if character in alphabet:
381
382
383 successcount += 2
384 score = float(successcount) / float(totalcount)
385 if score > max_score:
386
387 found_encoding = encoding
388 max_score = score
389 except UnicodeDecodeError:
390
391 continue
392 unistring = unicode(inputstring, found_encoding, 'replace')
393 result = ''
394 for character in unistring:
395 if xlate.has_key(character):
396 result += xlate[character][0]
397 elif ord(character) >= 0x80:
398 pass
399 else:
400 result += str(character)
401 return result
402
404 """ Put displaystring through the 'less' pager """
405 (temp_fd, tempname) = tempfile.mkstemp(".mail")
406 temp_fh = os.fdopen(temp_fd, "w")
407 temp_fh.write(displaystring)
408 temp_fh.close()
409 os.system("%s %s" % (pager, tempname))
410 os.unlink(tempname)
411
413 """ Extract the message ids from the "References" and "In-Reply-To"
414 Headers.
415 """
416 id_pattern = re.compile('<\S+@\S+>')
417 result = set()
418 references = header['References']
419 if references is not None:
420 for id in id_pattern.findall(references):
421 result.add(id)
422 reply_to = header['In-Reply-To']
423 if reply_to is not None:
424 for id in id_pattern.findall(reply_to):
425 result.add(id)
426 return list(result)
427