Archive
January 2010
December 2009
November 2009
October 2009
September 2009
June 2009
April 2009
March 2009
February 2009
January 2009
December 2008
November 2008
October 2008
July 2008
June 2008
October 2007
September 2007
December 2009
November 2009
October 2009
September 2009
June 2009
April 2009
March 2009
February 2009
January 2009
December 2008
November 2008
October 2008
July 2008
June 2008
October 2007
September 2007
Tags
android
(3)
ant
(2)
beautifulsoup
(1)
debian
(1)
decorators
(1)
django
(9)
dovecot
(1)
encryption
(1)
fix
(4)
gotcha
(2)
hobo
(1)
htmlparser
(1)
imaplib
(2)
java
(1)
json
(2)
kerberos
(2)
linux
(7)
lxml
(5)
markdown
(4)
mechanize
(6)
multiprocessing
(1)
mysql
(2)
nagios
(2)
new_features
(3)
open_source
(5)
optparse
(2)
parsing
(1)
perl
(2)
postgres
(1)
preseed
(1)
pxe
(4)
pyqt4
(1)
python
(41)
raid
(1)
rails
(1)
red_hat
(1)
reportlab
(4)
request_tracker
(2)
rt
(2)
ruby
(1)
scala
(1)
screen_scraping
(7)
shell_scripting
(8)
soap
(1)
solaris
(3)
sql
(2)
sqlalchemy
(2)
tips_and_tricks
(1)
twitter
(2)
ubuntu
(1)
vmware
(2)
windows
(1)
zimbra
(2)
Entries tagged as parsing
This was a nice little exercise for my skills, so I'd like to share it. The script parses the main Dovecot log and the rawlogs for each mailbox to generate an HTML report of which host/IP has done what. The actions are still raw IMAP commands, but they are pretty understandable.
#!/usr/bin/env python
"""Build an HTML report of IMAP activity from Dovecot logs.

The main mail log is scanned for lines that carry a timestamp, a remote
IP (``rip=``) and a mailbox (``user=<...>``).  For every such line the
matching per-mailbox rawlog file is read and the IMAP commands that
create, delete, rename, append or copy data are collected per mailbox
and per remote IP.  The result is written out as an XHTML page.

The code avoids ``print`` entirely and imports its HTML escaper
defensively, so it runs on Python 3 and remains valid on Python 2.
"""
import datetime
import glob
import os
import re
import socket
import sys

try:  # Python 3.2+
    from html import escape as _escape_html
except ImportError:  # Python 2: cgi.escape takes the same (s, quote) args
    from cgi import escape as _escape_html

HOMEDIRSPATH = '/home'
MAILLOG = '/var/log/mail.log'

# Syslog always writes English month abbreviations, so do not rely on the
# process locale (strftime('%b') / '%c') to build the line prefix.
SYSLOG_MONTHS = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')

# Regex for mail.log
# Non-greedy prefix: take the first hh:mm:ss on the line (the syslog stamp).
TIMESTAMP_RE = re.compile(r'.*?(\d\d:\d\d:\d\d)')
# Dots are escaped so that only a dotted quad is accepted.
RIP_RE = re.compile(r'.*rip=(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})')
MB_RE = re.compile(r'.*user=<(\w+?)>')

# Regex for dovecot.rawlog
RAWLOG_RES = [
    re.compile(r'\w+? CREATE "', re.I),            # Folder Created
    re.compile(r'\w+? DELETE "', re.I),            # Folder Deleted
    re.compile(r'\w+? RENAME "', re.I),            # Folder Moved/Renamed
    re.compile(r'\w+? APPEND "', re.I),            # Mail Added
    re.compile(r'\w+? UID STORE.*DELETED', re.I),  # Mail Deleted
]
RAWLOG_SELECT_RE = re.compile(r'\w+? SELECT "', re.I)  # Folder selected
RAWLOG_COPY_RE = re.compile(r'\w+? UID COPY', re.I)    # Folder Copied/Being Moved
RAWLOG_STRIP_RE = re.compile(r'\w+? (.*)')             # Remove the action id

HTML_HEADER = """<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<title>%s</title>
<style type="text/css">
.mb {
    background-color: #BDB;
    margin: 0px 0px 40px 0px;
    padding: 5px;
}
.ip {
    background-color: #CEC;
    margin: 10px;
    padding: 5px;
}
.log {
    background-color: #DFD;
    margin: 10px;
    padding: 5px;
}
span {
    font-size: small;
}
</style>
</head>
<body>
"""

HTML_FOOTER = """
</body>
</html>
"""


def _escape(text):
    """Escape ``&``, ``<`` and ``>`` for HTML text (quotes are left alone)."""
    return _escape_html(text, quote=False)


def _open_text(path):
    """Open *path* for reading text without failing on undecodable bytes.

    Rawlogs can contain arbitrary bytes, so on Python 3 undecodable
    sequences are replaced instead of raising.  Python 2 reads byte
    strings and needs no decoding.
    """
    if sys.version_info[0] >= 3:
        return open(path, 'r', encoding='utf-8', errors='replace')
    return open(path, 'r')


def _lookup_host(ip):
    """Return the reverse-DNS name for *ip*, or None when lookup fails."""
    try:
        return socket.gethostbyaddr(ip)[0]
    except socket.error:
        # socket.error is the base of herror/gaierror/timeout on Python 2
        # and an alias of OSError on Python 3.
        return None


def _strip_tag(line):
    """Return *line* without its leading IMAP command tag.

    Lines that do not look like ``<tag> <rest>`` are returned stripped
    but otherwise unchanged instead of raising.
    """
    text = line.strip()
    m = RAWLOG_STRIP_RE.match(text)
    return m.group(1) if m else text


class parsedc(object):
    """Collect interesting IMAP commands per mailbox and remote IP.

    ``results`` maps mailbox -> remote IP -> list of raw command lines.
    """

    def __init__(self, day=None, maillog=MAILLOG, homedirs=HOMEDIRSPATH):
        """Set up a parser for one day of logs.

        day      -- datetime.date to report on; defaults to yesterday.
        maillog  -- path of the main mail log.
        homedirs -- directory that contains one sub-directory per mailbox.
        """
        if day is None:
            # Set to yesterday by default
            self.day = datetime.date.today() - datetime.timedelta(days=1)
        else:
            self.day = day
        self.maillog = maillog
        self.homedirs = homedirs
        self.results = {}
        self.current_mb = ''
        self.current_ip = ''

    def feed(self):
        """Read the mail log and every matching rawlog into ``results``.

        For each (time, ip) record of a mailbox the files matching
        ``<homedirs>/<mailbox>/dovecot.rawlog/<YYYYMMDD>-<HHMMSS>-*.in``
        are looked up.  Consecutive records with the same time pick
        successive files from the sorted match list.  Records without a
        matching file are skipped.
        """
        with _open_text(self.maillog) as f:
            s = f.read()
        dayts = self.day.strftime('%Y%m%d')
        timed = self.parsetimes(s, self.day)
        for mb in sorted(timed):
            self.current_mb = mb
            by_ip = self.results.setdefault(mb, {})
            # Glob on the full path instead of os.chdir(): the process cwd
            # stays untouched, relative ``homedirs`` keep working, and a
            # missing rawlog directory simply yields no files.
            rawlog_dir = os.path.join(self.homedirs, mb, 'dovecot.rawlog')
            offset = 0
            last = ''
            for stamp, ip in timed[mb]:
                self.current_ip = ip
                by_ip.setdefault(ip, [])
                if stamp == last:
                    offset += 1
                else:
                    offset = 0
                    last = stamp
                pattern = os.path.join(
                    rawlog_dir, '-'.join([dayts, stamp, '*.in']))
                # Sorted so that ``offset`` indexes a deterministic order.
                logs = sorted(glob.glob(pattern))
                try:
                    path = logs[offset]
                except IndexError:
                    continue  # dovecot may not have made a rawlog
                with _open_text(path) as f:
                    self.parserawlog(f)

    def parsetimes(self, s, dt):
        """Extract (time, remote ip) records per mailbox from log text.

        s  -- full text of the mail log.
        dt -- date whose lines are wanted; None falls back to ``self.day``.

        Only lines starting with the syslog-style "Mon dd" prefix for
        *dt* and containing a timestamp, ``rip=`` and ``user=<...>`` are
        used.  Returns a dict mapping mailbox -> list of
        ``('HHMMSS', 'a.b.c.d')`` tuples in log order.
        """
        if dt is None:
            dt = self.day
        # Day of month is space padded to two columns, e.g. "Jan  5".
        monthday = '%s %2d' % (SYSLOG_MONTHS[dt.month - 1], dt.day)
        times = {}
        for line in s.split('\n'):
            if not line.startswith(monthday):
                continue
            m_time = TIMESTAMP_RE.match(line)
            m_rip = RIP_RE.match(line)
            m_mb = MB_RE.match(line)
            if m_time and m_rip and m_mb:
                stamp = m_time.group(1).replace(':', '')
                record = (stamp, m_rip.group(1))
                times.setdefault(m_mb.group(1), []).append(record)
        return times

    def parserawlog(self, f):
        """Append the interesting lines of rawlog *f* to ``results``.

        f -- iterable of lines (an open file).  Lines are stored under
        ``results[current_mb][current_ip]``.  A COPY line is preceded by
        the most recent SELECT line of the same file, when there is one,
        so the source folder of the copy is visible in the report.
        """
        bucket = self.results[self.current_mb][self.current_ip]
        lastselect = ''
        for line in f:
            if any(p.match(line) for p in RAWLOG_RES):
                bucket.append(line)
                continue
            if RAWLOG_COPY_RE.match(line):
                # Never store an empty placeholder when no SELECT was seen.
                if lastselect:
                    bucket.append(lastselect)
                bucket.append(line)
            if RAWLOG_SELECT_RE.match(line):
                lastselect = line

    def print_report(self, out=None, resolve=None):
        """Write the collected results as an XHTML page.

        out     -- object with a ``write`` method; defaults to sys.stdout.
        resolve -- callable mapping an IP string to a host name or None;
                   defaults to a reverse-DNS lookup.

        Remote IPs without any collected line are omitted.
        """
        if out is None:
            out = sys.stdout
        if resolve is None:
            resolve = _lookup_host

        def emit(text):
            out.write(text + '\n')

        emit(HTML_HEADER % self.day.isoformat())
        for mb in sorted(self.results):
            emit('<div class="mb"><span> %s </span>' % _escape(mb))
            by_ip = self.results[mb]
            for ip in sorted(by_ip):
                lines = by_ip[ip]
                if not lines:
                    continue
                host = resolve(ip)
                emit('<div class="ip"><span>')
                if host is not None:
                    # Host names come from DNS, i.e. from outside: escape.
                    emit('(%s)' % _escape(host))
                emit(_escape(ip))
                emit('</span>')
                emit('<div class="log"><span>')
                for line in lines:
                    emit('  %s <br />' % _escape(_strip_tag(line)))
                emit('</span></div>')
                emit('</div>')
            emit('</div>')
        emit(HTML_FOOTER)


if __name__ == '__main__':
    pdc = parsedc()
    pdc.feed()
    pdc.print_report()
