Archive
Tags
android (3)
ant (2)
beautifulsoup (1)
debian (1)
decorators (1)
django (9)
dovecot (1)
encryption (1)
fix (4)
gotcha (2)
hobo (1)
htmlparser (1)
imaplib (2)
java (1)
json (2)
kerberos (2)
linux (7)
lxml (5)
markdown (4)
mechanize (6)
multiprocessing (1)
mysql (2)
nagios (2)
new_features (3)
open_source (5)
optparse (2)
parsing (1)
perl (2)
postgres (1)
preseed (1)
pxe (4)
pyqt4 (1)
python (41)
raid (1)
rails (1)
red_hat (1)
reportlab (4)
request_tracker (2)
rt (2)
ruby (1)
scala (1)
screen_scraping (7)
shell_scripting (8)
soap (1)
solaris (3)
sql (2)
sqlalchemy (2)
tips_and_tricks (1)
twitter (2)
ubuntu (1)
vmware (2)
windows (1)
zimbra (2)

This was a nice little exercise for my skills, so I'd like to share it. The script parses the main dovecot log and the rawlogs for each mailbox to generate an HTML report of which host/IP has done what. The actions are still raw IMAP commands, but they are pretty understandable.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
#!/usr/bin/env python

import cgi
import datetime
import glob
import os
import re
import socket
import sys

# Default locations; both can be overridden per parsedc instance.
HOMEDIRSPATH = '/home'  # joined with <mailbox>/dovecot.rawlog to find rawlogs
MAILLOG = '/var/log/mail.log'  # log file scanned for login lines

# Regex for mail.log
# All patterns are raw strings so that \d and \w reach the regex engine
# untouched instead of being treated as (invalid) string escapes.
# The leading '.*' is greedy, so each pattern captures the LAST occurrence
# on the line.
TIMESTAMP_RE = re.compile(r'.*(\d\d:\d\d:\d\d)')
# The dots are escaped: an unescaped '.' would accept any character between
# the octets (e.g. "rip=1a2b3c4").
RIP_RE = re.compile(r'.*rip=(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})')
MB_RE = re.compile(r'.*user=<(\w+?)>')

# Regex for dovecot.rawlog
# Each pattern is anchored (via .match) on the command tag that starts the
# line, followed by the IMAP command of interest.
RAWLOG_RES = [
    re.compile(r'\w+? CREATE "', re.I),  # Folder Created
    re.compile(r'\w+? DELETE "', re.I),  # Folder Deleted
    re.compile(r'\w+? RENAME "', re.I),  # Folder Moved/Renamed
    re.compile(r'\w+? APPEND "', re.I),  # Mail Added
    re.compile(r'\w+? UID STORE.*DELETED', re.I),  # Mail Deleted
]
RAWLOG_SELECT_RE = re.compile(r'\w+? SELECT "', re.I)  # Folder selected
RAWLOG_COPY_RE = re.compile(r'\w+? UID COPY', re.I)  # Folder Copied/Being Moved
RAWLOG_STRIP_RE = re.compile(r'\w+? (.*)')  # Remove the action id

# Opening part of the report page (XML declaration, doctype, <head> with
# inline CSS, and the opening <body> tag).
# The template is filled with the % operator and has exactly one %s
# placeholder (the page title), so any literal percent sign added to it
# later (e.g. in the CSS) must be written as %%.
HTML_HEADER = """<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
    <head>
        <title>%s</title>
        <style type="text/css">
            .mb { background-color: #BDB; margin: 0px 0px 40px 0px; padding: 5px; }
            .ip { background-color: #CEC; margin: 10px; padding: 5px; }
            .log { background-color: #DFD; margin: 10px; padding: 5px; }
            span { font-size: small; }
        </style>
    </head>
    <body>
"""

# Closing part of the report page; printed verbatim (no placeholders).
HTML_FOOTER = """
    </body>
</html>
"""


class parsedc:
    """Build a per-mailbox, per-IP report of IMAP actions for one day.

    Login lines in the mail log give (time, remote IP, mailbox) triples;
    each triple is then matched to a rawlog file under
    ``<homedirs>/<mailbox>/dovecot.rawlog`` and the interesting IMAP
    commands from that file are collected in ``self.results``, a dict of
    the form ``{mailbox: {ip: [rawlog line, ...]}}``.

    Typical use is ``feed()`` followed by ``print_report()``.
    """

    def __init__(self, day=None, maillog=MAILLOG, homedirs=HOMEDIRSPATH):
        """Set up the parser.

        day      -- date object to report on; defaults to yesterday.
        maillog  -- path of the mail log to scan for logins.
        homedirs -- directory containing one sub-directory per mailbox.
        """
        if day is None:
            # Set to yesterday by default
            day = datetime.date.today() - datetime.timedelta(days=1)
        self.day = day
        self.maillog = maillog
        self.homedirs = homedirs
        self.results = {}
        # Mailbox/IP that parserawlog() files its findings under.
        self.current_mb = ''
        self.current_ip = ''

    def feed(self):
        """Read the mail log and the matching rawlogs into self.results.

        Raises IOError/OSError if the mail log itself cannot be read.
        Logins with no matching rawlog file are skipped.
        """
        with open(self.maillog, 'r') as f:
            s = f.read()
        dayts = self.day.strftime('%Y%m%d')
        timed = self.parsetimes(s, self.day)
        for mb in sorted(timed):
            self.current_mb = mb
            self.results.setdefault(mb, {})
            # Glob with the full directory prefix rather than chdir-ing
            # into it: the working directory stays untouched, a relative
            # homedirs keeps working for every mailbox, and a mailbox
            # without a rawlog directory simply yields no files.
            rawlogdir = os.path.join(self.homedirs, mb, 'dovecot.rawlog')
            offset = 0
            last = ''
            for stamp, ip in timed[mb]:
                self.current_ip = ip
                self.results[mb].setdefault(ip, [])
                # Several logins within the same second share one glob
                # pattern; offset selects the n-th file among them.
                if stamp == last:
                    offset += 1
                else:
                    offset = 0
                    last = stamp
                pattern = os.path.join(
                    rawlogdir, '-'.join([dayts, stamp, '*.in']))
                # glob returns names in arbitrary order; sort so that the
                # offset picks files deterministically.
                logs = sorted(glob.glob(pattern))
                try:
                    path = logs[offset]
                except IndexError:
                    continue  # dovecot may not have made a rawlog
                with open(path) as rawlog:
                    self.parserawlog(rawlog)

    def parsetimes(self, s, dt):
        """Extract the logins of day ``dt`` from mail log text ``s``.

        Returns a dict mapping mailbox name to a list of
        ``(HHMMSS, remote_ip)`` tuples in log order. Only lines that start
        with the syslog-style date of ``dt`` and contain a timestamp, a
        ``rip=`` address and a ``user=<...>`` field are used. If ``dt`` is
        None, ``self.day`` is used.
        """
        if dt is None:
            dt = self.day
        # Syslog date prefix, e.g. "Oct 24" or "Oct  4" (day is padded
        # with a space). Built explicitly instead of slicing '%c', whose
        # layout varies between platforms and locales.
        monthday = '%s %2d' % (dt.strftime('%b'), dt.day)
        times = {}
        for line in s.splitlines():
            if not line.startswith(monthday):
                continue
            when = TIMESTAMP_RE.match(line)
            rip = RIP_RE.match(line)
            user = MB_RE.match(line)
            if not (when and rip and user):
                continue
            stamp = when.group(1).replace(':', '')
            times.setdefault(user.group(1), []).append((stamp, rip.group(1)))
        return times

    def parserawlog(self, f):
        """Collect the interesting commands from one rawlog.

        ``f`` is an iterable of lines (e.g. an open file). Matching lines
        are appended to ``self.results[self.current_mb][self.current_ip]``.
        For a UID COPY the most recent SELECT line (if any) is recorded
        first, so the report shows which folder the copy came from.
        """
        log = self.results.setdefault(
            self.current_mb, {}).setdefault(self.current_ip, [])
        lastselect = ''
        for line in f:
            if any(p.match(line) for p in RAWLOG_RES):
                log.append(line)
                continue
            if RAWLOG_COPY_RE.match(line):
                # Skip the placeholder when no SELECT has been seen yet;
                # an empty entry carries no information for the report.
                if lastselect:
                    log.append(lastselect)
                log.append(line)
            elif RAWLOG_SELECT_RE.match(line):
                lastselect = line

    def print_report(self):
        """Write the collected results to stdout as an XHTML page.

        Mailboxes and IPs are listed in sorted order; IPs without any
        recorded action are omitted. A reverse DNS lookup is attempted
        for every listed IP.
        """
        # cgi.escape is gone from newer Pythons; html.escape with
        # quote=False escapes the same characters (&, <, >).
        try:
            from html import escape
        except ImportError:
            from cgi import escape

        # Every print() below takes exactly one argument so that the code
        # behaves the same as a Python 2 print statement and as the
        # Python 3 print function.
        print(HTML_HEADER % self.day.isoformat())
        for mb in sorted(self.results):
            print('<div class="mb"><span> %s </span>' % mb)
            for ip in sorted(self.results[mb]):
                lines = self.results[mb][ip]
                if not lines:
                    continue
                try:
                    host = socket.gethostbyaddr(ip)[0]
                except (socket.herror, socket.gaierror):
                    # No reverse record, or the address is not resolvable.
                    host = None
                print('<div class="ip"><span>')
                if host is not None:
                    print('(%s)' % host)
                print(ip)
                print('</span>')
                print('<div class="log"><span>')
                for line in lines:
                    text = line.strip()
                    m = RAWLOG_STRIP_RE.match(text)
                    if m:
                        # Drop the leading command tag.
                        text = m.group(1)
                    print('  %s <br />' % escape(text, quote=False))
                print('</span></div>')
                print('</div>')
            print('</div>')
        print(HTML_FOOTER)

if __name__ == '__main__':
    # Script entry point: with no arguments parsedc reports on yesterday
    # using the default mail log and home directory locations.
    report = parsedc()
    report.feed()
    report.print_report()
Posted by Tyler Lesmann on October 24, 2008 at 15:40
Tagged as: dovecot linux parsing python