If you don't know about Hobo, it's a framework built on top of Rails. It has some really slick features, but it's also a great example of how not to do documentation.

So I've started a new blog, written with Hobo, to help readers learn to use it. The full source for the blog is available on Google Code, licensed under the BSD License.

Posted by Tyler Lesmann on January 28, 2010 at 21:20
Tagged as: hobo rails

Last night, I got my first non-spam comment in some time. Harshad Joshi isn't a fan of the re module and wanted to know if the code from my earlier post could be rewritten without it. Revisiting the code, I realize now that Python's regular expressions were overkill for this little script. So here it is without re. It also has more resilience and logging, and getopt has been replaced by optparse.

#!/usr/bin/env python

import logging
import simplejson
import sys
import time
import traceback
import twitter
import urllib2
from getpass import getpass
from optparse import OptionParser
from urllib import urlencode

MAX_ERRORS = 10

def resilient_apply(func, *args, **kwargs):
    """
    If something goes awry, don't die!  Log it!

    Works just like regular apply.  max_errors keyword arg determines when we
    should quit trying and exit.
    """

    if 'max_errors' in kwargs:
        max_errors = kwargs['max_errors']
        del kwargs['max_errors']
    else:
        max_errors = MAX_ERRORS

    errors = 0
    while errors < max_errors:
        try:
            return func(*args, **kwargs)
        except KeyboardInterrupt:
            raise SystemExit(1)
        except:
            logging.error("".join(
                traceback.format_exception(*sys.exc_info())))
        errors += 1
    logging.error("Maximum errors (%d) exceeded" % max_errors)
    raise SystemExit(1)

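# Split the search query into keywords to require and keywords to exclude;
# a leading '-' marks a word as an exclusion (e.g. "python -spam").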
def compile_filter(query):
    good = []
    bad = []
    words = query.split()
    for word in words:
        word = word.lower()
        if word.startswith('-'):
            bad.append(word[1:])
        else:
            good.append(word)
    return (good, bad)

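# Return True only when the user's bio contains every required keyword and
# none of the excluded ones; users without a bio never pass.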
def filter_user_by_bio(user, filter, api=None):
    logging.debug('Looking up %s' % user)
    if api is None:
        api = resilient_apply(twitter.Api)
    bio = resilient_apply(api.GetUser, user).GetDescription()
    if bio is None:
        return False # We only follow those with bios

    bio = bio.lower()
    good, bad = filter
    goodmatches = []
    for word in bad:
        if word in bio:
            return False
    for word in good:
        if word in bio:
            goodmatches.append(word)
    if good == goodmatches:
        return True
    return False

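# Follow every user from the search whose bio passes the filter and who is
# not already a friend; returns the list of newly followed screen names.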
def follow_by_query(username, password, q, rpp=None, lang=None):
    filter = compile_filter(q)
    api = resilient_apply(twitter.Api, username=username, password=password)
    friends = [user.GetScreenName()
        for user in resilient_apply(api.GetFriends)]

    goodusers = []
    for user in get_users_from_search(q, rpp, lang):
        if filter_user_by_bio(user, filter, api):
            goodusers.append(user)
    newusers = []
    for user in goodusers:
        if user not in friends:
            logging.debug('Creating friendship %s' % user)
            resilient_apply(api.CreateFriendship, user)
            friends.append(user)
            newusers.append(user)
    return newusers

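# Query Twitter's JSON search endpoint and yield the screen name of each
# user whose tweet matched the search.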
def get_users_from_search(query, resultnum=None, lang=None):
    q = []
    rpp = 10
    q.append(urlencode({'q': query}))
    if lang is not None:
        q.append(urlencode({'lang': lang}))
    if resultnum is not None:
        rpp = resultnum
    q.append(urlencode({'rpp': rpp}))
    response = resilient_apply(
        urllib2.urlopen,
        'http://search.twitter.com/search.json?' + '&'.join(q)
    )
    data = simplejson.load(response)
    for result in data['results']:
        yield result['from_user']

def main():
    parser = OptionParser('usage: %prog [options] search terms')
    parser.add_option('-u', '--username', dest='username', default=None)
    parser.add_option('-p', '--password', dest='password', default=None)
    parser.add_option('-r', '--results', dest='rpp', default=None)
    parser.add_option('-l', '--lang', dest='lang', default=None)
    parser.add_option('-f', '--logfile', dest='logfile', default=None)
    parser.add_option('-v', '--logginglevel', dest='level', default='INFO')
    options, args = parser.parse_args()

    if not hasattr(logging, options.level):
        parser.error("level %s is not acceptable" % options.level)

    if options.username is None:
        parser.error("username is required")

    logging_args = {
         'format': '%(asctime)s %(levelname)s %(message)s',
         'level': getattr(logging, options.level),
    }
    if options.logfile is None:
        logging_args['stream'] = sys.stdout
    else:
        logging_args['filename'] = options.logfile
    logging.basicConfig(**logging_args)

    if options.password is None:
        options.password = getpass()

    newusers = follow_by_query(
        options.username,
        options.password,
        " ".join(args),
        options.rpp,
        options.lang,
    )
    if newusers:
        logging.info(", ".join(newusers) + ' Added!')

if __name__ == '__main__':
    main()
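
For reference, an invocation looks something like this; the script name here is only a placeholder, and the password is prompted for with getpass when -p is omitted:

./follow_by_search.py -u myuser -r 20 -l en python django -spam

Words prefixed with - are excluded, so this follows users from the search whose bios mention both python and django but not spam.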
Posted by Tyler Lesmann on January 6, 2010 at 23:11
Tagged as: json optparse python twitter