Last night, I got my first non-spam comment in some time. Harshad Joshi isn't a fan of the re module and wanted to know whether the code from my earlier post could be rewritten without it. Revisiting the code, I realize now that using Python's regular expressions was a bit of overkill for this little script. So here it is without re. It is also more resilient, adds logging, and replaces getopt with optparse.
#!/usr/bin/env python
"""Follow Twitter users whose recent tweets and bios match a search query.

Search terms prefixed with '-' are exclusions: a user is followed only when
their bio contains every plain term and none of the excluded terms.
"""

import logging
import simplejson
import sys
import time
import traceback
import twitter
import urllib2

from getpass import getpass
from optparse import OptionParser
from urllib import urlencode

# Default number of failed attempts resilient_apply tolerates before exiting.
MAX_ERRORS = 10


def resilient_apply(func, *args, **kwargs):
    """
    Call func(*args, **kwargs), logging and retrying on failure.

    The special keyword argument max_errors (default MAX_ERRORS) is consumed
    here and is not passed on to func.  Each failed attempt is logged with
    its traceback.  Returns whatever func returns on the first successful
    call.

    Raises SystemExit(1) when the user interrupts with Ctrl-C or when
    max_errors attempts have failed.
    """
    max_errors = kwargs.pop('max_errors', MAX_ERRORS)
    errors = 0
    while errors < max_errors:
        try:
            return func(*args, **kwargs)
        except KeyboardInterrupt:
            raise SystemExit(1)
        except Exception:
            # Exception rather than a bare except: a SystemExit raised by
            # func must propagate instead of being logged and retried.
            logging.error(traceback.format_exc())
            errors += 1
    logging.error("Maximum errors (%d) exceeded" % max_errors)
    raise SystemExit(1)


def compile_filter(query):
    """
    Split a search query into required and excluded lower-case words.

    Words starting with '-' are exclusions (stored without the dash); all
    other words are required.  Returns a (good, bad) tuple of lists.
    """
    good = []
    bad = []
    for word in query.split():
        word = word.lower()
        if word.startswith('-'):
            excluded = word[1:]
            # A lone '-' would produce the empty string, which is a
            # substring of every bio and would therefore reject everyone.
            if excluded:
                bad.append(excluded)
        else:
            good.append(word)
    return (good, bad)


def filter_user_by_bio(user, filter, api=None):
    """
    Return True when the user's bio satisfies the compiled filter.

    user   -- screen name to look up.
    filter -- (good, bad) tuple as returned by compile_filter.
    api    -- twitter.Api instance to use for the lookup; a fresh
              unauthenticated one is created when omitted.

    Users without a bio never match.
    """
    logging.debug('Looking up %s' % user)
    if api is None:
        api = resilient_apply(twitter.Api)
    bio = resilient_apply(api.GetUser, user).GetDescription()
    if bio is None:
        return False  # We only follow those with bios
    bio = bio.lower()
    good, bad = filter
    if any(word in bio for word in bad):
        return False
    return all(word in bio for word in good)


def follow_by_query(username, password, q, rpp=None, lang=None):
    """
    Follow every not-yet-followed user found by searching for q.

    username, password -- Twitter credentials.
    q                  -- search query; also used as the bio filter.
    rpp                -- results per page for the search (optional).
    lang               -- language restriction for the search (optional).

    Returns the list of newly followed screen names, in the order they
    first appeared in the search results.
    """
    bio_filter = compile_filter(q)
    api = resilient_apply(twitter.Api, username=username, password=password)
    # Everyone we must not look up again: existing friends plus anyone
    # already handled during this run (search results can repeat authors).
    seen = set(friend.GetScreenName()
               for friend in resilient_apply(api.GetFriends))
    newusers = []
    for user in get_users_from_search(q, rpp, lang):
        if user in seen:
            continue
        seen.add(user)
        if filter_user_by_bio(user, bio_filter, api):
            logging.debug('Creating friendship %s' % user)
            resilient_apply(api.CreateFriendship, user)
            newusers.append(user)
    return newusers


def get_users_from_search(query, resultnum=None, lang=None):
    """
    Yield the author screen name of each tweet matching query.

    query     -- search terms.
    resultnum -- number of results to request (defaults to 10).
    lang      -- language restriction (optional).
    """
    rpp = 10
    if resultnum is not None:
        rpp = resultnum
    # A list of pairs keeps the parameter order stable: q, lang, rpp.
    params = [('q', query)]
    if lang is not None:
        params.append(('lang', lang))
    params.append(('rpp', rpp))
    # NOTE: passing the encoded parameters as urlopen's data argument sends
    # them as a POST body, exactly as the original script did.
    response = resilient_apply(
        urllib2.urlopen,
        'http://search.twitter.com/search.json?',
        urlencode(params)
    )
    try:
        data = simplejson.load(response)
    finally:
        # Release the connection before handing results to the caller.
        response.close()
    for result in data['results']:
        yield result['from_user']


def main():
    """Parse the command line, configure logging and follow matching users."""
    parser = OptionParser('usage: %prog [options] search terms')
    parser.add_option('-u', '--username', dest='username', default=None)
    parser.add_option('-p', '--password', dest='password', default=None)
    parser.add_option('-r', '--results', dest='rpp', type='int',
                      default=None)
    parser.add_option('-l', '--lang', dest='lang', default=None)
    parser.add_option('-f', '--logfile', dest='logfile', default=None)
    parser.add_option('-v', '--logginglevel', dest='level', default='INFO')
    options, args = parser.parse_args()

    # Only the integer level constants (DEBUG, INFO, ...) are acceptable;
    # hasattr() alone would also let through names such as 'info' or
    # 'Logger', which are not levels.
    level = getattr(logging, options.level.upper(), None)
    if not isinstance(level, int):
        parser.error("level %s is not acceptable" % options.level)
    if options.username is None:
        parser.error("username is required")
    if not args:
        # An empty query compiles to an empty filter, which would match
        # (and follow) every user that has a bio.
        parser.error("search terms are required")

    logging_args = {
        'format': '%(asctime)s %(levelname)s %(message)s',
        'level': level,
    }
    if options.logfile is None:
        logging_args['stream'] = sys.stdout
    else:
        logging_args['filename'] = options.logfile
    logging.basicConfig(**logging_args)

    if options.password is None:
        options.password = getpass()

    newusers = follow_by_query(
        options.username,
        options.password,
        " ".join(args),
        options.rpp,
        options.lang,
    )
    if newusers:
        logging.info(", ".join(newusers) + ' Added!')


if __name__ == '__main__':
    main()
