#!/usr/bin/python
#
# This program is for automatically handling multiple mailing lists,
# allowing automatic rejection of specified messages
#
# It was inspired by a Perl script written by Vadim Zeitlin,
# whose work is gratefully acknowledged.
#
# The original version of this script was written by William Brack
# who is responsible for any errors or stupid
# mistakes.  All other rights have been transferred to Daniel Veillard
#
#
import sys
import user
import string
import re
import getopt
import urllib
import urllib2
import libxml2

# global variables
VERSION = '0.0'     # see how quickly this number grows...
msgTbl = {}         # dictionary to contain messages to be processed
rejectList = []     # used for logging discarded messages
postReply = {}      # data for a POST reply for discarded messages

# names for subfields in msgTbl:
From = 0
Subj = 1
Reas = 2
Head = 3
Cont = 4
fldNames = ['From', 'Subject', 'Reason', 'Msg Header', 'Content']

# Row labels used on the mailman page, in msgTbl subfield order
# (From, Subj, Reas, Head, Cont)
fldLabels = ['From:', 'Subject:', 'Reason:',
             'Message Headers:', 'Message Excerpt:']

# Stray tag removed from the mailman page before parsing (see main()).
# NOTE(review): the search literal was lost from the copy of the source
# this file was reconstructed from; only the removal length (6 characters)
# survived.  '<body>' is a best guess - confirm against real mailman output.
BODY_TAG = '<body>'

#
# possible params set on entry
#
verbose = False
pretend = False
quiet = False
proxy = ''
bounceFile = user.home + '/.bounces'
logFile = ''

#
# the following vars are set from the "bounce" file
#
lists = {}  # dictionary, key list name, contains url and password
filterFrom = []
filterSubj = []
filterReas = []
filterHead = []
filterCont = []


def _firstContent(context, expr):
    """Return the text content of the first node matching the XPath
    expression 'expr' in 'context', or None when nothing matches."""
    nodes = context.xpathEval(expr)
    if not nodes:
        return None
    return nodes[0].content


#
# processMessages uses a parsed html file and extracts the messages
# being held for admin approval
#
def processMessages(doc):
    """Fill the global msgTbl from the parsed mailman admin page 'doc'.

    msgTbl is cleared first.  Each entry is keyed by the message id (as a
    string) and holds a list indexed by From, Subj, Reas, Head and Cont.
    """
    global msgTbl   # all results are put to this dictionary

    msgTbl = {}     # Clear the message table for this list

    # Let's use the power of XPath to get what we need.  Every held
    # message is taken to be one table directly inside the form in the
    # page body.  The file is a little dicy, and has some errors, so we
    # do a minimum search for the beginning of the messages
    messages = doc.xpathEval("//body/form/table")

    # Within a message table each row carries a label in its first cell
    # ('From:', 'Subject:', 'Reason:', 'Action:', 'Message Headers:',
    # 'Message Excerpt:') and the matching data in the following cell.
    # The 'Action:' data cell holds the input elements for the possible
    # actions; the 'name' attribute of the fourth one is the key which
    # mailman uses to identify the message.
    context = doc.xpathNewContext()     # create a new XPath context
    try:
        for msg in messages:
            # set context node to the current message
            context.setContextNode(msg)

            # We need to get the message ID assigned by mailman - my
            # apologies for the rather complicated expression I use to
            # do this!
            msgNum = _firstContent(
                context,
                "tr/td[1][.='Action:']/following-sibling::td/"
                "descendant::input[4]/@name")
            if msgNum is None:
                # Without an id nothing can be posted back for this
                # table, so skip it rather than abort the whole run
                continue

            fields = []
            for label in fldLabels:
                value = _firstContent(
                    context,
                    "tr/td[1][.='%s']/following-sibling::td" % label)
                # a missing row is stored as an empty string so that the
                # filters can still be applied to the entry
                fields.append(value or '')

            # now store the results into a dictionary for later
            # processing.  Assure the dictionary index is a string
            # (*not* numeric) value to avoid later trouble when
            # generating a POST
            msgTbl["%s" % msgNum] = fields
    finally:
        # contexts created by the libxml2 bindings are not released
        # automatically
        context.xpathFreeContext()


def usage():
    """Print the command line help."""
    print("""
Usage:
    %s [options]

    --help         Give the usage message showing the program options.
    --version      Show the program version and exit.
    --verbose      Give verbose informational messages.
    --quiet        Be silent, only error messages are given
    --proxy=url    Use the given proxy. If 'no' don't use proxy at all
                   (not even the environment variable http_proxy which is
                   used by default).
                   Format: hostname:port (i.e. no leading 'http://')
    --bounce=file  Filename for reading filter params and autologin
                   requests (defaults to $HOME/.bounces)
    --log=file     Filename to record details of all rejected messages.
                   The supplied name will be used with an added suffix of
                   the listname (as provided in the filter parameter file)
    --try          Do everything except returning the reject commands
                   to mailman
""" % sys.argv[0])


def procArgs():
    """Parse sys.argv and set the global run-time options.

    Exits with status 2 on an unknown option, and with status 0 after
    handling --help or --version.
    """
    global logFile, proxy, pretend
    global quiet, verbose, bounceFile

    try:
        opts, args = getopt.getopt(
            sys.argv[1:], 'b:hl:p:qtvV',
            ['bounce=', 'help', 'log=', 'proxy=',
             'quiet', 'try', 'verbose', 'version'])
    except getopt.GetoptError:
        # print help information and exit:
        usage()
        sys.exit(2)

    for o, a in opts:
        if o in ('-b', '--bounce'):
            bounceFile = a
        elif o in ('-h', '--help'):
            usage()
            sys.exit()
        elif o in ('-l', '--log'):
            logFile = a
        elif o in ('-p', '--proxy'):
            proxy = a
        elif o in ('-q', '--quiet'):
            quiet = True
        elif o in ('-t', '--try'):
            pretend = True
        elif o in ('-v', '--verbose'):
            verbose = True
        elif o in ('-V', '--version'):
            print('%s: Version %s' % (sys.argv[0], VERSION))
            # the usage text promises "show the program version and exit"
            sys.exit()

    if quiet and verbose:
        print("You asked for both 'verbose' and 'quiet' "
              "- I'm ignoring 'quiet'!")
        quiet = False


def getBounceFile():
    """Read the bounce file and fill 'lists' and the filter tables.

    Recognised lines (blank lines and lines starting with '#' are
    ignored):
        list <name> <url> <password>
        reject from|subject|reason|header|content <regular expression>
    Patterns are compiled case-insensitively.  Invalid lines are reported
    and skipped.  Exits with status 2 if the file cannot be read.
    """
    global filterFrom, filterSubj, filterReas, filterHead, filterCont, lists

    try:
        bf = open(bounceFile, 'r')
        try:
            lines = bf.readlines()
        finally:
            bf.close()
    except IOError:
        print("Error opening bounce file '%s' - "
              "correct and restart" % bounceFile)
        sys.exit(2)

    # where the compiled pattern of each "reject" kind is collected
    rejectTables = {
        'from': filterFrom,
        'subject': filterSubj,
        'reason': filterReas,
        'header': filterHead,
        'content': filterCont,
    }

    for line in lines:
        line = line.strip()
        if line == '' or line[0] == '#':
            continue

        parts = line.split(None, 1)
        if len(parts) < 2:
            print("Invalid line in bounce file: '%s'" % line)
            continue
        tag1, part1 = parts

        if tag1 == 'list':
            fields = part1.split()
            if len(fields) < 3:
                # name, url and password are all required
                print("Invalid line in bounce file: '%s'" % line)
                continue
            # list url, list password
            lists[fields[0]] = [fields[1], fields[2]]
        elif tag1 == 'reject':
            parts = part1.split(None, 1)
            if len(parts) < 2:
                print("Invalid line in bounce file: '%s'" % line)
                continue
            tag2, part2 = parts
            try:
                expr = re.compile(part2, re.IGNORECASE)
            except re.error:
                print("Invalid pattern in bounce file: '%s' (%s)"
                      % (line, sys.exc_info()[1]))
                continue
            if tag2 in rejectTables:
                rejectTables[tag2].append(expr)
            else:
                print("Invalid line '%s'" % line)
        else:
            print("Invalid line '%s'" % line)


def messageFilter(msg, filterName, ix):
    """Try the compiled patterns in 'filterName' on field 'ix' of message
    'msg'.

    On the first match the message is recorded in rejectList and
    postReply and True is returned; otherwise False is returned.
    """
    global rejectList, postReply

    rej = '3'       # 'radio button' value to reject a message
    for filt in filterName:
        # the elements of filterName are compiled RE's
        if filt.search(msgTbl[msg][ix]):
            rejectList.append([msg, fldNames[ix]])  # details for logging
            postReply[msg] = rej    # add to data for POST reply
            return True
    return False


#
# The main routine interprets the runtime parameters, then calls the
# page processing routine using those params.
#
def main():
    """Process every list named in the bounce file: fetch the admin page,
    apply the reject filters and, unless --try was given, post the reject
    requests back to mailman."""
    global rejectList, postReply

    procArgs()
    getBounceFile()

    # The order in which the fields of a message are tested
    filterPlan = [(filterFrom, From), (filterSubj, Subj),
                  (filterReas, Reas), (filterHead, Head),
                  (filterCont, Cont)]

    #
    # The arguments have been processed, and the
    # control file as well.  Now we need to get to
    # work.
    #
    # Our main loop is on "list", each of which is processed sequentially
    for listName in lists.keys():
        remaining = 0
        if not quiet:
            print("Processing list '%s'" % listName)

        # The url and password have been fetched from the bounce file
        url = lists[listName][0]        # separate vars for clarity
        passwd = lists[listName][1]

        # Set up the basic request
        req = urllib2.Request(url)
        if proxy == 'no':
            # An empty proxy map is what switches off the proxies taken
            # from the environment; Request.set_proxy(None, None) does not
            opener = urllib2.build_opener(urllib2.ProxyHandler({}))
        else:
            opener = urllib2.build_opener()
            if proxy != '':
                req.set_proxy(proxy, 'http')

        # Now access the web page (POST with password)
        page = opener.open(req, urllib.urlencode({'adminpw': passwd}))
        content = page.read()
        try:
            rcookie = page.info()['Set-Cookie']
        except KeyError:
            print("Big trouble - no cookie found on initial request!")
            sys.exit(2)
        cookie = rcookie.split(';')[0]  # trim trailing junk
        req.add_header('Cookie', cookie)

        # We have successfully read in the admin page from mailman.  Now
        # we parse the page and format the message data
        # Unfortunately..... the mailman page occasionally has some
        # invalid data (including embedded NULL bytes) which can cause
        # the parsing to fail.  We need to get rid of that junk.  While
        # we are working on this, the mailman page also has an invalid
        # BODY tag near the beginning which causes an error logging
        # message on parsing, so I have put in a small, dirty hack to get
        # rid of it.  If mailman ever gets fixed this can be removed.
        content = content.replace('\0', '')
        ix = content.find(BODY_TAG)
        if ix >= 0:
            content = content[:ix] + content[ix + len(BODY_TAG):]

        try:
            doc = libxml2.htmlParseDoc(content, None)
        except Exception:
            # just for debugging, if any error occurs log the content
            lg = open('dumplog.err', 'w')
            try:
                lg.write(content)
            finally:
                lg.close()
            print("Error parsing 'content'")
            raise

        # Now process parsed data and extract/format the message
        # information.  msgTbl only keeps strings, so the document can be
        # released as soon as the extraction is finished.
        try:
            processMessages(doc)
        finally:
            doc.freeDoc()

        remaining = len(msgTbl)
        if not quiet and remaining > 0:
            # Assure proper english on our logging information message :-)
            if remaining == 1:
                suf = ''
            else:
                suf = 's'
            print("%s message%s held for approval for list %s" % (
                remaining, suf, listName))

        rejectList = []     # info for logging / monitoring
        postReply = {}      # this will become the POST information

        # We go through all of the messages supplied, trying out each of
        # the supplied patterns to decide whether or not the message can
        # be "automatically" rejected.  If a match is found, all
        # remaining processing for that message is bypassed.
        for msg in sorted(msgTbl):
            for filters, field in filterPlan:
                if messageFilter(msg, filters, field):
                    break

        # If there were no rejects, we are finished.  Otherwise we have
        # a little more to do
        remaining = remaining - len(postReply)
        if postReply:
            if verbose:
                # Just for information, log out which messages are being
                # rejected, and why
                for msgId, why in rejectList:
                    print("Message %s rejected because of %s"
                          % (msgId, why))
            if logFile != '':
                logName = logFile + '.' + listName
                if verbose:
                    print("Writing rejects to file %s" % logName)
                log = open(logName, 'w')
                try:
                    for msgId, why in rejectList:
                        log.write('Rejected %s because of %s\n'
                                  % (msgId, why))
                        log.write('Mailman reason for hold: %s\n'
                                  % msgTbl[msgId][Reas])
                        log.write('Message Header:\n%s\n'
                                  % msgTbl[msgId][Head])
                        log.write('Content:\n%s\n' % msgTbl[msgId][Cont])
                finally:
                    log.close()

            # If any messages are being rejected, the POST page to mailman
            if not pretend:
                # Encode our POST data
                postData = urllib.urlencode(postReply)
                # Off it goes....
                page = opener.open(req, postData)
                page.close()

        if not quiet and remaining > 0:
            if remaining > 1:
                plural = "s"
            else:
                plural = ""
            print("Left %d message%s at %s" % (remaining, plural, url))

    if not quiet:
        print("End of run")


if __name__ == "__main__":
    main()