summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorbwarsaw1999-11-24 19:37:53 +0000
committerbwarsaw1999-11-24 19:37:53 +0000
commit94ac9f8ed9bb490b4e86a8562dbf98ff61067c9c (patch)
treeab21a9bb8d2b03cdaf565db05a254613070cdf8b
parent2aac6c761efcaa26912ecd2ae18b21ba027bddf5 (diff)
downloadmailman-94ac9f8ed9bb490b4e86a8562dbf98ff61067c9c.tar.gz
mailman-94ac9f8ed9bb490b4e86a8562dbf98ff61067c9c.tar.zst
mailman-94ac9f8ed9bb490b4e86a8562dbf98ff61067c9c.zip
A serious rewrite to make it more robust, and to add a somewhat useful
command line interface. The latter allows you to manually poll a specific list's newsgroup for a range of message numbers, sending them to the list. Using this interface does /not/ update the watermark file. We also don't update the watermark entry for a specific list until that list's child process (which does the actual gating of the messages from Usenet to the mailing list) exits with a zero status. If it exits with a non-zero status, it is taken as a complete failure of the gating process and the watermark isn't updated. This could cause duplicates, so a really anal approach would be to create a pipe between child and parent and write the new watermark after each message is successfully gated.
-rwxr-xr-xcron/gate_news285
1 files changed, 200 insertions, 85 deletions
diff --git a/cron/gate_news b/cron/gate_news
index bb8e3c79a..b259c954e 100755
--- a/cron/gate_news
+++ b/cron/gate_news
@@ -17,8 +17,29 @@
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
"""Poll the NNTP servers for messages to be gatewayed to mailing lists.
+
+Usage is either
+
+ gate_news
+
+or
+
+ gate_news listname first last
+
+In the former case, the watermark file is used to find the first message to
+post, and all messages up to the last available on the newsgroup are gated.
+The watermark file is then updated for the next run. This is the way the
+script should be run from cron.
+
+In the latter case, only the specified mailing list's newsgroup is polled, and
+only the message from first to last (inclusive) are gated to the mailing
+list. In this case, the watermark file is /not/ updated.
+
+Either form can be run from the command line.
+
"""
+import sys
import os
import string
import time
@@ -26,11 +47,14 @@ import marshal
import paths
import nntplib
import errno
+import traceback
from Mailman import MailList
from Mailman import mm_cfg
from Mailman import Utils
from Mailman import LockFile
+from Mailman import Message
+from Mailman.Logging.Utils import LogStdErr
# Work around known problems with some RedHat cron daemons
@@ -41,21 +65,46 @@ signal.signal(signal.SIGCHLD, signal.SIG_DFL)
WATERMARK_FILE = os.path.join(mm_cfg.DATA_DIR, 'gate_watermarks')
LIST_LOCK_FILE = os.path.join(mm_cfg.LOCK_DIR, 'gate_lock.')
+LogStdErr('fromusenet', 'gate_news')
+
def main():
- names = Utils.list_names()
- try:
- fp = open(WATERMARK_FILE)
- watermarks = marshal.load(fp)
- fp.close()
- except IOError, (code, msg):
- if code <> errno.ENOENT:
- Utils.reraise()
- watermarks = {}
- # marshal or open could raise other exceptions, namely EOFError,
- # ValueError or TypeError. TBD: should we zap the watermarks file if that
- # happens?
+ # check command line options
+ names = None
+ updatewatermarks = 1
+ if len(sys.argv) == 1:
+ names = Utils.list_names()
+ first = -1
+ last = -1
+ elif len(sys.argv) == 4:
+ names = [sys.argv[1]]
+ try:
+ first = int(sys.argv[2])
+ last = int(sys.argv[3])
+ except ValueError:
+ names = None
+ updatewatermarks = 0
+ if not names:
+ print __doc__
+ sys.exit(1)
+ # try to open the watermarks file
+ if updatewatermarks:
+ try:
+ fp = open(WATERMARK_FILE)
+ watermarks = marshal.load(fp)
+ fp.close()
+ # TBD: marshal or open could raise other exceptions, namely EOFError,
+ # ValueError or TypeError. Should we zap the watermarks file if that
+ # happens?
+ except IOError, (code, msg):
+ if code <> errno.ENOENT:
+ raise
+ watermarks = {}
+ # We need to reap the child processes, which will tell us the last article
+ # number actually gated
+ children = {}
+ # cruise through lists
for name in names:
# check to see if the list is gating news to mail. If not, skip the
# list. If so, then we have to poll the newsgroup and gate any
@@ -63,71 +112,108 @@ def main():
mlist = MailList.MailList(name, lock=0)
if not mlist.gateway_to_mail:
continue
+ # open up a connection to the gated newsgroup. we want to get the
+ # watermark for the group in the parent process so that we can safely
+ # update the gate_watermarks file. we'll actually do the gating in a
+ # child process
+ conn = nntplib.NNTP(mlist.nntp_host)
+ r,c,f,l,n = conn.group(mlist.linked_newsgroup)
+ if not updatewatermarks:
+ # just post the specified messages and be done with it
+ poll_newsgroup(mlist, conn, first, last+1)
+ return
+ # Otherwise, let's figure out what needs to be done
+ first = int(f)
+ last = int(l)
+ wm = watermarks.get(name, 0)
+ if wm == 0:
+ # This is the first time we've tried to gate this newsgroup. We
+ # essentially do a mass catch-up, otherwise we'd flood the mailing
+ # list. If you want to post all earlier messages, do this
+ # manually using the comand line interface.
+ watermarks[name] = last
+ continue
# try to get a per-list lock because it makes no sense to have more
# than one process gating a newsgroup. if we can't get the lock, just
# ignore the list for now... 5 minutes (usually how cron invokes this)
- # later we'll try again anyway. We don't need to be anal about giving
- # up the lock because we're setting a hung_time out of 4 minutes.
- # This means that if we crashed, the next time the cron job runs,
- # it'll just wax the lock and try again.
+ # later we'll try again anyway.
lock = LockFile.LockFile(LIST_LOCK_FILE + name, lifetime=240)
try:
- lock.lock(timeout=0.001)
+ lock.lock(timeout=0.5)
except LockFile.TimeOutError:
+ # TBD: It is possible that some other process has laid claim to
+ # the gate lock for this list, but that said process has exited
+ # uncleanly. If that's the case, and it leaves it's lock claim on
+ # disk, we will never be able to gate from usenet to the list
+ # again, until the stale lock is removed. For now, we just log
+ # this potentially deadlocked situation, but this should really be
+ # fixed (probably in LockFile.py though).
+ sys.stderr.write('Could not acquire gate_news lock for %s\n' %
+ name)
# someone else is gating this list already
continue
- # open up a connection to the gated newsgroup. we want to get the
- # watermark for the group in the parent process so that we can safely
- # update the gate_watermarks file. we'll actually do the gating in a
- # child process
- conn = nntplib.NNTP(mlist.nntp_host)
- r,c,first,last,n = conn.group(mlist.linked_newsgroup)
- first = int(first)
- last = int(last)
- wm = watermarks.get(name, 0)
- watermarks[name] = last
- if wm <> 0:
- # TBD: Essentially this does a mass catch-up on the newsgroup.
- # The first time this script is run, no messages will be will be
- # forwarded. We *could* have an option to control this, but who
- # wants that? ;-)
- if not os.fork():
- # in the child.
- #
- # steal the lock from the parent because we're going to manage
- # it from here on, and we have a different PID than our
- # parent. we't want to minimize any race conditions where
- # someone else can steal the lock from us. I think there's
- # still a race condition during the time we've actually got
- # the file open for writing and when we're done writing it
- # (during the steal()), but that should be very small.
+ # Fork a child to do the actual gating.
+ #
+ # TBD: There are several issues here, revolving around finding out
+ # from the child exactly which messages were successfully gated.
+ # Let's say the child is going to gate messages 125-175. If we were
+ # really anal, we'd open a pipe and let the child tell us the last
+ # message it successfully gatewayed. Can't use an exit status here
+ # because message numbers can easily be > 255. But managing all those
+ # child pipes means pipes and selects, which is probably overkill.
+ #
+ # Instead what we do is just get the exit status of the child. If the
+ # child completes successfully, we assume it gated all the requested
+ # messages. If it exits with a non-zero status, we assume it gated
+ # none of them. This is probably good enough, although some
+ # duplicates are theoretically possible.
+ pid = os.fork()
+ if pid:
+ # in the parent. record the pid of the child, the child's list
+ # name, and last message number. when the child successfully
+ # exits, we'll update it's watermark
+ children[pid] = (name, last)
+ else:
+ # in the child.
+ #
+ # Steal the list's gateway lock from the parent because we're
+ # going to manage it from here on, and we have a different PID
+ # than our parent. We want to minimize any race conditions where
+ # someone else can steal the lock from us. I think there's still
+ # a race condition during the time we've actually got the file
+ # open for writing and when we're done writing it (during the
+ # steal()), but that should be very small.
+ try:
lock.steal()
- poll_newsgroup(mlist, conn, wm, first, last)
- lock.unlock()
+ poll_newsgroup(mlist, conn, max(wm+1, first), last+1)
+ try:
+ lock.unlock()
+ except LockFile.NotLockedError:
+ # I think it's okay to ignore these specific exceptions
+ pass
os._exit(0)
- # Save the new watermarks after every newsgroup gating has
- # started, so in case of a system crash we reduce the number of
- # multiply gated messages. it might be better to save after every
- # post, but that is harder to coordinate safely between the
- # subprocesses, and would probably be *much* slower
- omask = os.umask(002)
- try:
- fp = open(WATERMARK_FILE + '.tmp', 'w')
- marshal.dump(watermarks, fp)
- fp.close()
- os.rename(WATERMARK_FILE + '.tmp', WATERMARK_FILE)
- finally:
- os.umask(omask)
+ except:
+ # if anything else bad happens, log the exception to stderr.
+ # TBD: we should probably generalize scripts/driver to handle
+ # this situation
+ traceback.print_exc()
+ os._exit(1)
+ # wait on at least one child
+ reap(children, watermarks)
+ # we're done forking off all the gating children, now just wait for them
+ # all to exit, and then we're done
+ while children:
+ reap(children, watermarks)
# XXX: Bogus, but might as we do it `legally'
QuickEscape = 'QuickEscape'
-def poll_newsgroup(mlist, conn, wm, first, last):
+def poll_newsgroup(mlist, conn, first, last):
# NEWNEWS is not portable and has synchronization issues... Use a
# watermark system instead.
- for num in range(max(wm+1, first), last+1):
+ for num in range(first, last):
try:
headers = conn.head(`num`)[3]
found_to = 0
@@ -141,42 +227,71 @@ def poll_newsgroup(mlist, conn, wm, first, last):
if header[i:] == ': %s' % mlist.GetListEmail():
raise QuickEscape
body = conn.body(`num`)[3]
- # Create the pipe to the Mail posting script. Note that it is not
- # installed executable, so we'll tack on the path to Python we
- # discovered when we configured Mailman. The extra argument to
- # `post' informs the system that the message is originating from
- # Usenet and so should not get posted back to Usenet. I think
- # this is mostly redundant with the X-BeenThere header, but I'm a
- # little afraid to muck with that.
- #
- # TBD: This should just be injected directly into the message
- # delivery pipeline.
- cmd = '%s %s %s fromusenet' % (
- mm_cfg.PYTHON,
- os.path.join(mm_cfg.SCRIPTS_DIR, 'post'),
- mlist.internal_name())
- file = os.popen(cmd, 'w')
# Usenet originated messages will not have a Unix envelope
# (i.e. "From " header). This breaks Pipermail archiving, so
# we will synthesize one. Be sure to use the format searched
# for by mailbox.UnixMailbox._isrealfromline()
timehdr = time.asctime(time.localtime(time.time()))
- envhdr = 'From ' + mlist.GetAdminEmail() + ' ' + timehdr
- file.write(envhdr + '\n')
- file.write(string.join(headers,'\n'))
- # If there wasn't already a TO: header, add one.
+ lines = ['From ' + mlist.GetAdminEmail() + ' ' + timehdr]
+ lines.extend(headers)
+ lines.append('')
+ lines.extend(body)
+ lines.append('')
+ msg = Message.OutgoingMessage(string.join(lines, '\n'))
+ msg.fromusenet = 1
if not found_to:
- file.write("\nTo: %s" % mlist.GetListEmail())
- file.write('\n\n')
- file.write(string.join(body,'\n'))
- file.write('\n')
- file.close()
- except nntplib.error_temp:
- pass # Probably canceled, etc...
+ msg['To'] = mlist.GetListEmail()
+ # the list must be locked during posting
+ lockflag = mlist.Locked()
+ try:
+ try:
+ mlist.Lock()
+ except Locked.AlreadyLockedError:
+ pass
+ mlist.Post(msg)
+ finally:
+ mlist.Save()
+ if not lockflag:
+ mlist.Unlock()
+ sys.stderr.write('%s: gated msg id %d\n' %
+ (mlist.internal_name(), num))
+ except nntplib.error_temp, msg:
+ sys.stderr.write('%s: NNTP error: %s\n' %
+ (mlist.internal_name(), msg))
+ pass # Probably canceled, etc...
except QuickEscape:
pass # We gated this TO news, don't repost it!
+def reap(children, watermarks):
+ if not children:
+ return
+ # see if any children have exited yet
+ pid, status = os.waitpid(-1, os.WNOHANG)
+ if pid == 0:
+ # nope, none are ready
+ return
+ name, last = children[pid]
+ del children[pid]
+ if not status:
+ # successful exit
+ watermarks[name] = last
+ # Save the new watermarks after every newsgroup gating has started, so in
+ # case of a system crash we reduce the number of multiply gated messages.
+ # it might be better to save after every post, but that is harder to
+ # coordinate safely between the subprocesses, and would probably be *much*
+ # slower.
+ omask = os.umask(002)
+ try:
+ fp = open(WATERMARK_FILE + '.tmp', 'w')
+ marshal.dump(watermarks, fp)
+ fp.close()
+ os.rename(WATERMARK_FILE + '.tmp', WATERMARK_FILE)
+ finally:
+ os.umask(omask)
+
+
+
if __name__ == '__main__':
main()