summaryrefslogtreecommitdiff
path: root/src/mailman/bin/gate_news.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/mailman/bin/gate_news.py')
-rw-r--r--src/mailman/bin/gate_news.py243
1 files changed, 243 insertions, 0 deletions
diff --git a/src/mailman/bin/gate_news.py b/src/mailman/bin/gate_news.py
new file mode 100644
index 000000000..eac30422d
--- /dev/null
+++ b/src/mailman/bin/gate_news.py
@@ -0,0 +1,243 @@
+# Copyright (C) 1998-2009 by the Free Software Foundation, Inc.
+#
+# This file is part of GNU Mailman.
+#
+# GNU Mailman is free software: you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option)
+# any later version.
+#
+# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# GNU Mailman. If not, see <http://www.gnu.org/licenses/>.
+
+import os
+import sys
+import time
+import socket
+import logging
+import nntplib
+import optparse
+import email.Errors
+
+from email.Parser import Parser
+from locknix import lockfile
+
+from mailman import MailList
+from mailman import Message
+from mailman import Utils
+from mailman import loginit
+from mailman.configuration import config
+from mailman.i18n import _
+from mailman.queue import Switchboard
+from mailman.version import MAILMAN_VERSION
+
+# Work around known problems with some RedHat cron daemons
+import signal
+signal.signal(signal.SIGCHLD, signal.SIG_DFL)
+
+NL = '\n'
+
+log = None
+
+class _ContinueLoop(Exception):
+ pass
+
+
+
+def parseargs():
+ parser = optparse.OptionParser(version=MAILMAN_VERSION,
+ usage=_("""\
+%prog [options]
+
+Poll the NNTP servers for messages to be gatewayed to mailing lists."""))
+ parser.add_option('-C', '--config',
+ help=_('Alternative configuration file to use'))
+ opts, args = parser.parse_args()
+ if args:
+ parser.print_help()
+ print >> sys.stderr, _('Unexpected arguments')
+ sys.exit(1)
+ return opts, args, parser
+
+
+
+_hostcache = {}
+
+def open_newsgroup(mlist):
+ # Split host:port if given
+ nntp_host, nntp_port = Utils.nntpsplit(mlist.nntp_host)
+ # Open up a "mode reader" connection to nntp server. This will be shared
+ # for all the gated lists having the same nntp_host.
+ conn = _hostcache.get(mlist.nntp_host)
+ if conn is None:
+ try:
+ conn = nntplib.NNTP(nntp_host, nntp_port,
+ readermode=True,
+ user=config.NNTP_USERNAME,
+ password=config.NNTP_PASSWORD)
+ except (socket.error, nntplib.NNTPError, IOError), e:
+ log.error('error opening connection to nntp_host: %s\n%s',
+ mlist.nntp_host, e)
+ raise
+ _hostcache[mlist.nntp_host] = conn
+ # Get the GROUP information for the list, but we're only really interested
+ # in the first article number and the last article number
+ r, c, f, l, n = conn.group(mlist.linked_newsgroup)
+ return conn, int(f), int(l)
+
+
+def clearcache():
+ for conn in set(_hostcache.values()):
+ conn.quit()
+ _hostcache.clear()
+
+
+
+# This function requires the list to be locked.
+def poll_newsgroup(mlist, conn, first, last, glock):
+ listname = mlist.internal_name()
+ # NEWNEWS is not portable and has synchronization issues.
+ for num in range(first, last):
+ glock.refresh()
+ try:
+ headers = conn.head(repr(num))[3]
+ found_to = False
+ beenthere = False
+ for header in headers:
+ i = header.find(':')
+ value = header[:i].lower()
+ if i > 0 and value == 'to':
+ found_to = True
+ if value <> 'x-beenthere':
+ continue
+ if header[i:] == ': %s' % mlist.posting_address:
+ beenthere = True
+ break
+ if not beenthere:
+ body = conn.body(repr(num))[3]
+ # Usenet originated messages will not have a Unix envelope
+ # (i.e. "From " header). This breaks Pipermail archiving, so
+ # we will synthesize one. Be sure to use the format searched
+ # for by mailbox.UnixMailbox._isrealfromline(). BAW: We use
+ # the -bounces address here in case any downstream clients use
+ # the envelope sender for bounces; I'm not sure about this,
+ # but it's the closest to the old semantics.
+ lines = ['From %s %s' % (mlist.GetBouncesEmail(),
+ time.ctime(time.time()))]
+ lines.extend(headers)
+ lines.append('')
+ lines.extend(body)
+ lines.append('')
+ p = Parser(Message.Message)
+ try:
+ msg = p.parsestr(NL.join(lines))
+ except email.Errors.MessageError, e:
+ log.error('email package exception for %s:%d\n%s',
+ mlist.linked_newsgroup, num, e)
+ raise _ContinueLoop
+ if found_to:
+ del msg['X-Originally-To']
+ msg['X-Originally-To'] = msg['To']
+ del msg['To']
+ msg['To'] = mlist.posting_address
+ # Post the message to the locked list
+ inq = Switchboard(config.INQUEUE_DIR)
+ inq.enqueue(msg,
+ listname=mlist.internal_name(),
+ fromusenet=True)
+ log.info('posted to list %s: %7d', listname, num)
+ except nntplib.NNTPError, e:
+ log.exception('NNTP error for list %s: %7d', listname, num)
+ except _ContinueLoop:
+ continue
+ # Even if we don't post the message because it was seen on the
+ # list already, update the watermark
+ mlist.usenet_watermark = num
+
+
+
+def process_lists(glock):
+ for listname in config.list_manager.names:
+ glock.refresh()
+ # Open the list unlocked just to check to see if it is gating news to
+ # mail. If not, we're done with the list. Otherwise, lock the list
+ # and gate the group.
+ mlist = MailList.MailList(listname, lock=False)
+ if not mlist.gateway_to_mail:
+ continue
+ # Get the list's watermark, i.e. the last article number that we gated
+ # from news to mail. None means that this list has never polled its
+ # newsgroup and that we should do a catch up.
+ watermark = getattr(mlist, 'usenet_watermark', None)
+ # Open the newsgroup, but let most exceptions percolate up.
+ try:
+ conn, first, last = open_newsgroup(mlist)
+ except (socket.error, nntplib.NNTPError):
+ break
+ log.info('%s: [%d..%d]', listname, first, last)
+ try:
+ try:
+ if watermark is None:
+ mlist.Lock(timeout=config.LIST_LOCK_TIMEOUT)
+ # This is the first time we've tried to gate this
+ # newsgroup. We essentially do a mass catch-up, otherwise
+ # we'd flood the mailing list.
+ mlist.usenet_watermark = last
+ log.info('%s caught up to article %d', listname, last)
+ else:
+ # The list has been polled previously, so now we simply
+ # grab all the messages on the newsgroup that have not
+ # been seen by the mailing list. The first such article
+ # is the maximum of the lowest article available in the
+ # newsgroup and the watermark. It's possible that some
+ # articles have been expired since the last time gate_news
+ # has run. Not much we can do about that.
+ start = max(watermark + 1, first)
+ if start > last:
+ log.info('nothing new for list %s', listname)
+ else:
+ mlist.Lock(timeout=config.LIST_LOCK_TIMEOUT)
+ log.info('gating %s articles [%d..%d]',
+ listname, start, last)
+ # Use last+1 because poll_newsgroup() employes a for
+ # loop over range, and this will not include the last
+ # element in the list.
+ poll_newsgroup(mlist, conn, start, last + 1, glock)
+ except lockfile.TimeOutError:
+ log.error('Could not acquire list lock: %s', listname)
+ finally:
+ if mlist.Locked():
+ mlist.Save()
+ mlist.Unlock()
+ log.info('%s watermark: %d', listname, mlist.usenet_watermark)
+
+
+
+def main():
+ opts, args, parser = parseargs()
+ config.load(opts.config)
+
+ GATENEWS_LOCK_FILE = os.path.join(config.LOCK_DIR, 'gate_news.lock')
+ LOCK_LIFETIME = config.hours(2)
+
+ loginit.initialize(propagate=True)
+ log = logging.getLogger('mailman.fromusenet')
+
+ try:
+ with lockfile.Lock(GATENEWS_LOCK_FILE,
+ # It's okay to hijack this
+ lifetime=LOCK_LIFETIME) as lock:
+ process_lists(lock)
+ clearcache()
+ except lockfile.TimeOutError:
+ log.error('Could not acquire gate_news lock')
+
+
+
+if __name__ == '__main__':
+ main()