r15287 - meta - Add scripts dir, irc log htmlizer.

Joshua Sled jsled at cvs.gnucash.org
Mon Jan 1 11:01:48 EST 2007


Author: jsled
Date: 2007-01-01 11:01:47 -0500 (Mon, 01 Jan 2007)
New Revision: 15287
Trac: http://svn.gnucash.org/trac/changeset/15287

Added:
   meta/scripts/
   meta/scripts/irc_log_htmlizer.py
Log:
Add scripts dir, irc log htmlizer.

Added: meta/scripts/irc_log_htmlizer.py
===================================================================
--- meta/scripts/irc_log_htmlizer.py	2006-12-30 20:00:44 UTC (rev 15286)
+++ meta/scripts/irc_log_htmlizer.py	2007-01-01 16:01:47 UTC (rev 15287)
@@ -0,0 +1,136 @@
+#!/usr/bin/python
+
+'''
+HTML-izes IRC logs as output by the Supybot ChannelLogger plugin.
+
+Takes -- on stdin -- lines preceded by basic ISO 8601-format timestamps
+(YYYY-MM-DDTHH:MM:SS) and writes into files of the form `YYYY-MM-DD.html` in
+cfg['output.dir'].
+
+Run something like:
+
+    $ nohup tail -n 0 -f '#gnucash.log' | ~/scripts/irc_log_htmlizer.py &
+
+Or, to catch up on a period of time:
+
+    $ rm ~/public/logs/2007-01-*
+    $ egrep "^2007-01-..T" '#gnucash.log' | ~/scripts/irc_log_htmlizer.py
+
+@author jsled
+'''
+
+import cgi
+import os
+import os.path
+import sys
+import textwrap
+
+# Settings; 'output.dir' is the directory the YYYY-MM-DD.html files go in.
+cfg = { 'output.dir': '/home/supybot/public/logs', }
+
+
+def log_parse(line):
+    '''Splits a ChannelLogger line into its (date, time, rest) parts.'''
+    timestamp, message = line.split('  ', 1)
+    day, clock = timestamp.split('T', 1)
+    return (day, clock, message)
+
+
+class HtmlLogFile (object):
+    '''
+    Keeps an open file handle and the current point of insertion, just before
+    the closing </body></html> tags; on `write`, overwrites those tags with
+    the new line, and re-inserts the closing tags before flushing the file.
+    '''
+
+    html_header = textwrap.dedent('''\
+    <html>
+      <head>
+        <title>Logs for %(date)s</title>
+        <style type="text/css">
+        BODY { background-color: white; color: black; }
+        H1 A { color: black; text-decoration: none; }
+        </style>
+      </head>
+    <body>
+    <h1>%(date)s <a href="/">GnuCash</a> IRC logs</h1>
+    ''')
+
+    html_footer = textwrap.dedent('''\
+    </body>
+    </html>
+    ''')
+
+    def __init__(self, date):
+        '''Opens the log for `date`, creating an empty HTML page if needed.'''
+        filename = os.path.join(cfg['output.dir'], '%s.html' % (date))
+        if not os.path.exists(filename):
+            new_file = open(filename, 'w')
+            new_file.write(HtmlLogFile.html_header % {'date': date})
+            new_file.write(HtmlLogFile.html_footer)
+            new_file.close()
+        self._f = open(filename, 'r+')
+        self._pos = self._f.read().rfind('</body>')  # insertion point
+        if self._pos == -1:
+            self._f.close()  # don't leak the handle on a malformed file
+            raise Exception('inappropriate html file [%s]' % (filename))
+
+    def write(self, date, time, stmt):
+        '''Inserts `stmt` (HTML-escaped) before the footer; `date` is unused.'''
+        # @fixme detect, link-ify URLs in lines
+        html_line = '''<a name="T%(time)s">%(time)s</a> %(escaped_text)s<br />\n''' \
+                    % {'time': time, 'escaped_text': cgi.escape(stmt)}
+        self._f.seek(self._pos)
+        self._f.write(html_line)
+        self._pos = self._f.tell()
+        self._f.write(HtmlLogFile.html_footer)
+        self._f.flush()
+
+    def close(self):
+        '''Closes the underlying file handle.'''
+        self._f.close()
+
+
+class HtmlFiles (object):
+    '''
+    A dictionary-like object that maps 'YYYY-MM-DD' strings to HtmlLogFile
+    objects; it closes any other open file when handing out a new one, to
+    keep things tidy.
+    '''
+
+    def __init__(self):
+        self._files = {}
+
+    def __getitem__(self, date):
+        '''Returns the HtmlLogFile for `date`, opening it on first use.'''
+        if date not in self._files:
+            self._files[date] = HtmlLogFile(date)
+            self.close_all_except(date)  # clean up any other open files
+        return self._files[date]
+
+    def close_all(self):
+        '''Closes and forgets every open file.'''
+        self.close_all_except(None)
+
+    def close_all_except(self, except_key):
+        '''Closes and forgets every file not stored under `except_key`.'''
+        for key in [k for k in self._files.keys() if k != except_key]:
+            self._files.pop(key).close()
+
+
+def main(in_file):
+    '''Writes each in_file line to its day's HTML log; stops on first error.'''
+    files = HtmlFiles()
+    try:
+        for line in in_file:
+            date,time,stmt = log_parse(line)
+            files[date].write(date, time, stmt)
+    except Exception, e:
+        print "closing, error: %s" % (str(e))
+    files.close_all()  # also at normal end of input, not only after an error
+
+if __name__ == '__main__':
+    if len(sys.argv) > 1:
+        # no options are supported yet; refuse rather than silently ignore
+        raise Exception('unhandled command-line args.')
+    main(sys.stdin)  # log lines arrive on stdin, per the module docstring



More information about the gnucash-changes mailing list