r15294 - meta/scripts - combine HTML escaping with URL-detection-and-linkification, plus unit/doc-tests!
Joshua Sled
jsled at cvs.gnucash.org
Mon Jan 1 13:05:27 EST 2007
Author: jsled
Date: 2007-01-01 13:05:26 -0500 (Mon, 01 Jan 2007)
New Revision: 15294
Trac: http://svn.gnucash.org/trac/changeset/15294
Modified:
meta/scripts/irc_log_htmlizer.py
Log:
combine HTML escaping with URL-detection-and-linkification, plus unit/doc-tests!
Modified: meta/scripts/irc_log_htmlizer.py
===================================================================
--- meta/scripts/irc_log_htmlizer.py 2007-01-01 17:00:58 UTC (rev 15293)
+++ meta/scripts/irc_log_htmlizer.py 2007-01-01 18:05:26 UTC (rev 15294)
@@ -16,12 +16,15 @@
$ rm ~/public/logs/2007-01-*
$ egrep "^2007-01-..T" | ~/scripts/irc_log_htmlizer.py
+@todo Supybot emits "[...]T11:53:16 <jsled> -= THIS MESSAGE NOT LOGGED =-" lines; remove these.
+
@author jsled
'''
import cgi
import os
import os.path
+import re
import sys
import textwrap
@@ -30,13 +33,64 @@
def log_parse(line):
- '''Parses supybot.plugin.ChannelLogger-formatted lines.'''
+ '''
+ Parses supybot.plugin.ChannelLogger-formatted lines.
+
+ >>> log_parse('2007-01-01T12:34:56 Testing, yo. Testing.')
+ ('2007-01-01', '12:34:56', 'Testing, yo. Testing.')
+
+ '''
datetime,rest = line.split(' ', 1)
date,time = datetime.split('T', 1)
rest = rest.strip()
return date,time,rest
+# Minimize the first group to work left-to-right...
+link_re = re.compile(r'''(^.*?)((https?|ftp|telnet|irc|mailto|file):([-a-zA-Z0-9$_.+!*'(),;/?:@&=]|%[0-9a-fA-F][0-9a-fA-F])+)(.*$)''')
+# ' # emacs/font-lock balance
+def escape_and_linkify(s):
+ '''
+ Adds <a href>s around links in a string, and html-escapes the rest.
+
+ >>> escape_and_linkify('foobar')
+ 'foobar'
+
+ >>> escape_and_linkify('Check out http://www.gnucash.org/testing')
+ 'Check out <a href="http://www.gnucash.org/testing">http://www.gnucash.org/testing</a>'
+
+ >>> escape_and_linkify('The real #gnucash is at <irc://irc.gnome.org/gnucash>.')
+ 'The real #gnucash is at &lt;<a href="irc://irc.gnome.org/gnucash">irc://irc.gnome.org/gnucash</a>&gt;.'
+
+ >>> escape_and_linkify('Send mailto:gnucash-devel@gnucash.org?Subject=Foobar&Body=Testing')
+ 'Send <a href="mailto:gnucash-devel@gnucash.org?Subject=Foobar&Body=Testing">mailto:gnucash-devel@gnucash.org?Subject=Foobar&Body=Testing</a>'
+
+ >>> escape_and_linkify('<http://www.gnucash.org/?test=%2F> and <file:///usr/bin/gnucash>.')
+ '&lt;<a href="http://www.gnucash.org/?test=%2F">http://www.gnucash.org/?test=%2F</a>&gt; and &lt;<a href="file:///usr/bin/gnucash">file:///usr/bin/gnucash</a>&gt;.'
+
+ '''
+ # ' # emacs/font-lock balance.
+ # From http://www.ietf.org/rfc/rfc1738.txt:
+ # genericurl = scheme ":" schemepart
+ # [...]
+ # schemepart = *xchar | ip-schemepart
+ # safe = "$" | "-" | "_" | "." | "+"
+ # extra = "!" | "*" | "'" | "(" | ")" | ","
+ # reserved = ";" | "/" | "?" | ":" | "@" | "&" | "="
+ # unreserved = alpha | digit | safe | extra
+ # uchar = unreserved | escape
+ # xchar = unreserved | reserved | escape
+ # digits = 1*digit
+ global link_re
+ match = link_re.match(s)
+ if not match:
+ return cgi.escape(s)
+ return '''%(escaped)s<a href="%(url)s">%(url)s</a>%(rest)s''' \
+ % {'escaped': cgi.escape(match.group(1)),
+ 'url': match.group(2),
+ 'rest': escape_and_linkify(match.group(5))}
+
+
class HtmlLogFile (object):
'''
Keeps an open file handle and the current point of insertion, just before
@@ -81,7 +135,7 @@
def write(self, date, time, stmt):
# @fixme detect, link-ify URLs in lines
html_line = '''<a id="T%(time)s" href="#T%(time)s">%(time)s</a> %(escaped_text)s<br />\n''' \
- % {'time': time, 'escaped_text': cgi.escape(stmt)}
+ % {'time': time, 'escaped_text': escape_and_linkify(stmt)}
self._f.seek(self._pos)
self._f.write(html_line)
self._pos = self._f.tell()
@@ -131,10 +185,18 @@
except Exception, e:
print "closing, error: %s" % (str(e))
files.close_all()
-
+
+def unittest():
+ import doctest
+ doctest.testmod()
+
+
if __name__ == '__main__':
if len(sys.argv) > 1:
- # handle args
- raise Exception('unhandled command-line args.')
+ if sys.argv[1] == '--test':
+ unittest()
+ else:
+ raise Exception('unhandled command-line args.')
+ sys.exit(1)
main(sys.stdin)
More information about the gnucash-changes
mailing list