r15294 - meta/scripts - combine HTML escaping with URL-detection-and-linkification, plus unit/doc-tests!

Joshua Sled jsled at cvs.gnucash.org
Mon Jan 1 13:05:27 EST 2007


Author: jsled
Date: 2007-01-01 13:05:26 -0500 (Mon, 01 Jan 2007)
New Revision: 15294
Trac: http://svn.gnucash.org/trac/changeset/15294

Modified:
   meta/scripts/irc_log_htmlizer.py
Log:
combine HTML escaping with URL-detection-and-linkification, plus unit/doc-tests!


Modified: meta/scripts/irc_log_htmlizer.py
===================================================================
--- meta/scripts/irc_log_htmlizer.py	2007-01-01 17:00:58 UTC (rev 15293)
+++ meta/scripts/irc_log_htmlizer.py	2007-01-01 18:05:26 UTC (rev 15294)
@@ -16,12 +16,15 @@
     $ rm ~/public/logs/2007-01-*
     $ egrep "^2007-01-..T" | ~/scripts/irc_log_htmlizer.py
 
+@todo Supybot emits "[...]T11:53:16 <jsled> -= THIS MESSAGE NOT LOGGED =-" lines; remove these.
+
 @author jsled
 '''
 
 import cgi
 import os
 import os.path
+import re
 import sys
 import textwrap
 
@@ -30,13 +33,64 @@
 
 
 def log_parse(line):
-    '''Parses supybot.plugin.ChannelLogger-formatted lines.'''
+    '''
+    Parses supybot.plugin.ChannelLogger-formatted lines.
+
+    >>> log_parse('2007-01-01T12:34:56  Testing, yo.  Testing.')
+    ('2007-01-01', '12:34:56', 'Testing, yo.  Testing.')
+
+    '''
     datetime,rest = line.split('  ', 1)
     date,time = datetime.split('T', 1)
     rest = rest.strip()
     return date,time,rest
 
 
+# Minimize the first group to work left-to-right...
+link_re = re.compile(r'''(^.*?)((https?|ftp|telnet|irc|mailto|file):([-a-zA-Z0-9$_.+!*'(),;/?:@&=]|%[0-9a-fA-F][0-9a-fA-F])+)(.*$)''')
+# ' # emacs/font-lock balance
+def escape_and_linkify(s):
+    '''
+    Adds <a href>s around links in a string, and html-escapes the rest.
+
+    >>> escape_and_linkify('foobar')
+    'foobar'
+    
+    >>> escape_and_linkify('Check out http://www.gnucash.org/testing')
+    'Check out <a href="http://www.gnucash.org/testing">http://www.gnucash.org/testing</a>'
+    
+    >>> escape_and_linkify('The real #gnucash is at <irc://irc.gnome.org/gnucash>.')
+    'The real #gnucash is at &lt;<a href="irc://irc.gnome.org/gnucash">irc://irc.gnome.org/gnucash</a>&gt;.'
+
+    >>> escape_and_linkify('Send mailto:gnucash-devel@gnucash.org?Subject=Foobar&Body=Testing')
+    'Send <a href="mailto:gnucash-devel@gnucash.org?Subject=Foobar&Body=Testing">mailto:gnucash-devel@gnucash.org?Subject=Foobar&Body=Testing</a>'
+
+    >>> escape_and_linkify('<http://www.gnucash.org/?test=%2F> and <file:///usr/bin/gnucash>.')
+    '&lt;<a href="http://www.gnucash.org/?test=%2F">http://www.gnucash.org/?test=%2F</a>&gt; and &lt;<a href="file:///usr/bin/gnucash">file:///usr/bin/gnucash</a>&gt;.'
+
+    '''
+    # ' # emacs/font-lock balance.
+    # From http://www.ietf.org/rfc/rfc1738.txt:
+    # genericurl     = scheme ":" schemepart
+    # [...]
+    # schemepart     = *xchar | ip-schemepart
+    # safe           = "$" | "-" | "_" | "." | "+"
+    # extra          = "!" | "*" | "'" | "(" | ")" | ","
+    # reserved       = ";" | "/" | "?" | ":" | "@" | "&" | "="
+    # unreserved     = alpha | digit | safe | extra
+    # uchar          = unreserved | escape
+    # xchar          = unreserved | reserved | escape
+    # digits         = 1*digit
+    global link_re
+    match = link_re.match(s)
+    if not match:
+        return cgi.escape(s)
+    return '''%(escaped)s<a href="%(url)s">%(url)s</a>%(rest)s''' \
+           % {'escaped': cgi.escape(match.group(1)),
+              'url': match.group(2),
+              'rest': escape_and_linkify(match.group(5))}
+
+
 class HtmlLogFile (object):
     '''
     Keeps an open file handle and the current point of insertion, just before
@@ -81,7 +135,7 @@
     def write(self, date, time, stmt):
         # @fixme detect, link-ify URLs in lines
         html_line = '''<a id="T%(time)s" href="#T%(time)s">%(time)s</a> %(escaped_text)s<br />\n''' \
-                    % {'time': time, 'escaped_text': cgi.escape(stmt)}
+                    % {'time': time, 'escaped_text': escape_and_linkify(stmt)}
         self._f.seek(self._pos)
         self._f.write(html_line)
         self._pos = self._f.tell()
@@ -131,10 +185,18 @@
     except Exception, e:
         print "closing, error: %s" % (str(e))
         files.close_all()
-        
 
+
+def unittest():
+    import doctest
+    doctest.testmod()
+
+
 if __name__ == '__main__':
     if len(sys.argv) > 1:
-        # handle args
-        raise Exception('unhandled command-line args.')
+        if sys.argv[1] == '--test':
+            unittest()
+        else:
+            raise Exception('unhandled command-line args.')
+        sys.exit(1)
     main(sys.stdin)



More information about the gnucash-changes mailing list