Tools for the HTML manuals

Christopher Browne cbbrowne@hex.net
Mon, 14 Aug 2000 08:34:32 -0500


On Mon, 14 Aug 2000 11:47:04 -0000, the world broke into rejoicing as
Yannick LE NY <y-le-ny@ifrance.com>  said:
> I want to convert the french HTML files in SGML and Dave said:
> 
> >We need it in SGML, but Christopher Browne has a utility to convert
> >html->docbook. He's going to apply this to the French documentation
> >as well so you have something to start from.
> 
> What is the utility  to convert html->docbook and where find it?

Save the material below as ~/etc/h2d.dsl, and then run it thus:
% jade -t sgml -d ~/etc/h2d.dsl inputfile.html > outputfile.sgml

Suggestions for improvements are welcome.

The _big_ problem is that all it does is essentially translate tags.
It does _nothing_ to help out with the internal links.  For instance,
<a href="xacc-currency.html"> currencies </a> should not be transformed
into <ulink url="xacc-currency.html"> currencies </ulink>, which
is the "literal" translation; rather, the "currencies" section
should have ID="xacc-currency", and this link should become <link
linkend="xacc-currency"> currencies </link>, which is a link that will
be managed by the DocBook tools.

------------ Set Phasers to CUT HERE ---------------
<!doctype style-sheet PUBLIC "-//James Clark//DTD DSSSL Style Sheet//EN">

;; Bind `debug' to the external procedure registered under James Clark's
;; public identifier.  It is not called anywhere in the visible style sheet.
(define debug
  (external-procedure "UNREGISTERED::James Clark//Procedure::debug"))

;; Declare the non-standard flow object classes by public identifier.
;; The rules below use element, empty-element, document-type and
;; formatting-instruction; processing-instruction is declared but not
;; used in the visible style sheet.
(declare-flow-object-class element
  "UNREGISTERED::James Clark//Flow Object Class::element")
(declare-flow-object-class empty-element
  "UNREGISTERED::James Clark//Flow Object Class::empty-element")
(declare-flow-object-class document-type
  "UNREGISTERED::James Clark//Flow Object Class::document-type")
(declare-flow-object-class processing-instruction
  "UNREGISTERED::James Clark//Flow Object Class::processing-instruction")
(declare-flow-object-class formatting-instruction
  "UNREGISTERED::James Clark//Flow Object Class::formatting-instruction")
;; Declare the preserve-sdata? characteristic with #t as its declared value.
(declare-characteristic preserve-sdata?
  "UNREGISTERED::James Clark//Characteristic::preserve-sdata?"
  #t)

;; Return the attributes of node ND (default: the current node) as a list
;; of (name value) two-element lists, in the order reported by
;; named-node-list-names.  Attributes for which attribute-string yields
;; #f are left out.
(define (copy-attributes #!optional (nd (current-node)))
  (let collect ((names (named-node-list-names (attributes nd))))
    (if (pair? names)
        (let ((attname (car names))
              (tail (collect (cdr names))))
          (let ((attvalue (attribute-string attname nd)))
            (if attvalue
                (cons (list attname attvalue) tail)
                tail)))
        '())))

;; Fallback rule for every element without a rule of its own: copy the
;; element through under its own name with all of its attributes.  Nodes
;; whose momitend property is true are written with empty-element,
;; everything else with element.
(default
  (let ((atts (copy-attributes)))
    (if (node-property 'momitend (current-node))
        (make empty-element attributes: atts)
        (make element attributes: atts))))

;; Return a formatting-instruction flow object whose data: is STR.
(define write-string
  (lambda (str)
    (make formatting-instruction data: str)))

;; Root rule: emit the DocBook document type declaration, then the
;; converted document.  The declaration names ARTICLE as the document
;; element, so the converted children are wrapped in an Article element;
;; without that wrapper the output declares ARTICLE but has no matching
;; root element.
(element HTML
    (make sequence
        (make document-type
            name: "ARTICLE"
            public-id: "-//Davenport//DTD DocBook V3.0//EN")
        (make element gi: "Article"
            (process-children))))

;; article and title are written out under their own name (no gi: is
;; given) and without attributes; their children are processed inside.
(element article
    (make element
        (process-children)))

(element title
    (make element
        (process-children)))

;; head becomes Artheader.
(element head
    (make element gi: "Artheader"
        (process-children)))

;; The whole of BODY is wrapped in a single Para.
(element BODY
    (make element gi: "Para"
        (process-children)))

;; Shared body of the h1 to h5 rules: write the start-tag of section
;; element SECT as raw output, followed by a Title element holding the
;; heading's children.  No end-tag for the section is written here.
;;
;; The less-than sign is deliberately written as a string of its own:
;; this style sheet is itself an SGML document, and a less-than sign
;; directly followed by a name would be read as markup.
(define (section-start-with-title sect)
  (sosofo-append
    (write-string "<")
    (write-string sect)
    (write-string ">")
    (make element gi: "Title"
      (process-children))))

(element h1 (section-start-with-title "Sect1"))

(element h2 (section-start-with-title "Sect2"))

(element h3 (section-start-with-title "Sect3"))

(element h4 (section-start-with-title "Sect4"))

(element h5 (section-start-with-title "Sect5"))

;; heading becomes Title.
(element heading
    (make element gi: "Title"
        (process-children)))

;; p becomes Para.
(element p
    (make element gi: "Para"
        (process-children)))

;; tt becomes Literal, tagged remap="tt" to record where it came from.
(element tt
    (make element gi: "Literal"
        attributes: (list (list "remap" "tt")) ;; fixme
        (process-children)))

;; tscreen emits no element of its own; only its children are processed.
(element tscreen (process-children)) ; FIXME

;; ul becomes ItemizedList.
(element ul
    (make element gi: "ItemizedList"
        (process-children)))

;; li becomes ListItem, with the item's children placed in one Para.
(element li
    (make element gi: "ListItem"
        (make element gi: "Para"
            (process-children))))

;; URL becomes ULink.  The URL attribute is carried over; the link text is
;; the NAME attribute when there is one, otherwise the URL itself.
(element URL
    (let ((target (attribute-string "URL"))
          (label (attribute-string "NAME")))
      (make element gi: "ULink"
            attributes: (list (list "URL" target))
            (literal (if label label target)))))

;; IMG becomes Inlinegraphic: SRC is carried over as Fileref, and the
;; element's own attributes are copied after it.
;;
;; The attribute list is built with cons/list.  The earlier quasiquoted
;; form held (copy-attributes) without an unquote, so it contributed the
;; literal list (copy-attributes) instead of the copied attributes.
;; An empty-element is made because IMG has no content (Inlinegraphic is
;; declared EMPTY in DocBook), and Fileref is left out when SRC is absent
;; rather than passing #f as an attribute value.
;; NOTE(review): the copied HTML attributes (SRC, ALT, ...) are presumably
;; not all valid on Inlinegraphic; prune by hand or here as needed.
(element IMG
   (let ((src (attribute-string "SRC"))
         (atts (copy-attributes)))
     (make empty-element gi: "Inlinegraphic"
          attributes: (if src
                          (cons (list "Fileref" src) atts)
                          atts))))

;; A with HREF becomes Ulink (URL taken from HREF).  A with only NAME
;; becomes an empty Anchor (ID taken from NAME) followed by the element's
;; content.  An A with neither attribute just has its children processed.
;; The element's own attributes are copied after URL or ID.
;;
;; The attribute lists are built with cons/list.  The earlier quasiquoted
;; forms held (copy-attributes) without an unquote, so they contributed
;; the literal list (copy-attributes) instead of the copied attributes.
;; Anchor is written as an empty-element, as in the label rule, with the
;; content placed after it so that no text is lost.
;; NOTE(review): the copied HTML attributes (HREF, NAME, ...) are
;; presumably not all valid on Ulink or Anchor; prune as needed.
(element A
    (let ((href (attribute-string "HREF"))
          (name (attribute-string "NAME"))
          (atts (copy-attributes)))
      (cond (href
             (make element gi: "Ulink"
                   attributes: (cons (list "URL" href) atts)))
            (name
             (make sequence
               (make empty-element gi: "Anchor"
                     attributes: (cons (list "ID" name) atts))
               (process-children)))
            (else
             (process-children)))))

;; label becomes an empty Anchor carrying all of the label's attributes.
(element label
   (let ((atts (copy-attributes)))
     (make empty-element
           gi: "Anchor"
           attributes: atts)))

;; ol becomes OrderedList.
(element ol
    (make element gi: "OrderedList"
        (process-children)))

;; em becomes Emphasis.
(element em
    (make element gi: "Emphasis"
        (process-children)))

;; bf becomes Literal, tagged remap="bf" to record where it came from.
(element bf
    (make element gi: "Literal"
        attributes: (list (list "remap" "bf"))
        (process-children)))

;; pre becomes ProgramListing.
(element pre
    (make element gi: "ProgramListing"
        (process-children)))

;; quotep emits no element of its own; only its children are processed.
(element quotep (process-children))


;; dl becomes GlossList.  Only the DT children are processed here; the
;; DT rule collects whatever follows each term via get-sibs.
(element dl
   (let ((entries (process-matching-children "DT")))
     (make element gi: "GlossList"
           entries)))

;; Process the nodes that follow the current node, in order, and return
;; the appended result.  The walk stops at the first following node that
;; is a DT, or for which gi yields #f (which is also how the end of the
;; node list is detected).
(define (get-sibs)
    (let walk ((pending (follow (current-node)))
               (result (empty-sosofo)))
        (let* ((head (node-list-first pending))
               (name (gi head)))
            (if (and name
                     (not (string=? name "DT")))
                (walk (node-list-rest pending)
                      (sosofo-append result
                                     (process-node-list head)))
                result))))

;; DT becomes a GlossEntry: the term's own children go into GlossTerm,
;; and the nodes following the DT (gathered by get-sibs) go into GlossDef.
(element DT
   (let ((definition (get-sibs)))
     (make element gi: "GlossEntry"
           (make element gi: "GlossTerm"
                 (process-children))
           (make element gi: "GlossDef"
                 definition))))

;; BR is mapped to Emphasis.
;; NOTE(review): BR normally has no content, so this presumably yields an
;; Emphasis element with nothing in it -- confirm that is the intent.
(element BR
    (make element gi: "Emphasis"
        (process-children)))
--
cbbrowne@ntlug.org - <http://www.hex.net/~cbbrowne/lsf.html>
"Without  insects,  our ecosystem  would  collapse  and  we would  all
die.  In  that respect,  insects  are  far  more important  than  mere
end-users."  -- Eugene O'Neil <eugene@cs.umb.edu>