Message-Id: <9207240518.AA06296@pixel.convex.com> To: www-talk@nxoc01.cern.ch Subject: html2mml.l -- FrameMaker support for HTML Date: Fri, 24 Jul 92 00:18:34 CDT From: Dan Connolly <connolly@pixel.convex.com> --cut-here Here's a lisp program I cooked up to convert HTML files to Frame's Maker Markup Language. It uses a real SGML parser and a lisp interpreter, so you'll have to build those if you don't have them handy. I haven't tested it extensively, and I don't have a down translator (MIF to HTML) yet... but I thought I'd pass it along. Oh... it assumes the HTML file conforms to the DTD I sent out a while ago (I'd give you a WWW HREF/URL, but the server's down right now and I can't find it.) Dan --cut-here ;;; html2mml.l -- translate HyperText Markup Language to Maker Markup Language. ;;; ;;; USE ;;; sgmls file.html | xlisp html2mml.l >file.mml ;;; ;;; Where xlisp is Tom Almy's improved release of David Betz's XLISP 2.1, ;;; available in export.lcs.mit.edu:/contrib/winterp/xlisp/xlisp-2.1.almy.tar.Z ;;; and sgmls is built from ;;; ifi.uio.no:/pub/SGML/SGMLS/sgmls-0.8.tar ;;; aka ;;; ftp.uu.net:/pub/text-processing/sgml/sgmls-0.8.tar.Z ;;; ;;; The resulting file will have the OS Banner from XLisp at the ;;; top. For some reason, XLisp writes everything to stdout. ;;; I patched it to write diagnostic output to stderr. I'll have ;;; to get the patches incorporated soon. ;;; ;;; Anyway, just edit the banner out so the first line of the file is ;;; <MML ...> ;;; ;;; Then import the mml file to FrameMaker. ;;; (setq *tracenable* t) (setq *breakenable* t) (princ "<MML \"from html2mml.l by connolly@convex.com\">\n") (setq *para-tags* '(title h1 h2 h3 h4 h5 body ol ul dl menu dir xmp listing)) (setq *literal-tags* '(xmp listing)) (setq *style-sheet* " <!DefinePar Title <Alignment r> <plain> <pts 18> <bold> > <!DefinePar H1 <Alignment c> <plain><pts 18> <SpaceBefore 12pt><SpaceAfter 12pt> > <!DefinePar H2 <LeftIndent 0in><FirstIndent 0in> <SpaceBefore 12pt><SpaceAfter 6pt> <Alignment l> <plain><pts 14> > <!DefinePar H3 <plain> <bold> <LeftIndent 0.25in><FirstIndent 0.25in> <SpaceBefore 6pt><SpaceAfter 3pt> <Alignment l><pts 12> > <!DefinePar H4 <Alignment l> > <!DefinePar H5 <Alignment l> > <!DefinePar BODY <LeftIndent 0.75in><FirstIndent 0.75in> <SpaceBefore 3pt><SpaceAfter 3pt><BlockSize 3> <Alignment l><plain><pts 12> > <!DefinePar OL <FirstIndent 1.0in> <LeftIndent 1.5in> <TabStops <TabStop 1.25in>> <SpaceBefore 3pt><SpaceAfter 3pt><BlockSize 3> <AutoNumber Yes> <NumberFormat \"<n+>\t\"> <Alignment l><plain><pts 12> > <!DefinePar UL <SpaceBefore 3pt><SpaceAfter 3pt><BlockSize 3> <FirstIndent 1.0in> <LeftIndent 1.5in> <TabStops <TabStop 1.25in>> <AutoNumber Yes> <NumberFormat \"o\\t\"> <Alignment l><plain><pts 12> > <!DefinePar DL <AutoNumber No> <SpaceBefore 3pt><SpaceAfter 3pt><BlockSize 3> <FirstIndent 0.5in> <LeftIndent 2.5in> <TabStops <TabStop 2.5in>> <Alignment l><plain><pts 12> > <!DefinePar MENU <AutoNumber No> <WithNext yes><WithPrev yes> <Alignment l><plain><pts 12> > <!DefinePar DIR <AutoNumber No> <Alignment l><plain><pts 12> > <!DefinePar XMP <AutoNumber No> <FirstIndent 0in> <LeftIndent 0in> <Alignment l><plain> <Family Courier><pts 9> <SpaceBefore 3pt><SpaceAfter 3pt><BlockSize 3> > <!DefinePar LISTING <AutoNumber No> <FirstIndent 0in> <LeftIndent 0in> <Alignment l><plain> <Family Courier><pts 8> <SpaceBefore 3pt><SpaceAfter 3pt><BlockSize 3> > ") (princ *style-sheet*) ;; From almy2.1 ;; push and pop treat variable v as a stack (defmacro push (v l) `(setf ,l (cons ,v ,l))) (defmacro pop (l) `(prog1 (first ,l) (setf ,l (rest ,l)))) (defun start-para (stream tag) (format stream "<~A>~%" tag) ) (defun end-para (stream) (format stream "~%~%") ) (defun convert-data (literal) (do ((c (read-char) (read-char)) d1 d2 d3 ) ((eq c #\Newline) nil) (cond ((eq c #\\) (cond ((setf d1 (digit-char-p (setf c (read-char)))) (setf d2 (digit-char-p (read-char))) (setf d3 (digit-char-p (read-char))) (princ (int-char (+ d3 (* 8 (+ d2 (* 8 d1)))))) ) ((eq c #\\) (princ "\\\\")) ((eq c #\n) (format t (if literal "<HardReturn>" " "))) ((eq c #\|) ;;nothing ) ((eq c #\s) (princ " ")) ) ) ((member c '(#\< #\>)) (format t "\\~A" c)) ((eq c #\space) (format t (if literal "<HardSpace>" " "))) ((eql c 7) (format t "<tab>")) (t (princ c)) ) ) ) (defun html2mml () (do ((c (read-char) (read-char)) stack tag attrs ) ((null c)) ;; quit at end of file (case c (#\Newline ;; do nothing ) (#\( (let ((gi (read)) ) ;; open tag (push gi stack) (cond ((member gi *para-tags*) (setq tag gi) (start-para t tag) ) ((eq gi 'a) (let ((href (second (assoc 'href attrs))) ) ;; watch out for >'s and 's (format t "<Marker <MType 8> <MText `message www ~A'>><italic>" href) ) ) ) (setq attrs nil) )) (#\) (let ((gi (read)) ) (pop stack) (cond ((member gi *para-tags*) (setq tag nil)) ((eq gi 'a) (format t "<noitalic>")) ((eq gi 'dt) (format t "<tab>")) ((member gi '(p dd li)) (format t "<par>")) ) )) (#\- (unless tag (end-para t) (dolist (gi stack) (when (member gi *para-tags*) (setq tag gi) (return) ) ) (start-para t tag) ) (convert-data (member tag *literal-tags*)) ) (#\& (let ((name (read)) ) ;; name )) (#\? (let ((pi (read-line)) ) ;; processing instruction )) (#\A (let ((name (read)) (token (read)) ) (case token (IMPLIED ;; nothing ) (CDATA (let ((data (read-line)) ) (push (list name data) attrs) )) (TOKEN (let ((tokens (read-line)) ;;@@ read tokens til \n ) ;; tokens )) (NOTATION (let ((name (read)) ) ;; notation )) (ENTITY (let ((name (read)) ) ;; general entity )) (ID (let ((id (read)) ) ;; id )) (IDREF (let ((ids (read-line)) ;; @@ read ids til \n ) ;; id's )) ) ) ) (#\D (read-line) ;; do like A but for external data name ) ) ) ) (html2mml) --cut-here--