#!/usr/local/bin/perl
#
# Originally posted to the www-talk mailing list:
#   Message-Id: <9207160349.AA25229@pixel.convex.com>
#   To: www-talk@nxoc01.cern.ch
#   Subject: perl script to legalize HTML files
#   Date: Wed, 15 Jul 92 22:49:21 CDT
#   From: Dan Connolly <connolly@pixel.convex.com>
#
# USE
#   fix-html.pl <W3-file.html >W3-file.sgml
#
# SEE ALSO
#   the html.dtd.
#
# Reads an HTML document from the files named on the command line (or
# standard input), and writes it to standard output preceded by a DOCTYPE
# declaration, with two kinds of tag rewritten:
#   <A ...>       -> <A NAME="..." TYPE="..." HREF="..."> (values quoted)
#   <NEXTID 123>  -> <NEXTID N=123>
# Everything else is copied through unchanged.

use strict;
use warnings;

# legalize_html($html) -> $sgml
#
# Returns a copy of $html in which every anchor start tag and every
# "<NEXTID number>" tag has been rewritten as described above.  The DOCTYPE
# line is NOT added here; main() prints it.
sub legalize_html {
    my ($html) = @_;

    my $result = '';
    my $rest   = $html;

    # Walk the text one '<' at a time: copy what precedes it, then decide
    # what to do with the tag that follows.
    while ((my $lt = index($rest, '<')) >= 0) {
        $result .= substr($rest, 0, $lt);
        $rest = substr($rest, $lt + 1);

        if ($rest =~ s/\AA\s+//i) {
            # Anchor start tag that carries attributes.
            my ($tag, $remaining) = fix_anchor($rest);
            $result .= $tag;
            $rest = $remaining;
        }
        elsif ($rest =~ s/\ANEXTID\s+(\d+)\s*>//i) {
            # Tag names are matched case-insensitively, like <A> above.
            $result .= "<NEXTID N=$1>";
        }
        else {
            # Any other tag (or a stray '<'): pass the '<' through.
            $result .= '<';
        }
    }

    return $result . $rest;
}

# fix_anchor($rest) -> ($tag, $remaining)
#
# $rest is the text that follows "<A " (tag name and whitespace already
# consumed).  Parses leading attr=value pairs, skips forward past the
# closing '>', and returns the rebuilt start tag together with the text
# that follows it.  Only NAME, TYPE and HREF are kept, emitted in that
# order; attributes with an empty value are omitted.
sub fix_anchor {
    my ($rest) = @_;

    my %value_of;

    # A value is "double quoted", 'single quoted', or a bare token that
    # ends at whitespace or '>'.  The quote characters themselves are NOT
    # captured, so they are not duplicated when the tag is rebuilt.
    while (
        $rest =~ s{
            \A (\w+) \s* = \s*
            (?: "([^"]*)" | '([^']*)' | ([^\s>]+) )
            \s*
        }{}x
      )
    {
        my ($attr, $dq, $sq, $bare) = ($1, $2, $3, $4);

        # Use definedness, not truth, so a value of "0" survives.
        my ($value) = grep { defined } $dq, $sq, $bare;
        $value_of{ uc $attr } = $value;    # a later duplicate wins
    }

    # Discard whatever is left of the tag, up to and including '>'.
    $rest =~ s/\A[^>]*>//;

    my $tag = '<A';
    for my $attr (qw(NAME TYPE HREF)) {
        my $value = $value_of{$attr};
        next if !defined $value || $value eq '';
        $tag .= " $attr=" . _quote_attribute($value);
    }

    return ("$tag>", $rest);
}

# _quote_attribute($value) -> quoted attribute value literal
#
# Wraps $value in double quotes.  If the value itself contains a double
# quote, single quotes are used instead; if it contains both kinds, the
# double quotes are written as the character reference &#34;.
sub _quote_attribute {
    my ($value) = @_;

    return qq{"$value"} if index($value, '"') < 0;
    return qq{'$value'} if index($value, q{'}) < 0;

    (my $escaped = $value) =~ s/"/&#34;/g;
    return qq{"$escaped"};
}

# main()
#
# Command-line entry point: slurps all input, prints the DOCTYPE line
# followed by the legalized document.
sub main {
    my $html = join '', <>;    # read every input file in full

    print "<!DOCTYPE HTML SYSTEM>\n", legalize_html($html)
        or die "fix-html: cannot write output: $!\n";

    return;
}

# Run only when executed as a script, so the subs can be loaded and tested.
main() unless caller;

1;