- From: Terje Bless <link@tss.no>
- Date: Wed, 09 Aug 2000 12:38:38 +0200
- To: W3C Validator <www-validator@w3.org>
- Message-ID: <399134AE.727D3013@tss.no>
Since I already had the CVS tree checked out, I drummed up a quick patch to do some cleanup. The attached patch kills the last of the major globals -- replacing them with more or less sane lexicals -- as well as a boatload of dead code. The code in question (weblint, make_log_entry, and a couple more) hadn't been in use in ages and didn't look like it would be any time soon. Besides, it's easily available from CVS if/when it's ever needed. I don't think there are any bugs here, but testing has been minimal so... :-) HTH, -link
Index: check =================================================================== RCS file: /sources/public/validator/httpd/cgi-bin/check,v retrieving revision 1.66 diff -c -r1.66 check *** check 2000/06/30 20:33:50 1.66 --- check 2000/08/09 10:29:30 *************** *** 37,48 **** # # Define global variables ! use vars qw($VERSION $DATE $MAINTAINER); # Strings we need. ! use vars qw($frag $pub_ids $element_uri $file_type); # Cfg hashes. # # Paths and file locations - my $logfile = '/var/log/httpd/val-svc'; my $base_path = '/usr/local/validator/'; my $html_path = $base_path . 'htdocs/'; my $elem_db = $html_path . 'config/eref.cfg'; --- 37,47 ---- # # Define global variables ! use vars qw($VERSION $DATE $MAINTAINER $NOTICE); # Strings we need. ! use vars qw($pub_ids $element_uri $file_type); # Cfg hashes. # # Paths and file locations my $base_path = '/usr/local/validator/'; my $html_path = $base_path . 'htdocs/'; my $elem_db = $html_path . 'config/eref.cfg'; *************** *** 65,73 **** # URIs and fragments my $abs_svc_uri = 'http://validator.w3.org/'; my $uri_def_uri = 'http://www.w3.org/Addressing/#terms'; - my $faqloc = 'http://www.cs.duke.edu/~dsb/kgv-faq/'; my $rel_img_uri = '/images/'; - my $faqerrloc = $faqloc . 'errors.html'; my $abs_img_uri = $abs_svc_uri . 'images/'; my $element_ref = 'http://www.htmlhelp.com/reference/html40/'; --- 64,70 ---- *************** *** 77,83 **** $VERSION =~ s/Revision: ([\d\.]+) /$1/; $DATE = q$Date: 2000/06/30 20:33:50 $; $MAINTAINER = 'gerald@w3.org'; ! my $notice = ''; # "<p><strong>Note: This service will be ...</strong>"; # # DOCTYPEs --- 74,80 ---- $VERSION =~ s/Revision: ([\d\.]+) /$1/; $DATE = q$Date: 2000/06/30 20:33:50 $; $MAINTAINER = 'gerald@w3.org'; ! $NOTICE = ''; # "<p><strong>Note: This service will be ...</strong>"; # # DOCTYPEs *************** *** 91,115 **** # HTML fragments my $lt = "\020"; my $gt = "\021"; - my $gifborder = ' border=0'; - my $xhtmlendtag; # - # The option names - my @options = qw(weblint pw outline ss sp noatt); - - # - # Stopgap to shut -w up. It won't actually fix anything, but it'll keep us - # running without warnings until we can fix the problems. - my ($validity, %undef_frag, $effective_charset, $catalog, - @fake_errors, $guessed_doctype, $doctype, $line, $col, $type, $msg, $diff, - $pos, $indent, $gifname, $alttext, $gifhw, $nicegifname, $pedanticflags, - $pedantic_blurb, $level, $prevlevel, $prevdata); - - # # Read configuration files. ! $frag = &read_cfg($frag_db); # FPIs -> plain text version string ! $pub_ids = &read_cfg($fpis_db); # Errors -> fragment identifier $element_uri = &read_cfg($elem_db); # Element -> URI fragment $file_type = &read_cfg($type_db); # Content -> File -type --- 88,97 ---- # HTML fragments my $lt = "\020"; my $gt = "\021"; # # Read configuration files. ! $pub_ids = &read_cfg($fpis_db); # FPIs -> plain text version string $element_uri = &read_cfg($elem_db); # Element -> URI fragment $file_type = &read_cfg($type_db); # Content -> File -type *************** *** 118,124 **** $SIG{TERM} = \&erase_stuff; $SIG{KILL} = \&erase_stuff; $SIG{PIPE} = 'IGNORE'; - # $SIG{CHLD} = \&erase_stuff; # # delete() the, possibly tainted, $PATH. --- 100,105 ---- *************** *** 158,164 **** # # Supercede URI with an uploaded file. if ($q->param('uploaded_file')) { ! $q->param('uri', 'file://' . $q->param('uploaded_file')); } # --- 139,145 ---- # # Supercede URI with an uploaded file. if ($q->param('uploaded_file')) { ! $q->param('uri', 'upload://' . $q->param('uploaded_file')); } # *************** *** 198,209 **** <a href="/">HTML Validation Service</a> Results</h1> EOF ! $header .= $notice; # # Punt if we don't recognize this URI scheme. ! # @@ LWP does a whole bunch more: transparently! ! unless ($q->param('uri') =~ m(^(http|file)://)) { print $header; print <<"EOF"; <p> --- 179,189 ---- <a href="/">HTML Validation Service</a> Results</h1> EOF ! $header .= $NOTICE; # # Punt if we don't recognize this URI scheme. ! unless ($q->param('uri') =~ m(^(http|upload)://)) { print $header; print <<"EOF"; <p> *************** *** 257,262 **** --- 237,243 ---- # # Try to extract or guess the DOCTYPE for HTML and XHTML files. + my($guessed_doctype, $doctype); if ($File->{Type} eq 'html' or $File->{Type} eq 'xhtml') { ($guessed_doctype, $doctype) = &check_for_doctype($File->{Content}); } *************** *** 335,341 **** my $xmlflags = ''; ! my $decl = ''; if ($File->{Type} eq 'xhtml') { $ENV{SGML_CATALOG_FILES} = $sgmlstuff . '/REC-xhtml1-20000126/xhtml.soc'; --- 316,323 ---- my $xmlflags = ''; ! my $catalog = ''; ! my $decl = ''; if ($File->{Type} eq 'xhtml') { $ENV{SGML_CATALOG_FILES} = $sgmlstuff . '/REC-xhtml1-20000126/xhtml.soc'; *************** *** 421,427 **** $version = $pub_ids->{$fpi} || 'unknown'; if ($guessed_doctype == 1) { ! push( @fake_errors, "$sp:<OSFD>0:2:1:E: Missing DOCTYPE declaration at start of document (<a href=\"http://www.htmlhelp.org/tools/validator/doctype.html\">explanation...</a>)\n" ); } print ' ' x 4, q(<li>Character encoding: ), $File->{Charset}; --- 403,409 ---- $version = $pub_ids->{$fpi} || 'unknown'; if ($guessed_doctype == 1) { ! unshift @errors, "$sp:<OSFD>0:2:1:E: Missing DOCTYPE declaration at start of document (<a href=\"http://www.htmlhelp.org/tools/validator/doctype.html\">explanation...</a>)\n"; } print ' ' x 4, q(<li>Character encoding: ), $File->{Charset}; *************** *** 441,458 **** if (($File->{Type} eq 'xml') || ($File->{Type} eq 'xhtml')) { print ' ' x 4, qq(<li>Document type: ), $version; ! if ( ( $type eq "html" ) && ! ( $root_namespace ne "http://www.w3.org/1999/xhtml" ) ) { ! print "<br>warning: unknown namespace for text/html document!"; ! if ( $root_namespace ne '' ) { ! print qq{, <a href="$root_namespace">$root_namespace</a>}; ! } ! print "\n"; ! } ! else { ! if ( $root_namespace ne '' ) { ! print qq( with namespace <a href="$root_namespace">$root_namespace</a>); ! } } if ( $#other_namespaces >= 0 ) { --- 423,439 ---- if (($File->{Type} eq 'xml') || ($File->{Type} eq 'xhtml')) { print ' ' x 4, qq(<li>Document type: ), $version; ! if ($File->{Type} eq 'html' and ! $root_namespace ne "http://www.w3.org/1999/xhtml") { ! print "<br>warning: unknown namespace for text/html document!"; ! if ($root_namespace ne '') { ! print qq{, <a href="$root_namespace">$root_namespace</a>}; ! } ! print "\n"; ! } else { ! if ($root_namespace ne '') { ! print qq( with namespace <a href="$root_namespace">$root_namespace</a>); ! } } if ( $#other_namespaces >= 0 ) { *************** *** 497,508 **** if ( $? || ($guessed_doctype == 1) ) { print "<ul>\n"; ! for ((@fake_errors,@errors)) { next if /^<OSFD>0:[0-9]+:[0-9]+:[^A-Z]/; next if / numbers exceeding 65535 not supported$/; next if /:W: SGML declaration was not implied$/ && ($File->{Type} =~ /^x(ht)?ml$/); s/^$sp:<OSFD>//g; if ( ! (($line, $col, $type, $msg)=(/^[^:]*:([0-9]+):([0-9]+):([A-Z]?):? (.*)/))) { print "Uh oh! I got the following unknown error:\n\n $_\n\n"; print "Please make sure you specified the DOCTYPE properly!\n\n"; --- 478,490 ---- if ( $? || ($guessed_doctype == 1) ) { print "<ul>\n"; ! for (@errors) { next if /^<OSFD>0:[0-9]+:[0-9]+:[^A-Z]/; next if / numbers exceeding 65535 not supported$/; next if /:W: SGML declaration was not implied$/ && ($File->{Type} =~ /^x(ht)?ml$/); s/^$sp:<OSFD>//g; + my($line, $col, $type, $msg); if ( ! (($line, $col, $type, $msg)=(/^[^:]*:([0-9]+):([0-9]+):([A-Z]?):? (.*)/))) { print "Uh oh! I got the following unknown error:\n\n $_\n\n"; print "Please make sure you specified the DOCTYPE properly!\n\n"; *************** *** 543,549 **** elsif ( $col > 70 ) { # keep rightmost 70 chars; adjust $col accordingly # (truncate left side only) ! $diff = $col - 50; $newline = "... " . substr( $newline, $diff, 70 ); if ( length( $newline ) == (70 + 4) ) { $newline .= " ..."; --- 525,531 ---- elsif ( $col > 70 ) { # keep rightmost 70 chars; adjust $col accordingly # (truncate left side only) ! my $diff = $col - 50; $newline = "... " . substr( $newline, $diff, 70 ); if ( length( $newline ) == (70 + 4) ) { $newline .= " ..."; *************** *** 574,580 **** # figure out the index into the %frag associative array for the # "explanation..." links to the KGV FAQ. my $msgindex = $msg; ! $msgindex =~ s/"[^"]+"/FOO/g; $msgindex =~ s/[^A-Za-z ]//; $newline =~ s/&/&/go; $newline =~ s/</</go; --- 556,562 ---- # figure out the index into the %frag associative array for the # "explanation..." links to the KGV FAQ. my $msgindex = $msg; ! $msgindex =~ s/"[^\"]+"/FOO/g; $msgindex =~ s/[^A-Za-z ]//; $newline =~ s/&/&/go; $newline =~ s/</</go; *************** *** 593,612 **** print " " x 4 if $col != $orig_col; # only for truncated lines print "<span class=markup>^</span></pre>\n"; print "<p>\n"; - - print qq{<span class=error>Error: $msg</span>}; - - if ( defined $frag->{$msgindex} ) { - # temporarily commented out due to broken links - # print qq{ (<a - # href="$faqerrloc#$frag->{$msgindex}">explanation...</a>)}; - } - else { # remember msgindexes without frags, to get the KGV FAQ updated. - $undef_frag{$msgindex} = 1; - } - - print "</p>\n"; } print "</ul>\n"; print "<hr>\n"; --- 575,582 ---- print " " x 4 if $col != $orig_col; # only for truncated lines print "<span class=markup>^</span></pre>\n"; print "<p>\n"; + print qq{<span class=error>Error: $msg</span></p>\n}; } print "</ul>\n"; print "<hr>\n"; *************** *** 617,632 **** print "\n <p>\n Sorry, this document does not validate as $version.\n </p>\n\n"; &output_css_validator_blurb( $q->param('uri') ); } ! $validity="invalid"; ! } ! else { if (($File->{Type} eq 'xml') || ($File->{Type} eq 'xhtml')) { print "\n <pre>\n No errors found! "; print "<a href=\"#sp-lim\">*</a></pre>\n\n"; ! } ! else { print "\n <pre>\n No errors found!</pre>\n\n"; } if ( $version ne "unknown" ) { if ( $version =~ /^HTML 2\.0$/ ) { $gifname = "vh20"; --- 587,602 ---- print "\n <p>\n Sorry, this document does not validate as $version.\n </p>\n\n"; &output_css_validator_blurb( $q->param('uri') ); } ! } else { if (($File->{Type} eq 'xml') || ($File->{Type} eq 'xhtml')) { print "\n <pre>\n No errors found! "; print "<a href=\"#sp-lim\">*</a></pre>\n\n"; ! } else { print "\n <pre>\n No errors found!</pre>\n\n"; } + + my $gifborder = ' border=0'; # The default. + my($gifname, $alttext, $gifhw, $nicegifname, $xhtmlendtag); if ( $version ne "unknown" ) { if ( $version =~ /^HTML 2\.0$/ ) { $gifname = "vh20"; *************** *** 739,745 **** </p> EOHD } - $validity="valid"; if (($File->{Type} eq 'xml') || ($File->{Type} eq 'xhtml')) { print qq{ <h2><a name="sp-lim">Caveat</a></h2> <p> --- 709,714 ---- *************** *** 751,813 **** } } - if ( $q->param('weblint') ) { - if ( $q->param('pw') ) { - $pedanticflags = '-pedantic -e mailto-link'; - $pedantic_blurb = ' (in "pedantic" mode)'; - } - else { - $pedanticflags = ''; - } - - print <<"EOF"; - <hr> - <h2><a name="weblint">Weblint Results</a></h2> - - <p> - Below are the results of running <a - href="http://www.weblint.org/">Weblint</a> - on this document$pedantic_blurb: - </p> - - <p> - <strong>Note</strong>: - Weblint is a useful HTML syntax and style checker, but does - not do true HTML validation. - Also, the version of weblint used by this service has not - been updated for some time, so some of the messages below may - be misleading or inaccurate. - </p> - EOF - - open WEBLINT, "|$weblint -s $pedanticflags - 2>&1 >$temp.weblint" - or die "open($weblint) returned: $!\n"; - for (@{$File->{Content}}) {print WEBLINT $_, "\n"}; - close WEBLINT; - - print "\n\n"; - if ( $? ) { - print " <ul>\n"; - - open( WEBLINTOUT, "$temp.weblint" ) - || die "couldn't open weblint results in $temp: $!"; - - while (<WEBLINTOUT>) { - s/ \(use "-x <extension>" to allow this\)\.$/./go; - s/&/&/go; - s/</</go; - s/>/>/go; - print " <li>$_"; - } - - close( WEBLINTOUT ) || die "couldn't close weblint results: $!"; - print " </ul>\n"; - } - else { - print "\n <blockquote>\n Looks good to me!\n </blockquote>\n"; - } - print "\n\n"; - } if ($q->param('outline')) { print <<'EOF'; --- 720,725 ---- *************** *** 889,895 **** $gd =~ s/&/&/go; $gd =~ s/</</go; printf "%4d: %s", 0, $gd; } ! $line = 1; for (@{$File->{Content}}) { s/&/&/go; s/</</go; printf "<a name=\"line-%s\">%4d</a>: %s\n", $line, $line, $_; --- 801,807 ---- $gd =~ s/&/&/go; $gd =~ s/</</go; printf "%4d: %s", 0, $gd; } ! my $line = 1; for (@{$File->{Content}}) { s/&/&/go; s/</</go; printf "<a name=\"line-%s\">%4d</a>: %s\n", $line, $line, $_; *************** *** 932,937 **** --- 844,850 ---- $line =~ s/\\n/ /g; $line =~ s/\\011/ /g; + $line =~ s/\\012/ /g; $line =~ s/\s+/ /g; next if $line =~ /^-\s*$/; *************** *** 1037,1069 **** } sub erase_stuff { ! ! unlink $temp or warn "unlink($temp) returned: $!\n"; ! unlink "$temp.esis" or warn "unlink($temp.esis) returned: $!\n"; ! unlink "$temp.weblint"; ! ! } ! ! sub make_log_entry { ! ! my $msgindex; ! ! open(LOG,">>$logfile") || die "couldn't append to log: $!"; ! print LOG "$ENV{REMOTE_HOST}\t$validity $version\t", $q->param('uri'), "\n"; ! foreach $msgindex (keys %undef_frag) { ! print LOG "frag not defined for msgindex: $msgindex\n"; ! } ! close( LOG ) || die "couldn't close log: $!"; ! } sub clean_up_and_exit { ! ! &output_closing; ! &erase_stuff; ! # &make_log_entry; ! exit; ! } sub redirect_to_home_page { --- 950,964 ---- } sub erase_stuff { ! unlink $temp or warn "unlink($temp) returned: $!\n"; ! unlink "$temp.esis" or warn "unlink($temp.esis) returned: $!\n"; ! unlink "$temp.weblint"; } sub clean_up_and_exit { ! &output_closing; ! &erase_stuff; ! exit; } sub redirect_to_home_page { *************** *** 1087,1103 **** $count++ if $q->param('ss'); $count++ if $q->param('sp'); - $count++ if $q->param('weblint'); $count++ if $q->param('outline'); if ( $count ) { $text .= " <p>\n Jump to: "; - if ( $q->param('weblint') ) { - $text .= "<a\n href=\"#weblint\">Weblint Results</a>"; - $count--; - $text .= " or " if ( $count == 1 ); - $text .= ", " if ( $count > 1 ); - } if ( $q->param('outline') ) { $text .= "<a\n href=\"#outline\">Outline</a>"; $count--; --- 982,991 ---- *************** *** 1137,1143 **** # does an HTML element precede the doctype on the same line? if ( $line =~ /<[a-z].*<!doctype/i ) { ! if ( $line =~ /<[a-z]+ xmlns=['"]([^ '"]*)/i ) {# look for an xmlns attr return 2, $1; } last; --- 1025,1031 ---- # does an HTML element precede the doctype on the same line? if ( $line =~ /<[a-z].*<!doctype/i ) { ! if ( $line =~ /<[a-z]+ xmlns=[\'\"]([^ \'\"]*)/i ) {# look for an xmlns attr return 2, $1; } last; *************** *** 1154,1160 **** # Strip comments, so the next line doesn't find commented-out markup etc. # (this doesn't handle multi-line comments, unfortunately) if ( $line =~ /<[a-z]/i ) { # found an element ! if ( $line =~ /<[a-z]+ xmlns=['"]([^ '"]*)/i ) {# look for an xmlns attr return 2, $1; } last; --- 1042,1048 ---- # Strip comments, so the next line doesn't find commented-out markup etc. # (this doesn't handle multi-line comments, unfortunately) if ( $line =~ /<[a-z]/i ) { # found an element ! if ( $line =~ /<[a-z]+ xmlns=[\'\"]([^ \'\"]*)/i ) {# look for an xmlns attr return 2, $1; } last; *************** *** 1356,1362 **** my($ct, @param) = split /\s*;\s*/, lc $Content_Type; if (exists $file_type->{$ct}) { ! $type = $file_type->{$ct}; } foreach my $param (@param) { --- 1244,1250 ---- my($ct, @param) = split /\s*;\s*/, lc $Content_Type; if (exists $file_type->{$ct}) { ! $ct = $file_type->{$ct}; } foreach my $param (@param) { *************** *** 1368,1374 **** } } ! return $type, $charset; } # --- 1256,1262 ---- } } ! return $ct, $charset; } #
Received on Wednesday, 9 August 2000 06:38:35 UTC