- From: Terje Bless <link@tss.no>
- Date: Wed, 09 Aug 2000 12:38:38 +0200
- To: W3C Validator <www-validator@w3.org>
- Message-ID: <399134AE.727D3013@tss.no>
Since I already had the CVS tree checked out, I drummed up a quick patch to do some cleanup. The attached patch kills the last of the major globals -- replacing them with more or less sane lexicals -- as well as a boatload of dead code. The code in question (weblint, make_log_entry, and a couple more) hadn't been in use in ages and didn't look like it would be any time soon. Besides, it's easily available from CVS if/when it's ever needed. I don't think there are any bugs here, but testing has been minimal so... :-) HTH, -link
Index: check
===================================================================
RCS file: /sources/public/validator/httpd/cgi-bin/check,v
retrieving revision 1.66
diff -c -r1.66 check
*** check 2000/06/30 20:33:50 1.66
--- check 2000/08/09 10:29:30
***************
*** 37,48 ****
#
# Define global variables
! use vars qw($VERSION $DATE $MAINTAINER); # Strings we need.
! use vars qw($frag $pub_ids $element_uri $file_type); # Cfg hashes.
#
# Paths and file locations
- my $logfile = '/var/log/httpd/val-svc';
my $base_path = '/usr/local/validator/';
my $html_path = $base_path . 'htdocs/';
my $elem_db = $html_path . 'config/eref.cfg';
--- 37,47 ----
#
# Define global variables
! use vars qw($VERSION $DATE $MAINTAINER $NOTICE); # Strings we need.
! use vars qw($pub_ids $element_uri $file_type); # Cfg hashes.
#
# Paths and file locations
my $base_path = '/usr/local/validator/';
my $html_path = $base_path . 'htdocs/';
my $elem_db = $html_path . 'config/eref.cfg';
***************
*** 65,73 ****
# URIs and fragments
my $abs_svc_uri = 'http://validator.w3.org/';
my $uri_def_uri = 'http://www.w3.org/Addressing/#terms';
- my $faqloc = 'http://www.cs.duke.edu/~dsb/kgv-faq/';
my $rel_img_uri = '/images/';
- my $faqerrloc = $faqloc . 'errors.html';
my $abs_img_uri = $abs_svc_uri . 'images/';
my $element_ref = 'http://www.htmlhelp.com/reference/html40/';
--- 64,70 ----
***************
*** 77,83 ****
$VERSION =~ s/Revision: ([\d\.]+) /$1/;
$DATE = q$Date: 2000/06/30 20:33:50 $;
$MAINTAINER = 'gerald@w3.org';
! my $notice = ''; # "<p><strong>Note: This service will be ...</strong>";
#
# DOCTYPEs
--- 74,80 ----
$VERSION =~ s/Revision: ([\d\.]+) /$1/;
$DATE = q$Date: 2000/06/30 20:33:50 $;
$MAINTAINER = 'gerald@w3.org';
! $NOTICE = ''; # "<p><strong>Note: This service will be ...</strong>";
#
# DOCTYPEs
***************
*** 91,115 ****
# HTML fragments
my $lt = "\020";
my $gt = "\021";
- my $gifborder = ' border=0';
- my $xhtmlendtag;
#
- # The option names
- my @options = qw(weblint pw outline ss sp noatt);
-
- #
- # Stopgap to shut -w up. It won't actually fix anything, but it'll keep us
- # running without warnings until we can fix the problems.
- my ($validity, %undef_frag, $effective_charset, $catalog,
- @fake_errors, $guessed_doctype, $doctype, $line, $col, $type, $msg, $diff,
- $pos, $indent, $gifname, $alttext, $gifhw, $nicegifname, $pedanticflags,
- $pedantic_blurb, $level, $prevlevel, $prevdata);
-
- #
# Read configuration files.
! $frag = &read_cfg($frag_db); # FPIs -> plain text version string
! $pub_ids = &read_cfg($fpis_db); # Errors -> fragment identifier
$element_uri = &read_cfg($elem_db); # Element -> URI fragment
$file_type = &read_cfg($type_db); # Content -> File -type
--- 88,97 ----
# HTML fragments
my $lt = "\020";
my $gt = "\021";
#
# Read configuration files.
! $pub_ids = &read_cfg($fpis_db); # FPIs -> plain text version string
$element_uri = &read_cfg($elem_db); # Element -> URI fragment
$file_type = &read_cfg($type_db); # Content -> File -type
***************
*** 118,124 ****
$SIG{TERM} = \&erase_stuff;
$SIG{KILL} = \&erase_stuff;
$SIG{PIPE} = 'IGNORE';
- # $SIG{CHLD} = \&erase_stuff;
#
# delete() the, possibly tainted, $PATH.
--- 100,105 ----
***************
*** 158,164 ****
#
# Supercede URI with an uploaded file.
if ($q->param('uploaded_file')) {
! $q->param('uri', 'file://' . $q->param('uploaded_file'));
}
#
--- 139,145 ----
#
# Supercede URI with an uploaded file.
if ($q->param('uploaded_file')) {
! $q->param('uri', 'upload://' . $q->param('uploaded_file'));
}
#
***************
*** 198,209 ****
<a href="/">HTML Validation Service</a> Results</h1>
EOF
! $header .= $notice;
#
# Punt if we don't recognize this URI scheme.
! # @@ LWP does a whole bunch more: transparently!
! unless ($q->param('uri') =~ m(^(http|file)://)) {
print $header;
print <<"EOF";
<p>
--- 179,189 ----
<a href="/">HTML Validation Service</a> Results</h1>
EOF
! $header .= $NOTICE;
#
# Punt if we don't recognize this URI scheme.
! unless ($q->param('uri') =~ m(^(http|upload)://)) {
print $header;
print <<"EOF";
<p>
***************
*** 257,262 ****
--- 237,243 ----
#
# Try to extract or guess the DOCTYPE for HTML and XHTML files.
+ my($guessed_doctype, $doctype);
if ($File->{Type} eq 'html' or $File->{Type} eq 'xhtml') {
($guessed_doctype, $doctype) = &check_for_doctype($File->{Content});
}
***************
*** 335,341 ****
my $xmlflags = '';
! my $decl = '';
if ($File->{Type} eq 'xhtml') {
$ENV{SGML_CATALOG_FILES} = $sgmlstuff . '/REC-xhtml1-20000126/xhtml.soc';
--- 316,323 ----
my $xmlflags = '';
! my $catalog = '';
! my $decl = '';
if ($File->{Type} eq 'xhtml') {
$ENV{SGML_CATALOG_FILES} = $sgmlstuff . '/REC-xhtml1-20000126/xhtml.soc';
***************
*** 421,427 ****
$version = $pub_ids->{$fpi} || 'unknown';
if ($guessed_doctype == 1) {
! push( @fake_errors, "$sp:<OSFD>0:2:1:E: Missing DOCTYPE declaration at start of document (<a href=\"http://www.htmlhelp.org/tools/validator/doctype.html\">explanation...</a>)\n" );
}
print ' ' x 4, q(<li>Character encoding: ), $File->{Charset};
--- 403,409 ----
$version = $pub_ids->{$fpi} || 'unknown';
if ($guessed_doctype == 1) {
! unshift @errors, "$sp:<OSFD>0:2:1:E: Missing DOCTYPE declaration at start of document (<a href=\"http://www.htmlhelp.org/tools/validator/doctype.html\">explanation...</a>)\n";
}
print ' ' x 4, q(<li>Character encoding: ), $File->{Charset};
***************
*** 441,458 ****
if (($File->{Type} eq 'xml') || ($File->{Type} eq 'xhtml')) {
print ' ' x 4, qq(<li>Document type: ), $version;
! if ( ( $type eq "html" ) &&
! ( $root_namespace ne "http://www.w3.org/1999/xhtml" ) ) {
! print "<br>warning: unknown namespace for text/html document!";
! if ( $root_namespace ne '' ) {
! print qq{, <a href="$root_namespace">$root_namespace</a>};
! }
! print "\n";
! }
! else {
! if ( $root_namespace ne '' ) {
! print qq( with namespace <a href="$root_namespace">$root_namespace</a>);
! }
}
if ( $#other_namespaces >= 0 ) {
--- 423,439 ----
if (($File->{Type} eq 'xml') || ($File->{Type} eq 'xhtml')) {
print ' ' x 4, qq(<li>Document type: ), $version;
! if ($File->{Type} eq 'html' and
! $root_namespace ne "http://www.w3.org/1999/xhtml") {
! print "<br>warning: unknown namespace for text/html document!";
! if ($root_namespace ne '') {
! print qq{, <a href="$root_namespace">$root_namespace</a>};
! }
! print "\n";
! } else {
! if ($root_namespace ne '') {
! print qq( with namespace <a href="$root_namespace">$root_namespace</a>);
! }
}
if ( $#other_namespaces >= 0 ) {
***************
*** 497,508 ****
if ( $? || ($guessed_doctype == 1) ) {
print "<ul>\n";
! for ((@fake_errors,@errors)) {
next if /^<OSFD>0:[0-9]+:[0-9]+:[^A-Z]/;
next if / numbers exceeding 65535 not supported$/;
next if /:W: SGML declaration was not implied$/ &&
($File->{Type} =~ /^x(ht)?ml$/);
s/^$sp:<OSFD>//g;
if ( ! (($line, $col, $type, $msg)=(/^[^:]*:([0-9]+):([0-9]+):([A-Z]?):? (.*)/))) {
print "Uh oh! I got the following unknown error:\n\n $_\n\n";
print "Please make sure you specified the DOCTYPE properly!\n\n";
--- 478,490 ----
if ( $? || ($guessed_doctype == 1) ) {
print "<ul>\n";
! for (@errors) {
next if /^<OSFD>0:[0-9]+:[0-9]+:[^A-Z]/;
next if / numbers exceeding 65535 not supported$/;
next if /:W: SGML declaration was not implied$/ &&
($File->{Type} =~ /^x(ht)?ml$/);
s/^$sp:<OSFD>//g;
+ my($line, $col, $type, $msg);
if ( ! (($line, $col, $type, $msg)=(/^[^:]*:([0-9]+):([0-9]+):([A-Z]?):? (.*)/))) {
print "Uh oh! I got the following unknown error:\n\n $_\n\n";
print "Please make sure you specified the DOCTYPE properly!\n\n";
***************
*** 543,549 ****
elsif ( $col > 70 ) {
# keep rightmost 70 chars; adjust $col accordingly
# (truncate left side only)
! $diff = $col - 50;
$newline = "... " . substr( $newline, $diff, 70 );
if ( length( $newline ) == (70 + 4) ) {
$newline .= " ...";
--- 525,531 ----
elsif ( $col > 70 ) {
# keep rightmost 70 chars; adjust $col accordingly
# (truncate left side only)
! my $diff = $col - 50;
$newline = "... " . substr( $newline, $diff, 70 );
if ( length( $newline ) == (70 + 4) ) {
$newline .= " ...";
***************
*** 574,580 ****
# figure out the index into the %frag associative array for the
# "explanation..." links to the KGV FAQ.
my $msgindex = $msg;
! $msgindex =~ s/"[^"]+"/FOO/g;
$msgindex =~ s/[^A-Za-z ]//;
$newline =~ s/&/&/go; $newline =~ s/</</go;
--- 556,562 ----
# figure out the index into the %frag associative array for the
# "explanation..." links to the KGV FAQ.
my $msgindex = $msg;
! $msgindex =~ s/"[^\"]+"/FOO/g;
$msgindex =~ s/[^A-Za-z ]//;
$newline =~ s/&/&/go; $newline =~ s/</</go;
***************
*** 593,612 ****
print " " x 4 if $col != $orig_col; # only for truncated lines
print "<span class=markup>^</span></pre>\n";
print "<p>\n";
-
- print qq{<span class=error>Error: $msg</span>};
-
- if ( defined $frag->{$msgindex} ) {
- # temporarily commented out due to broken links
- # print qq{ (<a
- # href="$faqerrloc#$frag->{$msgindex}">explanation...</a>)};
- }
- else { # remember msgindexes without frags, to get the KGV FAQ updated.
- $undef_frag{$msgindex} = 1;
- }
-
- print "</p>\n";
}
print "</ul>\n";
print "<hr>\n";
--- 575,582 ----
print " " x 4 if $col != $orig_col; # only for truncated lines
print "<span class=markup>^</span></pre>\n";
print "<p>\n";
+ print qq{<span class=error>Error: $msg</span></p>\n};
}
print "</ul>\n";
print "<hr>\n";
***************
*** 617,632 ****
print "\n <p>\n Sorry, this document does not validate as $version.\n </p>\n\n";
&output_css_validator_blurb( $q->param('uri') );
}
! $validity="invalid";
! }
! else {
if (($File->{Type} eq 'xml') || ($File->{Type} eq 'xhtml')) {
print "\n <pre>\n No errors found! ";
print "<a href=\"#sp-lim\">*</a></pre>\n\n";
! }
! else {
print "\n <pre>\n No errors found!</pre>\n\n";
}
if ( $version ne "unknown" ) {
if ( $version =~ /^HTML 2\.0$/ ) {
$gifname = "vh20";
--- 587,602 ----
print "\n <p>\n Sorry, this document does not validate as $version.\n </p>\n\n";
&output_css_validator_blurb( $q->param('uri') );
}
! } else {
if (($File->{Type} eq 'xml') || ($File->{Type} eq 'xhtml')) {
print "\n <pre>\n No errors found! ";
print "<a href=\"#sp-lim\">*</a></pre>\n\n";
! } else {
print "\n <pre>\n No errors found!</pre>\n\n";
}
+
+ my $gifborder = ' border=0'; # The default.
+ my($gifname, $alttext, $gifhw, $nicegifname, $xhtmlendtag);
if ( $version ne "unknown" ) {
if ( $version =~ /^HTML 2\.0$/ ) {
$gifname = "vh20";
***************
*** 739,745 ****
</p>
EOHD
}
- $validity="valid";
if (($File->{Type} eq 'xml') || ($File->{Type} eq 'xhtml')) {
print qq{ <h2><a name="sp-lim">Caveat</a></h2>
<p>
--- 709,714 ----
***************
*** 751,813 ****
}
}
- if ( $q->param('weblint') ) {
- if ( $q->param('pw') ) {
- $pedanticflags = '-pedantic -e mailto-link';
- $pedantic_blurb = ' (in "pedantic" mode)';
- }
- else {
- $pedanticflags = '';
- }
-
- print <<"EOF";
- <hr>
- <h2><a name="weblint">Weblint Results</a></h2>
-
- <p>
- Below are the results of running <a
- href="http://www.weblint.org/">Weblint</a>
- on this document$pedantic_blurb:
- </p>
-
- <p>
- <strong>Note</strong>:
- Weblint is a useful HTML syntax and style checker, but does
- not do true HTML validation.
- Also, the version of weblint used by this service has not
- been updated for some time, so some of the messages below may
- be misleading or inaccurate.
- </p>
- EOF
-
- open WEBLINT, "|$weblint -s $pedanticflags - 2>&1 >$temp.weblint"
- or die "open($weblint) returned: $!\n";
- for (@{$File->{Content}}) {print WEBLINT $_, "\n"};
- close WEBLINT;
-
- print "\n\n";
- if ( $? ) {
- print " <ul>\n";
-
- open( WEBLINTOUT, "$temp.weblint" )
- || die "couldn't open weblint results in $temp: $!";
-
- while (<WEBLINTOUT>) {
- s/ \(use "-x <extension>" to allow this\)\.$/./go;
- s/&/&/go;
- s/</</go;
- s/>/>/go;
- print " <li>$_";
- }
-
- close( WEBLINTOUT ) || die "couldn't close weblint results: $!";
- print " </ul>\n";
- }
- else {
- print "\n <blockquote>\n Looks good to me!\n </blockquote>\n";
- }
- print "\n\n";
- }
if ($q->param('outline')) {
print <<'EOF';
--- 720,725 ----
***************
*** 889,895 ****
$gd =~ s/&/&/go; $gd =~ s/</</go;
printf "%4d: %s", 0, $gd;
}
! $line = 1;
for (@{$File->{Content}}) {
s/&/&/go; s/</</go;
printf "<a name=\"line-%s\">%4d</a>: %s\n", $line, $line, $_;
--- 801,807 ----
$gd =~ s/&/&/go; $gd =~ s/</</go;
printf "%4d: %s", 0, $gd;
}
! my $line = 1;
for (@{$File->{Content}}) {
s/&/&/go; s/</</go;
printf "<a name=\"line-%s\">%4d</a>: %s\n", $line, $line, $_;
***************
*** 932,937 ****
--- 844,850 ----
$line =~ s/\\n/ /g;
$line =~ s/\\011/ /g;
+ $line =~ s/\\012/ /g;
$line =~ s/\s+/ /g;
next if $line =~ /^-\s*$/;
***************
*** 1037,1069 ****
}
sub erase_stuff {
!
! unlink $temp or warn "unlink($temp) returned: $!\n";
! unlink "$temp.esis" or warn "unlink($temp.esis) returned: $!\n";
! unlink "$temp.weblint";
!
! }
!
! sub make_log_entry {
!
! my $msgindex;
!
! open(LOG,">>$logfile") || die "couldn't append to log: $!";
! print LOG "$ENV{REMOTE_HOST}\t$validity $version\t", $q->param('uri'), "\n";
! foreach $msgindex (keys %undef_frag) {
! print LOG "frag not defined for msgindex: $msgindex\n";
! }
! close( LOG ) || die "couldn't close log: $!";
!
}
sub clean_up_and_exit {
!
! &output_closing;
! &erase_stuff;
! # &make_log_entry;
! exit;
!
}
sub redirect_to_home_page {
--- 950,964 ----
}
sub erase_stuff {
! unlink $temp or warn "unlink($temp) returned: $!\n";
! unlink "$temp.esis" or warn "unlink($temp.esis) returned: $!\n";
! unlink "$temp.weblint";
}
sub clean_up_and_exit {
! &output_closing;
! &erase_stuff;
! exit;
}
sub redirect_to_home_page {
***************
*** 1087,1103 ****
$count++ if $q->param('ss');
$count++ if $q->param('sp');
- $count++ if $q->param('weblint');
$count++ if $q->param('outline');
if ( $count ) {
$text .= " <p>\n Jump to: ";
- if ( $q->param('weblint') ) {
- $text .= "<a\n href=\"#weblint\">Weblint Results</a>";
- $count--;
- $text .= " or " if ( $count == 1 );
- $text .= ", " if ( $count > 1 );
- }
if ( $q->param('outline') ) {
$text .= "<a\n href=\"#outline\">Outline</a>";
$count--;
--- 982,991 ----
***************
*** 1137,1143 ****
# does an HTML element precede the doctype on the same line?
if ( $line =~ /<[a-z].*<!doctype/i ) {
! if ( $line =~ /<[a-z]+ xmlns=['"]([^ '"]*)/i ) {# look for an xmlns attr
return 2, $1;
}
last;
--- 1025,1031 ----
# does an HTML element precede the doctype on the same line?
if ( $line =~ /<[a-z].*<!doctype/i ) {
! if ( $line =~ /<[a-z]+ xmlns=[\'\"]([^ \'\"]*)/i ) {# look for an xmlns attr
return 2, $1;
}
last;
***************
*** 1154,1160 ****
# Strip comments, so the next line doesn't find commented-out markup etc.
# (this doesn't handle multi-line comments, unfortunately)
if ( $line =~ /<[a-z]/i ) { # found an element
! if ( $line =~ /<[a-z]+ xmlns=['"]([^ '"]*)/i ) {# look for an xmlns attr
return 2, $1;
}
last;
--- 1042,1048 ----
# Strip comments, so the next line doesn't find commented-out markup etc.
# (this doesn't handle multi-line comments, unfortunately)
if ( $line =~ /<[a-z]/i ) { # found an element
! if ( $line =~ /<[a-z]+ xmlns=[\'\"]([^ \'\"]*)/i ) {# look for an xmlns attr
return 2, $1;
}
last;
***************
*** 1356,1362 ****
my($ct, @param) = split /\s*;\s*/, lc $Content_Type;
if (exists $file_type->{$ct}) {
! $type = $file_type->{$ct};
}
foreach my $param (@param) {
--- 1244,1250 ----
my($ct, @param) = split /\s*;\s*/, lc $Content_Type;
if (exists $file_type->{$ct}) {
! $ct = $file_type->{$ct};
}
foreach my $param (@param) {
***************
*** 1368,1374 ****
}
}
! return $type, $charset;
}
#
--- 1256,1262 ----
}
}
! return $ct, $charset;
}
#
Received on Wednesday, 9 August 2000 06:38:35 UTC