[PATCH] Misc. namespace/code cleanup

Since I already had the CVS tree checked out, I drummed up a quick patch
to do some cleanup. The attached patch kills the last of the major
globals -- replacing them with more or less sane lexicals -- as well as
a boatload of dead code. The code in question (weblint, make_log_entry,
and a couple more) hadn't been in use in ages and didn't look like it
would be any time soon. Besides, it's easily available from CVS if/when
it's ever needed.

I don't think there are any bugs here, but testing has been minimal
so... :-)

HTH, -link
Index: check
===================================================================
RCS file: /sources/public/validator/httpd/cgi-bin/check,v
retrieving revision 1.66
diff -c -r1.66 check
*** check	2000/06/30 20:33:50	1.66
--- check	2000/08/09 10:29:30
***************
*** 37,48 ****
  
  #
  # Define global variables
! use vars qw($VERSION $DATE $MAINTAINER);             # Strings we need.
! use vars qw($frag $pub_ids $element_uri $file_type); # Cfg hashes.
  
  #
  # Paths and file locations
- my $logfile   = '/var/log/httpd/val-svc';
  my $base_path = '/usr/local/validator/';
  my $html_path = $base_path . 'htdocs/';
  my $elem_db   = $html_path . 'config/eref.cfg';
--- 37,47 ----
  
  #
  # Define global variables
! use vars qw($VERSION $DATE $MAINTAINER $NOTICE); # Strings we need.
! use vars qw($pub_ids $element_uri $file_type);   # Cfg hashes.
  
  #
  # Paths and file locations
  my $base_path = '/usr/local/validator/';
  my $html_path = $base_path . 'htdocs/';
  my $elem_db   = $html_path . 'config/eref.cfg';
***************
*** 65,73 ****
  # URIs and fragments
  my $abs_svc_uri	= 'http://validator.w3.org/';
  my $uri_def_uri	= 'http://www.w3.org/Addressing/#terms';
- my $faqloc	= 'http://www.cs.duke.edu/~dsb/kgv-faq/';
  my $rel_img_uri	= '/images/';
- my $faqerrloc	= $faqloc . 'errors.html';
  my $abs_img_uri	= $abs_svc_uri . 'images/';
  my $element_ref = 'http://www.htmlhelp.com/reference/html40/';
  
--- 64,70 ----
***************
*** 77,83 ****
  $VERSION    =~ s/Revision: ([\d\.]+) /$1/;
  $DATE       =  q$Date: 2000/06/30 20:33:50 $;
  $MAINTAINER =  'gerald@w3.org';
! my $notice  =  ''; # "<p><strong>Note: This service will be ...</strong>";
  
  #
  # DOCTYPEs
--- 74,80 ----
  $VERSION    =~ s/Revision: ([\d\.]+) /$1/;
  $DATE       =  q$Date: 2000/06/30 20:33:50 $;
  $MAINTAINER =  'gerald@w3.org';
! $NOTICE     =  ''; # "<p><strong>Note: This service will be ...</strong>";
  
  #
  # DOCTYPEs
***************
*** 91,115 ****
  # HTML fragments
  my $lt        = "\020";
  my $gt        = "\021";
- my $gifborder = ' border=0';
- my $xhtmlendtag;
  
  #
- # The option names
- my @options = qw(weblint pw outline ss sp noatt);
- 
- #
- # Stopgap to shut -w up. It won't actually fix anything, but it'll keep us
- # running without warnings until we can fix the problems.
- my ($validity, %undef_frag, $effective_charset, $catalog,
-     @fake_errors, $guessed_doctype, $doctype, $line, $col, $type, $msg, $diff,
-     $pos, $indent, $gifname, $alttext, $gifhw, $nicegifname, $pedanticflags,
-     $pedantic_blurb, $level, $prevlevel, $prevdata);
- 
- #
  # Read configuration files.
! $frag        = &read_cfg($frag_db); # FPIs    -> plain text version string
! $pub_ids     = &read_cfg($fpis_db); # Errors  -> fragment identifier
  $element_uri = &read_cfg($elem_db); # Element -> URI fragment
  $file_type   = &read_cfg($type_db); # Content -> File -type
  
--- 88,97 ----
  # HTML fragments
  my $lt        = "\020";
  my $gt        = "\021";
  
  #
  # Read configuration files.
! $pub_ids     = &read_cfg($fpis_db); # FPIs    -> plain text version string
  $element_uri = &read_cfg($elem_db); # Element -> URI fragment
  $file_type   = &read_cfg($type_db); # Content -> File -type
  
***************
*** 118,124 ****
  $SIG{TERM} = \&erase_stuff;
  $SIG{KILL} = \&erase_stuff;
  $SIG{PIPE} = 'IGNORE';
- # $SIG{CHLD} = \&erase_stuff;
  
  #
  # delete() the, possibly tainted, $PATH.
--- 100,105 ----
***************
*** 158,164 ****
  #
  # Supercede URI with an uploaded file.
  if ($q->param('uploaded_file')) {
!   $q->param('uri', 'file://' . $q->param('uploaded_file'));
  }
  
  #
--- 139,145 ----
  #
  # Supercede URI with an uploaded file.
  if ($q->param('uploaded_file')) {
!   $q->param('uri', 'upload://' . $q->param('uploaded_file'));
  }
  
  #
***************
*** 198,209 ****
        <a href="/">HTML Validation Service</a> Results</h1>
  EOF
  
! $header .= $notice;
  
  #
  # Punt if we don't recognize this URI scheme.
! # @@ LWP does a whole bunch more: transparently!
! unless ($q->param('uri') =~ m(^(http|file)://)) {
    print $header;
    print <<"EOF";
    <p>
--- 179,189 ----
        <a href="/">HTML Validation Service</a> Results</h1>
  EOF
  
! $header .= $NOTICE;
  
  #
  # Punt if we don't recognize this URI scheme.
! unless ($q->param('uri') =~ m(^(http|upload)://)) {
    print $header;
    print <<"EOF";
    <p>
***************
*** 257,262 ****
--- 237,243 ----
  
  #
  # Try to extract or guess the DOCTYPE for HTML and XHTML files.
+ my($guessed_doctype, $doctype);
  if ($File->{Type} eq 'html' or $File->{Type} eq 'xhtml') {
    ($guessed_doctype, $doctype) = &check_for_doctype($File->{Content});
  }
***************
*** 335,341 ****
  
  
  my $xmlflags = '';
! my $decl = '';
  
  if ($File->{Type} eq 'xhtml') {
    $ENV{SGML_CATALOG_FILES} = $sgmlstuff . '/REC-xhtml1-20000126/xhtml.soc';
--- 316,323 ----
  
  
  my $xmlflags = '';
! my $catalog  = '';
! my $decl     = '';
  
  if ($File->{Type} eq 'xhtml') {
    $ENV{SGML_CATALOG_FILES} = $sgmlstuff . '/REC-xhtml1-20000126/xhtml.soc';
***************
*** 421,427 ****
  $version = $pub_ids->{$fpi} || 'unknown';
  
  if ($guessed_doctype == 1) {
!     push( @fake_errors, "$sp:<OSFD>0:2:1:E: Missing DOCTYPE declaration at start of document (<a href=\"http://www.htmlhelp.org/tools/validator/doctype.html\">explanation...</a>)\n" );
  }
  
  print ' ' x 4, q(<li>Character encoding: ), $File->{Charset};
--- 403,409 ----
  $version = $pub_ids->{$fpi} || 'unknown';
  
  if ($guessed_doctype == 1) {
!   unshift @errors, "$sp:<OSFD>0:2:1:E: Missing DOCTYPE declaration at start of document (<a href=\"http://www.htmlhelp.org/tools/validator/doctype.html\">explanation...</a>)\n";
  }
  
  print ' ' x 4, q(<li>Character encoding: ), $File->{Charset};
***************
*** 441,458 ****
  if (($File->{Type} eq 'xml') || ($File->{Type} eq 'xhtml')) {
  
      print ' ' x 4, qq(<li>Document type: ), $version;
!     if ( ( $type eq "html" ) &&
!          ( $root_namespace ne "http://www.w3.org/1999/xhtml" ) ) {
! 	print "<br>warning: unknown namespace for text/html document!";
! 	if ( $root_namespace ne '' ) {
! 	    print qq{, <a href="$root_namespace">$root_namespace</a>};
! 	}
! 	print "\n";
!     }
!     else {
! 	if ( $root_namespace ne '' ) {
! 	    print qq( with namespace <a href="$root_namespace">$root_namespace</a>);
! 	}
      }
  
      if ( $#other_namespaces >= 0 ) {
--- 423,439 ----
  if (($File->{Type} eq 'xml') || ($File->{Type} eq 'xhtml')) {
  
      print ' ' x 4, qq(<li>Document type: ), $version;
!     if ($File->{Type} eq 'html' and
! 	$root_namespace ne "http://www.w3.org/1999/xhtml") {
!       print "<br>warning: unknown namespace for text/html document!";
!       if ($root_namespace ne '') {
! 	print qq{, <a href="$root_namespace">$root_namespace</a>};
!       }
!       print "\n";
!     } else {
!       if ($root_namespace ne '') {
! 	print qq( with namespace <a href="$root_namespace">$root_namespace</a>);
!       }
      }
  
      if ( $#other_namespaces >= 0 ) {
***************
*** 497,508 ****
  
  if ( $? || ($guessed_doctype == 1) ) {
      print "<ul>\n";
!     for ((@fake_errors,@errors)) {
  	next if /^<OSFD>0:[0-9]+:[0-9]+:[^A-Z]/;
  	next if / numbers exceeding 65535 not supported$/;
  	next if /:W: SGML declaration was not implied$/ &&
  	    ($File->{Type} =~ /^x(ht)?ml$/);
  	s/^$sp:<OSFD>//g;
  	if ( ! (($line, $col, $type, $msg)=(/^[^:]*:([0-9]+):([0-9]+):([A-Z]?):? (.*)/))) {
  	    print "Uh oh! I got the following unknown error:\n\n   $_\n\n";
  	    print "Please make sure you specified the DOCTYPE properly!\n\n";
--- 478,490 ----
  
  if ( $? || ($guessed_doctype == 1) ) {
      print "<ul>\n";
!     for (@errors) {
  	next if /^<OSFD>0:[0-9]+:[0-9]+:[^A-Z]/;
  	next if / numbers exceeding 65535 not supported$/;
  	next if /:W: SGML declaration was not implied$/ &&
  	    ($File->{Type} =~ /^x(ht)?ml$/);
  	s/^$sp:<OSFD>//g;
+ 	my($line, $col, $type, $msg);
  	if ( ! (($line, $col, $type, $msg)=(/^[^:]*:([0-9]+):([0-9]+):([A-Z]?):? (.*)/))) {
  	    print "Uh oh! I got the following unknown error:\n\n   $_\n\n";
  	    print "Please make sure you specified the DOCTYPE properly!\n\n";
***************
*** 543,549 ****
  	    elsif ( $col > 70 ) {
  		# keep rightmost 70 chars; adjust $col accordingly
  		# (truncate left side only)
! 		$diff = $col - 50;
  		$newline = "... " . substr( $newline, $diff, 70 );
  		if ( length( $newline ) == (70 + 4) ) {
  		    $newline .= " ...";
--- 525,531 ----
  	    elsif ( $col > 70 ) {
  		# keep rightmost 70 chars; adjust $col accordingly
  		# (truncate left side only)
! 		my $diff = $col - 50;
  		$newline = "... " . substr( $newline, $diff, 70 );
  		if ( length( $newline ) == (70 + 4) ) {
  		    $newline .= " ...";
***************
*** 574,580 ****
  	# figure out the index into the %frag associative array for the
  	# "explanation..." links to the KGV FAQ.
  	my $msgindex = $msg;
! 	$msgindex =~ s/"[^"]+"/FOO/g;
  	$msgindex =~ s/[^A-Za-z ]//;
  
  	$newline =~ s/&/&amp;/go; $newline =~ s/</&lt;/go;
--- 556,562 ----
  	# figure out the index into the %frag associative array for the
  	# "explanation..." links to the KGV FAQ.
  	my $msgindex = $msg;
! 	$msgindex =~ s/"[^\"]+"/FOO/g;
  	$msgindex =~ s/[^A-Za-z ]//;
  
  	$newline =~ s/&/&amp;/go; $newline =~ s/</&lt;/go;
***************
*** 593,612 ****
  	print " " x 4 if $col != $orig_col;	# only for truncated lines
  	print "<span class=markup>^</span></pre>\n";
  	print "<p>\n";
- 
-         print qq{<span class=error>Error: $msg</span>};
- 
- 	if ( defined $frag->{$msgindex} ) {
- 	    # temporarily commented out due to broken links
- 	    # print qq{ (<a
- 	    #     href="$faqerrloc#$frag->{$msgindex}">explanation...</a>)};
- 	}
- 	else { # remember msgindexes without frags, to get the KGV FAQ updated.
- 	    $undef_frag{$msgindex} = 1;
- 	}
- 
- 	print "</p>\n";
  
      }
      print "</ul>\n";
      print "<hr>\n";
--- 575,582 ----
  	print " " x 4 if $col != $orig_col;	# only for truncated lines
  	print "<span class=markup>^</span></pre>\n";
  	print "<p>\n";
  
+         print qq{<span class=error>Error: $msg</span></p>\n};
      }
      print "</ul>\n";
      print "<hr>\n";
***************
*** 617,632 ****
  	print "\n  <p>\n    Sorry, this document does not validate as $version.\n  </p>\n\n";
  	&output_css_validator_blurb( $q->param('uri') );
      }
!     $validity="invalid";
! }
! else {
      if (($File->{Type} eq 'xml') || ($File->{Type} eq 'xhtml')) {
  	print "\n  <pre>\n    No errors found! ";
  	print "<a href=\"#sp-lim\">*</a></pre>\n\n";
!     }
!     else {
  	print "\n  <pre>\n    No errors found!</pre>\n\n";
      }
      if ( $version ne "unknown" ) {
  	if ( $version =~ /^HTML 2\.0$/ ) {
  	    $gifname = "vh20";
--- 587,602 ----
  	print "\n  <p>\n    Sorry, this document does not validate as $version.\n  </p>\n\n";
  	&output_css_validator_blurb( $q->param('uri') );
      }
!   } else {
      if (($File->{Type} eq 'xml') || ($File->{Type} eq 'xhtml')) {
  	print "\n  <pre>\n    No errors found! ";
  	print "<a href=\"#sp-lim\">*</a></pre>\n\n";
!     } else {
  	print "\n  <pre>\n    No errors found!</pre>\n\n";
      }
+ 
+     my $gifborder = ' border=0'; # The default.
+     my($gifname, $alttext, $gifhw, $nicegifname, $xhtmlendtag);
      if ( $version ne "unknown" ) {
  	if ( $version =~ /^HTML 2\.0$/ ) {
  	    $gifname = "vh20";
***************
*** 739,745 ****
    </p>
  EOHD
      }
-     $validity="valid";
      if (($File->{Type} eq 'xml') || ($File->{Type} eq 'xhtml')) {
        print qq{    <h2><a name="sp-lim">Caveat</a></h2>
        <p>
--- 709,714 ----
***************
*** 751,813 ****
      }
  }
  
- if ( $q->param('weblint') ) {
-     if ( $q->param('pw') ) {
- 	$pedanticflags  = '-pedantic -e mailto-link';
- 	$pedantic_blurb = ' (in "pedantic" mode)';
-     }
-     else {
- 	$pedanticflags = '';
-     }
- 
-     print <<"EOF";
-   <hr>
-   <h2><a name="weblint">Weblint Results</a></h2>
- 
-   <p>
-     Below are the results of running <a
-     href="http://www.weblint.org/">Weblint</a>
-     on this document$pedantic_blurb:
-   </p>
- 
-   <p>
-     <strong>Note</strong>:
-     Weblint is a useful HTML syntax and style checker, but does
-     not do true HTML validation.
-     Also, the version of weblint used by this service has not
-     been updated for some time, so some of the messages below may
-     be misleading or inaccurate.
-   </p>
- EOF
- 
-     open WEBLINT, "|$weblint -s $pedanticflags - 2>&1 >$temp.weblint"
-       or die "open($weblint) returned: $!\n";
-     for (@{$File->{Content}}) {print WEBLINT $_, "\n"};
-     close WEBLINT;
- 
-     print "\n\n";
-     if ( $? ) {
- 	print "  <ul>\n";
- 
- 	    open( WEBLINTOUT, "$temp.weblint" )
- 		|| die "couldn't open weblint results in $temp: $!";
- 
- 	    while (<WEBLINTOUT>) {
- 		s/ \(use "-x <extension>" to allow this\)\.$/./go;
- 		s/&/&amp;/go;
- 		s/</&lt;/go;
- 		s/>/&gt;/go;
- 		print "    <li>$_";
- 	    }
- 
- 	    close( WEBLINTOUT ) || die "couldn't close weblint results: $!";
- 	    print "  </ul>\n";
-     }
-     else {
- 	print "\n  <blockquote>\n    Looks good to me!\n  </blockquote>\n";
-     }
-     print "\n\n";
- }
  
  if ($q->param('outline')) {
    print <<'EOF';
--- 720,725 ----
***************
*** 889,895 ****
  	$gd =~ s/&/&amp;/go; $gd =~ s/</&lt;/go;
  	printf "%4d: %s", 0, $gd;
      }
!     $line = 1;
      for (@{$File->{Content}}) {
  	s/&/&amp;/go; s/</&lt;/go;
  	printf "<a name=\"line-%s\">%4d</a>: %s\n", $line, $line, $_;
--- 801,807 ----
  	$gd =~ s/&/&amp;/go; $gd =~ s/</&lt;/go;
  	printf "%4d: %s", 0, $gd;
      }
!     my $line = 1;
      for (@{$File->{Content}}) {
  	s/&/&amp;/go; s/</&lt;/go;
  	printf "<a name=\"line-%s\">%4d</a>: %s\n", $line, $line, $_;
***************
*** 932,937 ****
--- 844,850 ----
  
      $line =~ s/\\n/ /g;
      $line =~ s/\\011/ /g;
+     $line =~ s/\\012/ /g;
      $line =~ s/\s+/ /g;
      next if $line =~ /^-\s*$/;
  
***************
*** 1037,1069 ****
  }
  
  sub erase_stuff {
! 
!     unlink $temp           or warn "unlink($temp) returned: $!\n";
!     unlink "$temp.esis"    or warn "unlink($temp.esis) returned: $!\n";
!     unlink "$temp.weblint";
! 
! }
! 
! sub make_log_entry {
! 
!     my $msgindex;
! 
!     open(LOG,">>$logfile") || die "couldn't append to log: $!";
!     print LOG "$ENV{REMOTE_HOST}\t$validity $version\t", $q->param('uri'), "\n";
!     foreach $msgindex (keys %undef_frag) {
! 	print LOG "frag not defined for msgindex: $msgindex\n";
!     }
!     close( LOG ) || die "couldn't close log: $!";
! 
  }
  
  sub clean_up_and_exit {
! 
!     &output_closing;
!     &erase_stuff;
! #    &make_log_entry;
!     exit;
! 
  }
  
  sub redirect_to_home_page {
--- 950,964 ----
  }
  
  sub erase_stuff {
!   unlink $temp           or warn "unlink($temp) returned: $!\n";
!   unlink "$temp.esis"    or warn "unlink($temp.esis) returned: $!\n";
!   unlink "$temp.weblint";
  }
  
  sub clean_up_and_exit {
!   &output_closing;
!   &erase_stuff;
!   exit;
  }
  
  sub redirect_to_home_page {
***************
*** 1087,1103 ****
  
      $count++ if $q->param('ss');
      $count++ if $q->param('sp');
-     $count++ if $q->param('weblint');
      $count++ if $q->param('outline');
  
      if ( $count ) {
  	$text .= "  <p>\n    Jump to: ";
- 	if ( $q->param('weblint') ) {
- 	    $text .= "<a\n      href=\"#weblint\">Weblint Results</a>";
- 	    $count--;
- 	    $text .= " or " if ( $count == 1 );
- 	    $text .= ", "   if ( $count >  1 );
- 	}
  	if ( $q->param('outline') ) {
  	    $text .= "<a\n      href=\"#outline\">Outline</a>";
  	    $count--;
--- 982,991 ----
***************
*** 1137,1143 ****
  
      # does an HTML element precede the doctype on the same line?
      if ( $line =~ /<[a-z].*<!doctype/i ) {
! 	if ( $line =~ /<[a-z]+ xmlns=['"]([^ '"]*)/i ) {# look for an xmlns attr
  	    return 2, $1;
  	}
  	last;
--- 1025,1031 ----
  
      # does an HTML element precede the doctype on the same line?
      if ( $line =~ /<[a-z].*<!doctype/i ) {
! 	if ( $line =~ /<[a-z]+ xmlns=[\'\"]([^ \'\"]*)/i ) {# look for an xmlns attr
  	    return 2, $1;
  	}
  	last;
***************
*** 1154,1160 ****
      # Strip comments, so the next line doesn't find commented-out markup etc.
      # (this doesn't handle multi-line comments, unfortunately)
      if ( $line =~ /<[a-z]/i ) {	# found an element
! 	if ( $line =~ /<[a-z]+ xmlns=['"]([^ '"]*)/i ) {# look for an xmlns attr
  	    return 2, $1;
  	}
  	last;
--- 1042,1048 ----
      # Strip comments, so the next line doesn't find commented-out markup etc.
      # (this doesn't handle multi-line comments, unfortunately)
      if ( $line =~ /<[a-z]/i ) {	# found an element
! 	if ( $line =~ /<[a-z]+ xmlns=[\'\"]([^ \'\"]*)/i ) {# look for an xmlns attr
  	    return 2, $1;
  	}
  	last;
***************
*** 1356,1362 ****
    my($ct, @param) = split /\s*;\s*/, lc $Content_Type;
  
    if (exists $file_type->{$ct}) {
!     $type = $file_type->{$ct};
    }
    
    foreach my $param (@param) {
--- 1244,1250 ----
    my($ct, @param) = split /\s*;\s*/, lc $Content_Type;
  
    if (exists $file_type->{$ct}) {
!     $ct = $file_type->{$ct};
    }
    
    foreach my $param (@param) {
***************
*** 1368,1374 ****
      }
    }
  
!   return $type, $charset;
  }
  
  #
--- 1256,1262 ----
      }
    }
  
!   return $ct, $charset;
  }
  
  #

Received on Wednesday, 9 August 2000 06:38:35 UTC