validator/httpd/cgi-bin check,1.432,1.432.2.1 from Terje Bless via cvs-syncmail on 2005-08-15 (www-validator-cvs@w3.org from August 2005)

From: Terje Bless via cvs-syncmail <cvsmail@w3.org>
Date: Mon, 15 Aug 2005 22:47:53 +0000
To: www-validator-cvs@w3.org
Message-Id: <E1E4nkH-0006f1-8g@lionel-hutz.w3.org>
Update of /sources/public/validator/httpd/cgi-bin
In directory hutz:/tmp/cvs-serv25103/httpd/cgi-bin

Modified Files:
      Tag: validator-0_7-branch
	check 
Log Message:
Merging changes between validator-0_7_0-release and current HEAD at tag
validator-0_7-branchpoint.


Index: check
===================================================================
RCS file: /sources/public/validator/httpd/cgi-bin/check,v
retrieving revision 1.432
retrieving revision 1.432.2.1
diff -u -d -r1.432 -r1.432.2.1
--- check	29 Jul 2005 06:26:15 -0000	1.432
+++ check	15 Aug 2005 22:47:51 -0000	1.432.2.1
@@ -57,7 +57,6 @@
 use URI                  qw();
 use URI::Escape          qw(uri_escape);
 
-
 ###############################################################################
 #### Constant definitions. ####################################################
 ###############################################################################
@@ -96,7 +95,6 @@
 # Define global variables.
 use vars qw($DEBUG $CFG $RSRC $VERSION);
 
-
 #
 # Things inside BEGIN don't happen on every request in persistent environments
 # (such as mod_perl); so let's do the globals, eg. read config, here.
@@ -142,7 +140,7 @@
   #
   # Check a filesystem path for existance and "readability".
   sub pathcheck (@) {
-    my %paths = map {$_ => [-d $_, -r _]} @_;
+    my %paths = map { $_ => [-d $_, -r _] } @_;
     my @_d = grep {not $paths{$_}->[0]} keys %paths;
     my @_r = grep {not $paths{$_}->[1]} keys %paths;
     return TRUE if (scalar(@_d) + scalar(@_r) == 0);
@@ -169,7 +167,7 @@
   #
   # Split allowed protocols into a list.
   if (my $allowed = delete($CFG->{Protocols}->{Allow})) {
-    $CFG->{Protocols}->{Allow} = [ split(/\s*,\s*/, $allowed) ];
+    $CFG->{Protocols}->{Allow} = [split(/\s*,\s*/, $allowed)];
   }
 
   #
@@ -180,7 +178,7 @@
 
   { # Make types config indexed by FPI.
     my $_types = {};
-    map {$_types->{$CFG->{Types}->{$_}->{PubID}} = $CFG->{Types}->{$_}}
+    map { $_types->{$CFG->{Types}->{$_}->{PubID}} = $CFG->{Types}->{$_} }
       keys %{$CFG->{Types}};
     $CFG->{Types} = $_types;
   }
@@ -188,10 +186,10 @@
   #
   # Change strings to internal constants in MIME type mapping.
   for (keys %{$CFG->{MIME}}) {
-    if    ($CFG->{MIME}->{$_} eq 'SGML') {$CFG->{MIME}->{$_} = MODE_SGML}
-    elsif ($CFG->{MIME}->{$_} eq 'XML')  {$CFG->{MIME}->{$_} = MODE_XML}
-    elsif ($CFG->{MIME}->{$_} eq 'TBD')  {$CFG->{MIME}->{$_} = MODE_TBD}
-    else                                 {$CFG->{MIME}->{$_} = MODE_TBD};
+    if    ($CFG->{MIME}->{$_} eq 'SGML') { $CFG->{MIME}->{$_} = MODE_SGML }
+    elsif ($CFG->{MIME}->{$_} eq 'XML')  { $CFG->{MIME}->{$_} = MODE_XML }
+    elsif ($CFG->{MIME}->{$_} eq 'TBD')  { $CFG->{MIME}->{$_} = MODE_TBD }
+    else                                 { $CFG->{MIME}->{$_} = MODE_TBD }
   }
 
   #
@@ -204,8 +202,8 @@
 
   #
   # Strings
-  $VERSION    =  q$Revision$;
-  $VERSION    =~ s/Revision: ([\d\.]+) /$1/;
+  $VERSION =  q$Revision$;
+  $VERSION =~ s/Revision: ([\d\.]+) /$1/;
 
   #
   # Use passive FTP by default.
@@ -216,7 +214,6 @@
 # Get rid of (possibly insecure) $PATH.
 delete $ENV{PATH};
 
-
 #@@DEBUG: Dump $CFG datastructure. Used only as a developer aid.
 #use Data::Dumper qw(Dumper);
 #print Dumper($CFG);
@@ -235,7 +232,6 @@
 # The data structure that will hold all session data.
 my $File;
 
-
 ##############################################
 # Populate $File->{Env} -- Session Metadata. #
 ##############################################
@@ -244,7 +240,6 @@
 # The URL to this CGI Script.
 $File->{Env}->{'Self URI'} = $q->url(-query => 0);
 
-
 #################################
 # Initialize the datastructure. #
 #################################
@@ -272,7 +267,6 @@
 $File->{Warnings}   = []; # Warnings...
 $File->{Namespaces} = []; # Other (non-root) Namespaces.
 
-
 ###############################################################################
 #### Generate Template for Result. ############################################
 ###############################################################################
@@ -300,14 +294,12 @@
 $File->{E} = $E;
 $File->{H} = $H;
 
-
 # Read friendly error message file
 my $error_messages_list =  File::Spec->catfile($CFG->{Paths}->{Templates}, $lang, 'error_messages.cfg');
 my %config_opts = (-ConfigFile => $error_messages_list);
 my %rsrc = Config::General->new(%config_opts)->getall();
 $RSRC = \%rsrc;
 
-
 $T->param(cfg_home_page => $CFG->{'Home Page'});
 
 undef $lang;
@@ -331,7 +323,6 @@
 $File->{Opt}->{'Verbose'}        = $q->param('verbose') ? TRUE                   :  FALSE;
 $File->{Opt}->{'Debug'}          = $q->param('debug')   ? TRUE                   :  FALSE;
 $File->{Opt}->{'No200'}          = $q->param('No200')   ? TRUE                   :  FALSE;
-# $File->{Opt}->{'Fussy'}          = $q->param('fussy')   ? TRUE                   :  FALSE;
 $File->{Opt}->{'Charset'}        = $q->param('charset') ? lc $q->param('charset'):     '';
 $File->{Opt}->{'DOCTYPE'}        = $q->param('doctype') ? $q->param('doctype')   :     '';
 $File->{Opt}->{'Output'}         = $q->param('output')  ? $q->param('output')    : 'html';
@@ -381,7 +372,6 @@
 # by Apache::Registry's idiotic interference under mod_perl.
 untie *STDIN;
 
-
 ###############################################################################
 #### Output validation results. ###############################################
 ###############################################################################
@@ -415,8 +405,6 @@
   $File->{Charset}->{Use} = 'utf-8'; # UTF-8 (image/svg+xml etc.)
 }
 
-
-
 $File->{Content} = &normalize_newlines($File->{Bytes},
                        exact_charset($File, $File->{Charset}->{Use}));
 
@@ -451,9 +439,9 @@
       $File->{Tentative} |= T_ERROR;
       $File->{Charset}->{Use} = $File->{Charset}->{Override};
      }
-     else { #actually overriding something 
+     else { #actually overriding something
       # Warn about Override unless it's the same as the real charset...
-    
+
       unless ($File->{Charset}->{Override} eq $File->{Charset}->{Use}) {
         &add_warning('W03', {
           W03_use => $File->{Charset}->{Use},
@@ -462,11 +450,8 @@
 
         $File->{Tentative} |= T_ERROR;
         $File->{Charset}->{Use} = $File->{Charset}->{Override};
-      } 
+      }
     }
-    
-    
-    
   }
 }
 
@@ -476,12 +461,10 @@
   $File->{Charset}->{Use} = 'utf-8';
 }
 
-
 #
 # Abort if an error was flagged while finding the encoding.
 &abort_if_error_flagged($File, O_CHARSET|O_DOCTYPE);
 
-
 #
 # Check the detected Encoding and transcode.
 if (&conflict($File->{Charset}->{Use}, 'utf-8')) {
@@ -489,7 +472,6 @@
   &abort_if_error_flagged($File, O_CHARSET);
 }
 
-
 $File = &check_utf8($File); # always check
 $File = &byte_error($File);
 
@@ -497,8 +479,6 @@
 # Abort if an error was flagged during transcoding
 &abort_if_error_flagged($File, O_SOURCE|O_CHARSET);
 
-
-
 #
 # Overall parsing algorithm for documents returned as text/html:
 #
@@ -530,13 +510,13 @@
     my $cfg  = $CFG->{Types}->{$fpi};
     my $mode = $cfg->{'Parse Mode'};
 
-    if    ($mode eq 'SGML') {$mode = MODE_SGML}
-    elsif ($mode eq 'XML')  {$mode = MODE_XML}
-    else                    {$mode = MODE_TBD}
+    if    ($mode eq 'SGML') { $mode = MODE_SGML }
+    elsif ($mode eq 'XML')  { $mode = MODE_XML }
+    else                    { $mode = MODE_TBD }
 
     if ($File->{Mode} == MODE_TBD) {
-      if    ($mode == MODE_SGML) {$File->{Mode} = MODE_SGML}
-      elsif ($mode == MODE_XML)  {$File->{Mode} = MODE_XML}
+      if    ($mode == MODE_SGML) { $File->{Mode} = MODE_SGML }
+      elsif ($mode == MODE_XML)  { $File->{Mode} = MODE_XML }
       else {
         $File->{Mode} = MODE_SGML;
         &add_warning('W06', {});
@@ -546,9 +526,9 @@
         my $dtd = $mode;
         my $ct  = $File->{Mode};
         for ($dtd, $ct) {
-          if    ($_ == MODE_SGML) {$_ = 'SGML'}
-          elsif ($_ == MODE_XML)  {$_ =  'XML'}
-          else                    {$_ = 'SGML'};
+          if    ($_ == MODE_SGML) { $_ = 'SGML' }
+          elsif ($_ == MODE_XML)  { $_ =  'XML' }
+          else                    { $_ = 'SGML' }
         }
         unless ($File->{Mode} == MODE_TBD) {
           &add_warning('W07', {
@@ -565,7 +545,6 @@
   }
 }
 
-
 #
 # Sanity check Charset information and add any warnings necessary.
 $File = &charset_conflicts($File);
@@ -595,17 +574,8 @@
   if (&is_xml($File)) {
     $catalog  = File::Spec->catfile($CFG->{Paths}->{SGML}->{Library}, 'xml.soc');
     push(@spopt, '-wxml');
-  } else { # Only add these in SGML mode.
-#    if ($File->{Opt}->{'Fussy'}) {
-#      push @spopt, '-wmin-tag';
-#      push @spopt, '-wfully-tagged';
-#      push @spopt, '-wrefc';
-#      push @spopt, '-wmissing-att-name';
-#      push @spopt, '-wdata-delim';
-#    }
   }
-
-
+  
   #
   # Defaults for SP; turn off fixed charset mode and set encoding to UTF-8.
   $ENV{SP_CHARSET_FIXED} = 'NO';
@@ -620,7 +590,6 @@
   # Set the command to execute.
   my @cmd = ($CFG->{Paths}->{SGML}->{Parser}, '-n', '-c', $catalog, @spopt);
 
-
   #
   # Set debug info for HTML report.
   $T->param(opt_debug => $DEBUG);
@@ -673,7 +642,7 @@
   # Run it through SP, redirecting output to temporary files.
   my $pid = do {
     no warnings 'once';
-    local(*SPIN, *SPOUT, *SPERR)  = ($spin, $spout, $sperr);
+    local (*SPIN, *SPOUT, *SPERR)  = ($spin, $spout, $sperr);
     open3("<&SPIN", ">&SPOUT", ">&SPERR", @cmd);
   };
   undef $spin;
@@ -708,7 +677,7 @@
       }
     }
 
-    next if / IMPLIED$/ && not $DEBUG;;
+    next if / IMPLIED$/ && not $DEBUG;
     next if /^ASDAFORM CDATA /;
     next if /^ASDAPREF CDATA /;
     chomp; # Removes trailing newlines
@@ -758,7 +727,6 @@
   $File->{Version} = $prettyver;
 }
 
-
 #
 # Warn about unknown, incorrect, or missing Namespaces.
 if ($File->{Namespace}) {
@@ -790,11 +758,8 @@
 } else {
   &prep_template($File, $T);
 
-  
-  
-  
   if (! $File->{Doctype} and ($File->{Version} eq 'unknown' or $File->{Version} eq 'SGML' or (!$File->{Version}))) {
-    # @@TODO@@ we should try falling back on other version info, such as the ones stored in Version_ESIS 
+    # @@TODO@@ we should try falling back on other version info, such as the ones stored in Version_ESIS
     $T->param(file_version => '(no Doctype found)');
   }
   else {
@@ -844,7 +809,6 @@
 undef $File;
 exit;
 
-
 #############################################################################
 # Subroutine definitions
 #############################################################################
@@ -891,7 +855,7 @@
 
   #
   # Tip of the Day...
-  my $tip = &get_tip;
+  my $tip = &get_tip();
   $T->param(tip_uri  => $tip->[0]);
   $T->param(tip_slug => $tip->[1]);
 
@@ -908,17 +872,16 @@
   }
   my @nss                   =  map({uri => $_}, @{$File->{Namespaces}});
   $T->param(file_namespaces => \@nss) if @nss;
-  
+
   if ($File->{Opt}->{DOCTYPE}) {
       my $over_doctype_param = "override doctype $File->{Opt}->{DOCTYPE}";
-       $T->param($over_doctype_param => TRUE);   
+       $T->param($over_doctype_param => TRUE);
   }
-  
+
   if ($File->{Opt}->{Charset}) {
       my $over_charset_param = "override charset $File->{Opt}->{Charset}";
-       $T->param($over_charset_param => TRUE);   
+       $T->param($over_charset_param => TRUE);
   }
-  
 }
 
 #
@@ -951,7 +914,6 @@
   $T->param(file_thispage => $thispage);
 }
 
-
 #
 # Add a waring message to the output.
 sub add_warning ($$) {
@@ -962,7 +924,6 @@
   $File->{T}->param(have_warnings => TRUE);
 }
 
-
 #
 # Proxy authentication requests.
 # Note: expects the third argument to be a hash ref (see HTTP::Headers::Auth).
@@ -996,7 +957,6 @@
   exit; # Further interaction will be a new HTTP request.
 }
 
-
 #
 # Fetch an URL and return the content and selected meta-info.
 sub handle_uri {
@@ -1015,12 +975,12 @@
 
   unless ($ua->is_protocol_supported($uri)) {
     $File->{'Error Flagged'} = TRUE;
-    if (($uri->canonical() eq "1") ) 
+    if (($uri->canonical() eq "1") )
     #if uri param is empty (also for empty direct or upload), it's been set to TRUE in sub prepCGI()
     {
-      $File->{E}->param(fatal_no_content  => TRUE);      
+      $File->{E}->param(fatal_no_content  => TRUE);
     }
-    else { 
+    else {
       $File->{E}->param(fatal_uri_error  => TRUE);
       $File->{E}->param(fatal_uri_scheme => $uri->scheme());
     }
@@ -1061,7 +1021,7 @@
   # Enforce Max Recursion level.
   &check_recursion($File, $res);
 
-  my($mode, $ct, $charset)
+  my ($mode, $ct, $charset)
     = &parse_content_type(
                           $File,
                           $res->header('Content-Type'),
@@ -1089,7 +1049,6 @@
   $File->{'Is Upload'}     = FALSE;
 
   return $File;
-
 }
 
 #
@@ -1105,7 +1064,7 @@
   local $/ = undef; # set line delimiter so that <> reads rest of file
   $file = <$f>;
 
-  my($mode, $ct, $charset) = &parse_content_type($File, $h->{'Content-Type'});
+  my ($mode, $ct, $charset) = &parse_content_type($File, $h->{'Content-Type'});
 
   $File->{Bytes}           = $file;
   $File->{Mode}            = $mode;
@@ -1140,7 +1099,6 @@
   return $File;
 }
 
-
 #
 # Parse a Content-Type and parameters. Return document type and charset.
 sub parse_content_type {
@@ -1150,12 +1108,12 @@
   my $charset      = '';
   my $mode         = '';
 
-  my($ct, @param) = split /\s*;\s*/, lc $Content_Type;
+  my ($ct, @param) = split /\s*;\s*/, lc $Content_Type;
 
   $mode = $CFG->{MIME}->{$ct} || $ct;
 
   foreach my $param (@param) {
-    my($p, $v) = split /\s*=\s*/, $param;
+    my ($p, $v) = split /\s*=\s*/, $param;
     next unless $p =~ m(charset)i;
     if ($v =~ m/([\'\"]?)(\S+)\1/i) {
       $charset = lc $2;
@@ -1198,8 +1156,6 @@
   }
 }
 
-
-
 #
 # Normalize newline forms (CRLF/CR/LF) to native newline.
 sub normalize_newlines {
@@ -1237,7 +1193,6 @@
   return $exact_charset;
 }
 
-
 #
 # Return $_[0] encoded for HTML entities (cribbed from merlyn).
 #
@@ -1250,7 +1205,6 @@
   return $_;
 }
 
-
 #
 # Truncate source lines for report.
 #
@@ -1299,14 +1253,14 @@
   return $line, $col;
 }
 
-
 #
 # Suppress any existing DOCTYPE by commenting it out.
 sub override_doctype {
   no strict 'vars';
-  my $File  = shift;
-  my ($dt)  =
+  my $File = shift;
+  my ($dt) =
     grep { $_->{Display} eq $File->{Opt}->{DOCTYPE} } values %{$CFG->{Types}};
+
   # @@TODO: abort/whine about unrecognized doctype if $dt is undef.;
   my $pubid = $dt->{PubID};
   my $sysid = $dt->{SysID};
@@ -1314,9 +1268,10 @@
   local $dtd = qq(<!DOCTYPE $name PUBLIC "$pubid");
   $dtd .= qq( "$sysid") if $sysid; # We don't have one for all types.
   $dtd .= '>';
+  
   local $org_dtd = '';
-  local $HTML = '';
-  local $seen = FALSE;
+  local $HTML    = '';
+  local $seen    = FALSE;
 
   my $declaration = sub {
     $seen = TRUE;
@@ -1324,7 +1279,8 @@
     # No Override if Fallback was requested.
     if ($File->{Opt}->{FB}->{DOCTYPE}) {
       $HTML .= $_[0]; # Stash it as is...
-    } else { # Comment it out and insert the new one...
+    } else {
+      # Comment it out and insert the new one...
       $HTML .= "$dtd\n" . '<!-- ' . $_[0] . ' -->';
       $org_dtd = &ent($_[0]);
     }
@@ -1359,7 +1315,6 @@
   return $File;
 }
 
-
 #
 # Parse errors reported by SP.
 sub parse_errors ($$) {
@@ -1375,12 +1330,12 @@
 
     push @{$File->{DEBUG}->{Errors}}, $_;
     chomp;
-    my($err, @errors);
+    my ($err, @errors);
     next if /^<OSFD>0:[0-9]+:[0-9]+:[^A-Z]/;
     next if /numbers exceeding 65535 not supported/;
     next if /URL Redirected to/;
 
-    my(@_err) = split /:/;
+    my (@_err) = split /:/;
     next unless $_err[1] eq '<OSFD>0'; #@@FIXME: This is a polite fiction!;
     if ($_err[1] =~ m(^<URL>)) {
       @errors = ($_err[0], join(':', $_err[1], $_err[2]), @_err[3..$#_err]);
@@ -1390,6 +1345,7 @@
     $err->{src}  = $errors[1];
     $err->{line} = $errors[2];
     $err->{char} = $errors[3];
+    
     # Workaround for onsgmls 1.5 sometimes reporting errors beyond EOL.
     if ((my $l = length($File->{Content}->[$err->{line}-1])) < $err->{char}) {
       $err->{char} = $l;
@@ -1399,6 +1355,7 @@
     if ($err->{type} eq 'E' or $err->{type} eq 'X' or $err->{type} eq 'Q') {
       $err->{msg}  = join ':', @errors[6 .. $#errors];
     } elsif ($err->{type} eq 'W') {
+
       #@@FIXME: This is borked after templatification.
       # &add_warning($File, 'fake', 'Warning:',
       #  "Line $err->{line}, column $err->{char}: " . &ent($errors[6]));
@@ -1445,13 +1402,13 @@
 
   if (scalar @{$File->{Errors}}) {
     foreach my $err (@{$File->{Errors}}) {
-      my($line, $col) = &truncate_line($File->{Content}->[$err->{line}-1], $err->{char});
+      my ($line, $col) = &truncate_line($File->{Content}->[$err->{line}-1], $err->{char});
 
       $line = &mark_error($line, $col);
 
       my $explanation;
       if ($err->{num}) {
-        my(undef, $num) = split /\./, $err->{num};
+        my (undef, $num) = split /\./, $err->{num};
         if (exists $Msgs{$num}) { # We've already seen this message...
           if ($File->{Opt}->{Verbose}) { # ...so only repeat it in Verbose mode.
             $explanation = qq(\n    <div class="hidden mid-$num"></div>\n);
@@ -1468,7 +1425,7 @@
 	{
 		$_msg =~ s/<!--URI-->//g
 	}
-	else 
+	else
 	{
 		my $escaped_uri = uri_escape($File->{URI});
         	$_msg =~ s/<!--URI-->/$escaped_uri/g;
@@ -1479,7 +1436,7 @@
       $err->{src} = $line;
       $err->{col} = ' ' x $col;
       $err->{expl} = $explanation;
-      if ($err->{type} eq 'I') 
+      if ($err->{type} eq 'I')
       {
         $err->{class} = 'msg_info';
         $err->{err_type_info} = 1;
@@ -1490,7 +1447,7 @@
         $err->{err_type_err} = 1;
         $number_of_errors += 1;
       }
-      elsif (($err->{type} eq 'W') or ($err->{type} eq 'X') ) 
+      elsif (($err->{type} eq 'W') or ($err->{type} eq 'X') )
       {
         $err->{class} = 'msg_warn';
         $err->{err_type_warn} = 1;
@@ -1575,7 +1532,6 @@
   return $line;
 }
 
-
 #
 # Produce an outline of the document based on Hn elements from the ESIS.
 sub outline {
@@ -1586,7 +1542,7 @@
   my $prevlevel = 0;
   my $level     = 0;
 
-  for (1 .. $#{$File->{ESIS}}) {
+  for (1..$#{$File->{ESIS}}) {
     my $line = $File->{ESIS}->[$_];
     next unless ($line && $line =~ /^\(H([1-6])$/i);
 
@@ -1646,7 +1602,6 @@
   return $outline;
 }
 
-
 #
 # Create a HTML representation of the document.
 sub source {
@@ -1660,7 +1615,6 @@
   return \@source;
 }
 
-
 #
 # Create a HTML Parse Tree of the document for validation report.
 sub parsetree {
@@ -1719,7 +1673,6 @@
   return $tree;
 }
 
-
 #
 # Do an initial parse of the Document Entity to extract FPI.
 sub preparse_doctype {
@@ -1732,7 +1685,7 @@
 
   my $dtd = sub {
     return if $File->{Root};
-    ($File->{Root}, $File->{DOCTYPE}) = shift =~  m(<!DOCTYPE\s+(\w+)\s+(?:PUBLIC|SYSTEM)\s+(?:[\'\"])([^\"\']+)(?:[\"\']).*>)si;
+    ($File->{Root}, $File->{DOCTYPE}) = shift =~ m(<!DOCTYPE\s+(\w+)\s+(?:PUBLIC|SYSTEM)\s+(?:[\'\"])([^\"\']+)(?:[\"\']).*>)si;
   };
 
   my $start = sub {
@@ -1771,11 +1724,10 @@
   for (@{shift->{ESIS}}) {
     s/\\012//g;
     s/\\n/\n/g;
-    
-     $file_esis .= ent $_;
-     $file_esis .= "\n";
+    $file_esis .= ent $_;
+    $file_esis .= "\n";
   }
-  return  $file_esis;  
+  return  $file_esis;
 }
 
 #
@@ -1788,12 +1740,11 @@
   return $file_raw_errors;
 }
 
-
 #
 # Preprocess CGI parameters.
 sub prepCGI {
   my $File = shift;
-  my    $q = shift;
+  my $q    = shift;
 
   # Avoid CGI.pm's "exists but undef" behaviour.
   if (scalar $q->param) {
@@ -1801,6 +1752,7 @@
       next if $param eq 'uploaded_file'; # 'uploaded_file' contains data.
       next if $param eq 'fragment';      # Ditto 'fragment'.
       next if $q->param($param) eq '0';  # Keep false-but-set params.
+
       #
       # Parameters that are given to us without specifying a value get
       # set to "1" (the "TRUE" constant). This is so we can test for the
@@ -1845,6 +1797,7 @@
       print redirect &self_url_q($q, $File);
       exit;
     } else {
+        
       # Redirected from /check/referer to /check?uri=referer because
       # the browser didn't send a Referer header, or the request was
       # for /check?uri=referer but no Referer header was found.
@@ -1909,7 +1862,6 @@
   return $ssi;
 }
 
-
 #
 # Utility sub to tell if mode "is" XML.
 sub is_xml {shift->{Mode} == MODE_XML};
@@ -1922,7 +1874,7 @@
 
   my $dtd = sub {
     return if $File->{Root};
-    ($File->{Root}, $File->{DOCTYPE}) = shift =~  m(<!DOCTYPE\s+(\w+)\s+PUBLIC\s+(?:[\'\"])([^\"\']+)(?:[\"\']).*>)si;
+    ($File->{Root}, $File->{DOCTYPE}) = shift =~ m(<!DOCTYPE\s+(\w+)\s+PUBLIC\s+(?:[\'\"])([^\"\']+)(?:[\"\']).*>)si;
   };
 
   my $start = sub {
@@ -1999,7 +1951,6 @@
   return $File;
 }
 
-
 #
 # Transcode to UTF-8
 sub transcode {
@@ -2037,10 +1988,13 @@
     $_ = $c->convert($_); # $_ is local!!
     if ($in ne "" and (!defined($_) || $_ eq "")) {
       push @{$File->{Lines}}, $line;
+      
       # try to decoded as much as possible of the line
-      my $short = 0;               # longest okay
-      my $long = (length $in) - 1; # longest unknown
-      while ($long > $short) { # binary search
+      my $short = 0;                # longest okay
+      my $long  = (length $in) - 1; # longest unknown
+      
+      while ($long > $short) {
+        # binary search
         my $try = int (($long+$short+1) / 2);
         my $converted = $c->convert(substr($in, 0, $try));
         if (!defined($converted) || $converted eq "") {
@@ -2108,17 +2062,16 @@
   return $File;
 }
 
-
 #
 # Return an XML report for the page.
 sub report_xml {
   my $File = shift;
 
   my $valid = ($File->{'Is Valid'} ? 'Valid' : 'Invalid');
-  my $errs  = ($File->{'Is Valid'} ? '0' : scalar @{$File->{Errors}});
+  my $errs  = ($File->{'Is Valid'} ? '0'     : scalar @{$File->{Errors}});
   if ($File->{E}->param('fatal_http_error')) {
     $valid = 'Could not validate';
-  } 
+  }
 
   print <<".EOF.";
 Content-Type: application/xml; charset=UTF-8
@@ -2190,13 +2143,13 @@
       chomp $err->{msg};
 
       # Find index into the %frag hash for the "explanation..." links.
-      $err->{idx} =  $err->{msg};
+      $err->{idx} = $err->{msg};
       $err->{idx} =~ s/"[^\"]*"/FOO/g;
       $err->{idx} =~ s/[^A-Za-z ]//g;
-      $err->{idx} =~ s/\s+/ /g; # Collapse spaces
-      $err->{idx} =~ s/(^\s|\s$)//g; # Remove leading and trailing spaces.
+      $err->{idx} =~ s/\s+/ /g;        # Collapse spaces
+      $err->{idx} =~ s/(^\s|\s$)//g;   # Remove leading and trailing spaces.
       $err->{idx} =~ s/(FOO )+/FOO /g; # Collapse FOOs.
-      $err->{idx} =~ s/FOO FOO/FOO/g; # Collapse FOOs.
+      $err->{idx} =~ s/FOO FOO/FOO/g;  # Collapse FOOs.
 
       my $offset = $File->{Offsets}->[$err->{line} - 1]->[1] + $err->{char};
       printf <<".EOF.", &ent($err->{msg});
@@ -2208,18 +2161,16 @@
   print qq(</result>\n);
 }
 
-
-
 #
 # Return an EARL report for the page.
 sub report_earl {
   my $File = shift;
 
   my $valid = ($File->{'Is Valid'} ? 'Valid' : 'Invalid');
-  my $errs  = ($File->{'Is Valid'} ? '0' : scalar @{$File->{Errors}});
+  my $errs  = ($File->{'Is Valid'} ? '0'     : scalar @{$File->{Errors}});
   if ($File->{E}->param('fatal_http_error')) {
     $valid = 'Could not validate';
-  } 
+  }
 
   print <<".EOF.";
 Content-Type: application/rdf+xml; charset=UTF-8
@@ -2260,10 +2211,10 @@
       $err->{idx} =  $err->{msg};
       $err->{idx} =~ s/"[^\"]*"/FOO/g;
       $err->{idx} =~ s/[^A-Za-z ]//g;
-      $err->{idx} =~ s/\s+/ /g; # Collapse spaces
-      $err->{idx} =~ s/(^\s|\s\Z)//g; # Remove leading and trailing spaces.
+      $err->{idx} =~ s/\s+/ /g;        # Collapse spaces
+      $err->{idx} =~ s/(^\s|\s\Z)//g;  # Remove leading and trailing spaces.
       $err->{idx} =~ s/(FOO )+/FOO /g; # Collapse FOOs.
-      $err->{idx} =~ s/FOO FOO/FOO/g; # Collapse FOOs.
+      $err->{idx} =~ s/FOO FOO/FOO/g;  # Collapse FOOs.
 
       my @offsets = (
                      $File->{Offsets}->[$err->{line}    ]->[0],
@@ -2305,8 +2256,6 @@
 .EOF.
 }
 
-
-
 #
 # Return a Notation3 EARL report for the page.
 #
@@ -2318,7 +2267,7 @@
   my $errs  = ($File->{'Is Valid'} ? '0' : scalar @{$File->{Errors}});
   if ($File->{E}->param('fatal_http_error')) {
     $valid = 'Could not validate';
-  } 
+  }
 
   print <<".EOF.";
 Content-Type: text/plain; charset=UTF-8
@@ -2343,10 +2292,10 @@
       $err->{idx} =  $err->{msg};
       $err->{idx} =~ s/"[^\"]*"/FOO/g;
       $err->{idx} =~ s/[^A-Za-z ]//g;
-      $err->{idx} =~ s/\s+/ /g; # Collapse spaces
-      $err->{idx} =~ s/(^\s|\s\Z)//g; # Remove leading and trailing spaces.
+      $err->{idx} =~ s/\s+/ /g;        # Collapse spaces
+      $err->{idx} =~ s/(^\s|\s\Z)//g;  # Remove leading and trailing spaces.
       $err->{idx} =~ s/(FOO )+/FOO /g; # Collapse FOOs.
-      $err->{idx} =~ s/FOO FOO/FOO/g; # Collapse FOOs.
+      $err->{idx} =~ s/FOO FOO/FOO/g;  # Collapse FOOs.
 
       my @offsets = (
                      $File->{Offsets}->[$err->{line}    ]->[0],
@@ -2391,7 +2340,6 @@
   print " .\n";
 }
 
-
 #
 # Autodetection as in Appendix F of the XML 1.0 Recommendation.
 # <http://www.w3.org/TR/2000/REC-xml-20001006#sec-guessing>
@@ -2429,7 +2377,6 @@
                             # nothing in particular
 }
 
-
 #
 # Find encoding in document according to XML rules
 # Only meaningful if file contains a BOM, or for well-formed XML!
@@ -2439,20 +2386,24 @@
 
   ($File->{Charset}->{Auto}, $File->{BOM}, $CodeUnitSize, $Pattern)
     = &find_base_encoding($File->{Bytes});
+
+  # 100 arbitrary, but enough in any case
   my $someBytes = substr $File->{Bytes}, $File->{BOM}, ($CodeUnitSize * 100);
-  my $someText = '';                  # 100 arbitrary, but enough in any case
+  my $someText  = '';
 
   # translate from guessed encoding to ascii-compatible
   if ($File->{Charset}->{Auto} eq 'ebcdic') {
+
     # special treatment for EBCDIC, maybe use tr///
     # work on this later
   }
   elsif (!$Pattern) {
     $someText = $someBytes; # efficiency shortcut
   }
-  else { # generic code for UTF-16/UCS-4
+  else {
+    # generic code for UTF-16/UCS-4
     $someBytes =~ /^(($Pattern)*)/s;
-    $someText = $1;       # get initial piece without chars >255
+    $someText = $1;                   # get initial piece without chars >255
     $someText =~ s/$Pattern/$1/sg;    # select the relevant bytes
   }
 
@@ -2480,6 +2431,7 @@
     print $E->output;
     exit;
   } else {
+
     #@@FIXME: This is borked after templatification.
     # &add_warning($File, 'fatal', 'Fatal Error', <<".EOF.");
     # A fatal error has occurred while processing the requested document. Processing
@@ -2501,19 +2453,18 @@
   return $encodingA && $encodingB && ($encodingA ne $encodingB);
 }
 
-
 #
 # Construct a self-referential URL from a CGI.pm $q object.
 sub self_url_q {
   my ($q, $File) = @_;
   my $thispage = $File->{Env}->{'Self URI'};
-     $thispage .= '?uri='       . uri_escape($q->param('uri'));
-     $thispage .= ';ss=1'      if $q->param('ss');
-     $thispage .= ';sp=1'      if $q->param('sp');
-     $thispage .= ';noatt=1'   if $q->param('noatt');
-     $thispage .= ';outline=1' if $q->param('outline');
-     $thispage .= ';No200=1'   if $q->param('No200');
-     $thispage .= ';verbose=1' if $q->param('verbose');
+  $thispage .= '?uri='       . uri_escape($q->param('uri'));
+  $thispage .= ';ss=1'      if $q->param('ss');
+  $thispage .= ';sp=1'      if $q->param('sp');
+  $thispage .= ';noatt=1'   if $q->param('noatt');
+  $thispage .= ';outline=1' if $q->param('outline');
+  $thispage .= ';No200=1'   if $q->param('No200');
+  $thispage .= ';verbose=1' if $q->param('verbose');
   if ($q->param('doctype')
       and not $q->param('doctype') =~ /(Inline|detect)/i) {
     $thispage .= ';doctype=' . uri_escape($q->param('doctype'));
@@ -2528,8 +2479,8 @@
 # Return random Tip with it's URL.
 sub get_tip {
   my @tipAddrs = keys %{$CFG->{Tips}};
-  my $tipAddr = $tipAddrs[rand scalar @tipAddrs];
-  my $tipSlug = $CFG->{Tips}->{$tipAddr};
+  my $tipAddr  = $tipAddrs[rand scalar @tipAddrs];
+  my $tipSlug  = $CFG->{Tips}->{$tipAddr};
 
   return [$tipAddr, $tipSlug];
 }
@@ -2552,7 +2503,6 @@
   return $thispage;
 }
 
-
 #####
 
 package W3C::Validator::UserAgent;
@@ -2564,8 +2514,7 @@
 
 use base qw(LWP::UserAgent);
 
-sub new
-{
+sub new {
   my ($proto, $CFG, $File, @rest) = @_;
   my $class = ref($proto) || $proto;
   my $self = $class->SUPER::new(@rest);
@@ -2574,14 +2523,12 @@
   return $self;
 }
 
-sub redirect_ok
-{
+sub redirect_ok {
   my ($self, $req, $res) = @_;
   return $self->SUPER::redirect_ok($req, $res) && $self->uri_ok($req->uri());
 }
 
-sub uri_ok
-{
+sub uri_ok {
   my ($self, $uri) = @_;
   return 1 if ($self->{'W3C::Validator::CFG'}->{'Allow Private IPs'} or
                !$uri->can('host'));
@@ -2595,7 +2542,7 @@
   }
   if ($iptype && $iptype ne 'PUBLIC') {
     my $File = $self->{'W3C::Validator::File'};
-    $File->{'Error Flagged'}            =  1;
+    $File->{'Error Flagged'} = 1;
     $File->{E}->param(fatal_ip_error    => 1);
     $File->{E}->param(fatal_ip_hostname => 1)
       if $addr and $uri->host() ne $addr;
Received on Monday, 15 August 2005 22:48:02 UTC