validator/httpd/cgi-bin check,1.330,1.331

Update of /sources/public/validator/httpd/cgi-bin
In directory hutz:/tmp/cvs-serv17697/httpd/cgi-bin

Modified Files:
	check 
Log Message:
Merging from branch validator-0_6_0-branch, at tag validator-0_6_7-pre2.
(0.6.7 is missing relnotes and other minor tweaks)


Index: check
===================================================================
RCS file: /sources/public/validator/httpd/cgi-bin/check,v
retrieving revision 1.330
retrieving revision 1.331
diff -u -d -r1.330 -r1.331
--- check	21 May 2004 15:42:50 -0000	1.330
+++ check	21 Jul 2004 15:07:31 -0000	1.331
@@ -3,7 +3,7 @@
 # W3C Markup Validation Service
 # A CGI script to retrieve and validate a markup file
 #
-# Copyright 1995-2002 Gerald Oskoboiny <gerald@w3.org>
+# Copyright 1995-2004 Gerald Oskoboiny <gerald@w3.org>
 # for additional contributors, see http://dev.w3.org/cvsweb/validator/
 #
 # This source code is available under the license at:
@@ -48,6 +48,7 @@
 use HTML::Parser    3.25 qw(); # Need 3.25 for $p->ignore_elements.
 use HTML::Template  2.6  qw();
 use HTTP::Request        qw();
+use HTTP::Headers::Auth  qw(); # Needs to be imported after other HTTP::*.
 use IO::File             qw();
 use IPC::Open3           qw(open3);
 use LWP::UserAgent  1.90 qw(); # Need 1.90 for protocols_(allowed|forbidden)
@@ -77,6 +78,7 @@
 use constant T_WARN  =>  4; # 0000 0100
 use constant T_ERROR =>  8; # 0000 1000
 use constant T_FATAL => 16; # 0001 0000
+use constant T_FALL  => 32; # 0010 0000, Fallback in effect.
 
 #
 # Output flags for error processing
@@ -316,6 +318,7 @@
   # If ";debug" was given, let it overrule the value from the config file,
   # regardless of whether it's "0" or "1" (on or off).
   $DEBUG = $q->param('debug') if defined $q->param('debug');
+  $File->{Opt}->{Verbose} = TRUE if $DEBUG;
 
   &abort_if_error_flagged($File, O_NONE); # Too early to &print_table.
 
@@ -401,29 +404,37 @@
   $File->{Charset}->{Use} = $File->{Charset}->{META};
 }
 
+#
+# Handle any Fallback or Override for the charset.
 if (&conflict($File->{Opt}->{Charset}, '(detect automatically)')) {
+  # charset=foo was given to the CGI and it wasn't "autodetect".
+
+  #
+  # Extract the user-requested charset from CGI param.
   my ($override, undef) = split(/\s/, $File->{Opt}->{Charset}, 2);
   $File->{Charset}->{Override} = lc($override);
-  unless ($File->{Charset}->{Use} and $File->{Opt}->{FB}->{Charset}) {
-    if ($File->{Opt}->{FB}->{Charset} and not $File->{Charset}->{Use}) {
+
+  if ($File->{Opt}->{FB}->{Charset}) {
+    unless ($File->{Charset}->{Use}) {
       &add_warning($File, 'fallback', 'No Character Encoding Found!', <<".EOF."); # Warn about fallback...
   Falling back to "$File->{Charset}->{Override}"
   (<a href="docs/users.html#fbc">explain...</a>).
 .EOF.
       $File->{Tentative} |= T_ERROR; # Tag it as Invalid.
-    } else {
-      # Warn about Override...
-      unless ($File->{Charset}->{Override} eq $File->{Charset}->{Use}) {
-        my $cs_use = &ent($File->{Charset}->{Use});
-        my $cs_opt = &ent($File->{Charset}->{Override});
-        &add_warning($File, 'override', 'Character Encoding Override in effect!', <<".EOF.");
+      $File->{Charset}->{Use} = $File->{Charset}->{Override};
+    }
+  } else {
+    # Warn about Override unless it's the same as the real charset...
+    unless ($File->{Charset}->{Override} eq $File->{Charset}->{Use}) {
+      my $cs_use = &ent($File->{Charset}->{Use});
+      my $cs_opt = &ent($File->{Charset}->{Override});
+      &add_warning($File, 'override', 'Character Encoding Override in effect!', <<".EOF.");
       The detected character encoding "<code>$cs_use</code>"
       has been suppressed and "<code>$cs_opt</code>" used instead.
 .EOF.
-        $File->{Tentative} |= T_ERROR;
-      }
+      $File->{Tentative} |= T_ERROR;
+      $File->{Charset}->{Use} = $File->{Charset}->{Override};
     }
-    $File->{Charset}->{Use} = $File->{Charset}->{Override};
   }
 }
 
@@ -523,7 +534,7 @@
 
 #
 # Abort if an error was flagged during transcoding
-&abort_if_error_flagged($File, O_SOURCE);
+&abort_if_error_flagged($File, O_SOURCE|O_CHARSET);
 
 
 
@@ -719,8 +730,8 @@
   }
   undef $spout;
 
-  if ($File->{ESIS}->[-1] =~ /^C$/) {
-    undef $File->{ESIS}->[-1];
+  if (@{$File->{ESIS}} && $File->{ESIS}->[-1] =~ /^C$/) {
+    pop(@{$File->{ESIS}});
     $File->{'Is Valid'} = TRUE;
   } else {
     $File->{'Is Valid'} = FALSE;
@@ -735,6 +746,18 @@
   for (@{$File->{ESIS}}) {
     no warnings 'uninitialized';
     next unless /^AVERSION CDATA (.*)/;
+    if ($1 eq '-//W3C//DTD HTML Fallback//EN') {
+      $File->{Tentative} |= (T_ERROR | T_FALL);
+      &add_warning($File, 'fallback', 'DOCTYPE Fallback in effect!', <<".EOF.");
+      The DOCTYPE Declaration in your document was not recognized. This
+      probably means that the Formal Public Identifier contains a spelling
+      error, or that the Declaration is not using correct syntax. Validation
+      has been performed using a default "fallback" Document Type Definition
+      that closely resembles HTML 4.01 Transitional, but the document will not
+      be Valid until you have corrected the problem with the DOCTYPE
+      Declaration.
+.EOF.
+    }
     $File->{Version} = $1;
     last;
   }
@@ -976,29 +999,43 @@
 
 #
 # Proxy authentication requests.
+# Note: expects the third argument to be a hash ref (see HTTP::Headers::Auth).
 sub authenticate {
   my $File       = shift;
   my $resource   = shift;
-  my $authHeader = shift;
+  my $authHeader = shift || {};
+
   my $realm = $resource;
   $realm =~ s([^\w\d.-]*){}g;
-  $authHeader =~ s( realm=([\'\"])?([^\1]+)\1){ realm="$realm-$2"};
+  $resource = &ent($resource);
 
-    print <<"EOF";
+  for my $scheme (keys(%$authHeader)) {
+    my $origrealm = $authHeader->{$scheme}->{realm};
+    if (!defined($origrealm) || lc($scheme) !~ /^(?:basic|digest)$/) {
+      delete($authHeader->{$scheme});
+      next;
+    }
+    $authHeader->{$scheme}->{realm} = "$realm-$origrealm";
+  }
+
+  my $headers = HTTP::Headers->new(Connection => 'close');
+  $headers->content_type('text/html; charset=utf-8');
+  $headers->www_authenticate(%$authHeader);
+  $headers = $headers->as_string();
+
+  print <<"EOF";
 Status: 401 Authorization Required
-WWW-Authenticate: $authHeader
-Connection: close
-Content-Type: text/html; charset=utf-8
+$headers
 
 <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
   "http://www.w3.org/TR/1999/REC-html401-19991224/loose.dtd">
-<html lang="en" xml:lang="en">
+<html lang="en">
   <head><title>401 Authorization Required</title></head>
   <body>
     <h1>Authorization Required</h1>
-    <p>Sorry, I am not authorized to access the specified URI.</p>
+    <p>Sorry, I am not authorized to access the specified URL.</p>
     <p>
-      The URI you specified, &lt;<a href="$resource">$resource</a>&gt;,
+      The URL you specified, &lt;<a href="$resource">$resource</a>&gt;,
       returned a 401 "authorization required" response when I tried
       to download it.
     </p>
@@ -1062,14 +1099,14 @@
   </blockquote>
 
   <p>
-    Please make sure you have entered the URI correctly.
+    Please make sure you have entered the URL correctly.
   </p>
 
 EOF
 }
 
 #
-# Fetch an URI and return the content and selected meta-info.
+# Fetch an URL and return the content and selected meta-info.
 sub handle_uri {
   my $q    = shift; # The CGI object.
   my $File = shift; # The master datastructure.
@@ -1117,7 +1154,8 @@
 
   unless ($res->code == 200 || $File->{Opt}->{'No200'}) {
     if ($res->code == 401) {
-      &authenticate($File, $res->request->url, $res->www_authenticate);
+      my %auth = $res->www_authenticate(); # HTTP::Headers::Auth
+      &authenticate($File, $res->request->url, \%auth);
     } else {
       $File->{'Error Flagged'} = TRUE;
       $File->{'Error Message'} = &http_error($uri->as_string, $res->code, $res->message);
@@ -1378,8 +1416,11 @@
 
   my $declaration = sub {
     $seen = TRUE;
+
     # No Override if Fallback was requested.
-    unless ($File->{Opt}->{FB}->{DOCTYPE}) {
+    if ($File->{Opt}->{FB}->{DOCTYPE}) {
+      $HTML .= $_[0]; # Stash it as is...
+    } else { # Comment it out and insert the new one...
       $HTML .= "$dtd\n" . '<!-- ' . $_[0] . ' -->';
       $org_dtd = &ent($_[0]);
     }
@@ -1450,11 +1491,17 @@
     $err->{src}  = $errors[1];
     $err->{line} = $errors[2];
     $err->{char} = $errors[3];
+    # Workaround for onsgmls 1.5 sometimes reporting errors beyond EOL.
+    if ((my $l = length($File->{Content}->[$err->{line}-1])) < $err->{char}) {
+      $err->{char} = $l;
+    }
     $err->{num}  = $errors[4] || '';
     $err->{type} = $errors[5] || '';
     if ($err->{type} eq 'E' or $err->{type} eq 'X' or $err->{type} eq 'Q') {
       $err->{msg}  = join ':', @errors[6 .. $#errors];
     } elsif ($err->{type} eq 'W') {
+      &add_warning($File, 'fake', 'Warning:',
+        "Line $err->{line}, column $err->{char}: " . &ent($errors[6]));
       $err->{msg}  = join ':', @errors[6 .. $#errors];
     } else {
       $err->{type} = 'I';
@@ -1544,7 +1591,7 @@
 
       #DEBUG: Print misc. vars relevant to source display.
       if ($DEBUG) {
-        $line .= "<br/> <strong>org length: $orglength - adj length: $adjlength - org col: $orgcol - adj col: $adjcol</strong>";
+        $line .= "<br /> <strong>org length: $orglength - adj length: $adjlength - org col: $orgcol - adj col: $adjcol</strong>";
       }
       #DEBUG;
 
@@ -1696,7 +1743,7 @@
     $heading =~ s/\\012/ /g;
     $heading =~ s/\\n/ /g;
     $heading =~ s/\s+/ /g;
-    $heading = substr($heading, 1); # chop the leading '-' or ' '.
+    $heading =~ s/^[- ]//;
     $heading = &ent($heading);
     $outline .= "    <li>$heading</li>\n";
   }
@@ -1722,8 +1769,11 @@
   $File->{Content}->[0] =
     substr $File->{Content}->[0], ($File->{BOM} ? 3 : 0); # remove BOM
 
+  my $line    = 1;
+  my $maxhlen = length scalar @{$File->{Content}};
   for (@{$File->{Content}}) {
-    push @source, {file_source_i => $line, file_source_line => ent $_};
+    my $hline = (' ' x ($maxhlen - length("$line"))) . $line;
+    push @source, {file_source_i => $line, file_source_line => ent $hline};
     $line++;
   }
   return \@source;
@@ -1743,11 +1793,8 @@
   my $prevdata = '';
 
   foreach my $line (@{$File->{ESIS}}) {
-    if ($File->{Opt}->{'No Attributes'}) { # don't show attributes
-      next if $line =~ /^A/;
-      next if $line =~ /^\(A$/;
-      next if $line =~ /^\)A$/;
-    }
+
+    next if ($File->{Opt}->{'No Attributes'} && $line =~ /^A/);
 
     $line =~ s/\\n/ /g;
     $line =~ s/\\011/ /g;
@@ -1773,15 +1820,19 @@
 
     my $printme;
     chomp($printme = $line);
-    $printme =~ s{^([()])(.*)} # reformat and add links on HTML elements
-                 { my $close = '';
-                      $close = "/" if $1 eq ")"; # ")" -> close-tag
-                   "&lt;" . $close . "<a href=\"" .
-                   $CFG->{Element_Ref_URI} . $CFG->{Element_Map}->{lc($2)} .
-                   "\">$2<\/a>&gt;"
-                 }egx;
-    $printme =~ s,^A,  A,; # indent attributes a bit
+    if (my ($close, $elem) = $printme =~ /^([()])(.+)/) {
+      # reformat and add links on HTML elements
+      $close = ($close eq ')') ? '/' : ''; # ")" -> close-tag
+      if (my $u = $CFG->{'Element Map'}->{lc($elem)}) {
+        $elem = '<a href="' . $CFG->{'Element Ref URI'} . "$u\">$elem</a>";
+      }
+      $printme = "&lt;$close$elem&gt;";
+    } else {
+      $printme =~ s,^A,  A,; # indent attributes a bit
+    }
+
     $tree .= ' ' x $indent . $printme . "\n";
+
     if ($line =~ /^\(/) {
       $indent += 2;
     }
@@ -1895,7 +1946,7 @@
     }
   }
 
-  # Futz the URI so "/referer" works.
+  # Futz the URL so "/referer" works.
   if ($q->path_info) {
     if ($q->path_info eq '/referer' or $q->path_info eq '/referrer') {
       if ($q->referer) {
@@ -1917,7 +1968,7 @@
     $q->param('uri', $q->param('url'));
   }
 
-  # Munge the URI to include commonly omitted prefix.
+  # Munge the URL to include commonly omitted prefix.
   my $u = $q->param('uri');
   $q->param('uri', "http://$u") if $u && $u =~ m(^www)i;
 
@@ -1934,6 +1985,7 @@
       $File->{'Error Flagged'} = TRUE;
       $File->{'Error Message'} = <<".EOF.";
       <div class="error">
+        <a id="skip" name="skip"></a>
         <h2><strong>No Referer header found!</strong></h2>
         <p>
           You have requested we check the referring page, but your browser did
@@ -1948,20 +2000,20 @@
           Please use the form interface on the
           <a href="$CFG->{'Home Page'}">Validator Home Page</a> (or the
           <a href="detailed.html">Extended Interface</a>) to check the
-          page by URI.
+          page by URL.
         </p>
       </div>
 .EOF.
     }
   }
 
-  # Supersede URI with an uploaded file.
+  # Supersede URL with an uploaded file.
   if ($q->param('uploaded_file')) {
     $q->param('uri', 'upload://' . $q->param('uploaded_file'));
     $File->{'Is Upload'} = TRUE; # Tag it for later use.
   }
 
-  # Supersede URI with an uploaded fragment.
+  # Supersede URL with an uploaded fragment.
   if ($q->param('fragment')) {
     $q->param('uri', 'upload://Form Submission');
     $File->{'Is Upload'} = TRUE; # Tag it for later use.
@@ -2018,6 +2070,7 @@
   $msg = 'of ' . $msg if ($ip && $host ne $ip);
   return sprintf(<<".EOF.", &ent($msg));
     <div class="error">
+      <a id="skip" name="skip"></a>
       <p>
         Sorry, the IP address %s is not public.
         For security reasons, validating resources located at non-public IP
@@ -2029,26 +2082,27 @@
 
 
 #
-# Output errors for a rejected URI.
+# Output errors for a rejected URL.
 sub uri_rejected {
   my $scheme = shift || 'undefined';
 
   return sprintf(<<".EOF.", &ent($scheme));
     <div class="error">
+      <a id="skip" name="skip"></a>
       <p>
         Sorry, this type of
-        <a href="http://www.w3.org/Addressing/#terms">URI</a>
+        <a href="http://www.w3.org/Addressing/">URL</a>
         <a href="http://www.iana.org/assignments/uri-schemes">scheme</a>
         (<q>%s</q>) is not supported by this service. Please check
-        that you entered the URI correctly.
+        that you entered the URL correctly.
       </p>
-      <p>URIs should be in the form: <code>http://validator.w3.org/</code></p>
+      <p>URLs should be in the form: <code>http://validator.w3.org/</code></p>
       <p>
-        If you entered a valid URI using a scheme that we should support,
+        If you entered a valid URL using a scheme that we should support,
         please let us know as outlined on our
         <a href="feedback.html">Feedback page</a>. Make sure to include the
-        specific URI you would like us to support, and if possible provide a
-        reference to the relevant standards document describing the URI scheme
+        specific URL you would like us to support, and if possible provide a
+        reference to the relevant standards document describing the URL scheme
         in question.
       </p>
       <p class="tip">
@@ -2100,18 +2154,6 @@
   my $cs_meta = $File->{Charset}->{META} ? &ent($File->{Charset}->{META}) : '';
 
   #
-  # warn about charset override
-  if ($File->{Charset}->{Override} &&
-      $File->{Charset}->{Override} ne $File->{Charset}->{Use}) {
-    &add_warning($File, 'override', 'Character Encoding Override in effect!', <<".EOF.");
-      The detected character encoding, "<code>$cs_use</code>", has been
-      suppressed and the character encoding "<code>$cs_opt</code>" used
-      instead.
-.EOF.
-    $File->{Tentative} |= T_ERROR;
-  }
-
-  #
   # Add a warning if there was charset info conflict (HTTP header,
   # XML declaration, or <meta> element).
   if (&conflict($File->{Charset}->{HTTP}, $File->{Charset}->{XML})) {
@@ -2191,14 +2233,15 @@
     my $in = $_;
     $line++;
     $_ = $c->convert($_); # $_ is local!!
-    if ($in ne "" and $_ eq "") {
+    if ($in ne "" and (!defined($_) || $_ eq "")) {
       push @{$File->{Lines}}, $line;
       # try to decoded as much as possible of the line
       my $short = 0;               # longest okay
       my $long = (length $in) - 1; # longest unknown
       while ($long > $short) { # binary search
         my $try = int (($long+$short+1) / 2);
-        if ($c->convert(substr($in,0,$try)) eq "") {
+        my $converted = $c->convert(substr($in, 0, $try));
+        if (!defined($converted) || $converted eq "") {
           $long  = $try-1;
         } else {
           $short = $try;

Received on Wednesday, 21 July 2004 11:08:23 UTC