- From: Terje Bless via cvs-syncmail <cvsmail@w3.org>
- Date: Mon, 15 Aug 2005 22:47:53 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/validator/httpd/cgi-bin In directory hutz:/tmp/cvs-serv25103/httpd/cgi-bin Modified Files: Tag: validator-0_7-branch check Log Message: Merging changes between validator-0_7_0-release and current HEAD at tag validator-0_7-branchpoint. Index: check =================================================================== RCS file: /sources/public/validator/httpd/cgi-bin/check,v retrieving revision 1.432 retrieving revision 1.432.2.1 diff -u -d -r1.432 -r1.432.2.1 --- check 29 Jul 2005 06:26:15 -0000 1.432 +++ check 15 Aug 2005 22:47:51 -0000 1.432.2.1 @@ -57,7 +57,6 @@ use URI qw(); use URI::Escape qw(uri_escape); - ############################################################################### #### Constant definitions. #################################################### ############################################################################### @@ -96,7 +95,6 @@ # Define global variables. use vars qw($DEBUG $CFG $RSRC $VERSION); - # # Things inside BEGIN don't happen on every request in persistent environments # (such as mod_perl); so let's do the globals, eg. read config, here. @@ -142,7 +140,7 @@ # # Check a filesystem path for existance and "readability". sub pathcheck (@) { - my %paths = map {$_ => [-d $_, -r _]} @_; + my %paths = map { $_ => [-d $_, -r _] } @_; my @_d = grep {not $paths{$_}->[0]} keys %paths; my @_r = grep {not $paths{$_}->[1]} keys %paths; return TRUE if (scalar(@_d) + scalar(@_r) == 0); @@ -169,7 +167,7 @@ # # Split allowed protocols into a list. if (my $allowed = delete($CFG->{Protocols}->{Allow})) { - $CFG->{Protocols}->{Allow} = [ split(/\s*,\s*/, $allowed) ]; + $CFG->{Protocols}->{Allow} = [split(/\s*,\s*/, $allowed)]; } # @@ -180,7 +178,7 @@ { # Make types config indexed by FPI. my $_types = {}; - map {$_types->{$CFG->{Types}->{$_}->{PubID}} = $CFG->{Types}->{$_}} + map { $_types->{$CFG->{Types}->{$_}->{PubID}} = $CFG->{Types}->{$_} } keys %{$CFG->{Types}}; $CFG->{Types} = $_types; } @@ -188,10 +186,10 @@ # # Change strings to internal constants in MIME type mapping. for (keys %{$CFG->{MIME}}) { - if ($CFG->{MIME}->{$_} eq 'SGML') {$CFG->{MIME}->{$_} = MODE_SGML} - elsif ($CFG->{MIME}->{$_} eq 'XML') {$CFG->{MIME}->{$_} = MODE_XML} - elsif ($CFG->{MIME}->{$_} eq 'TBD') {$CFG->{MIME}->{$_} = MODE_TBD} - else {$CFG->{MIME}->{$_} = MODE_TBD}; + if ($CFG->{MIME}->{$_} eq 'SGML') { $CFG->{MIME}->{$_} = MODE_SGML } + elsif ($CFG->{MIME}->{$_} eq 'XML') { $CFG->{MIME}->{$_} = MODE_XML } + elsif ($CFG->{MIME}->{$_} eq 'TBD') { $CFG->{MIME}->{$_} = MODE_TBD } + else { $CFG->{MIME}->{$_} = MODE_TBD } } # @@ -204,8 +202,8 @@ # # Strings - $VERSION = q$Revision$; - $VERSION =~ s/Revision: ([\d\.]+) /$1/; + $VERSION = q$Revision$; + $VERSION =~ s/Revision: ([\d\.]+) /$1/; # # Use passive FTP by default. @@ -216,7 +214,6 @@ # Get rid of (possibly insecure) $PATH. delete $ENV{PATH}; - #@@DEBUG: Dump $CFG datastructure. Used only as a developer aid. #use Data::Dumper qw(Dumper); #print Dumper($CFG); @@ -235,7 +232,6 @@ # The data structure that will hold all session data. my $File; - ############################################## # Populate $File->{Env} -- Session Metadata. # ############################################## @@ -244,7 +240,6 @@ # The URL to this CGI Script. $File->{Env}->{'Self URI'} = $q->url(-query => 0); - ################################# # Initialize the datastructure. # ################################# @@ -272,7 +267,6 @@ $File->{Warnings} = []; # Warnings... $File->{Namespaces} = []; # Other (non-root) Namespaces. - ############################################################################### #### Generate Template for Result. ############################################ ############################################################################### @@ -300,14 +294,12 @@ $File->{E} = $E; $File->{H} = $H; - # Read friendly error message file my $error_messages_list = File::Spec->catfile($CFG->{Paths}->{Templates}, $lang, 'error_messages.cfg'); my %config_opts = (-ConfigFile => $error_messages_list); my %rsrc = Config::General->new(%config_opts)->getall(); $RSRC = \%rsrc; - $T->param(cfg_home_page => $CFG->{'Home Page'}); undef $lang; @@ -331,7 +323,6 @@ $File->{Opt}->{'Verbose'} = $q->param('verbose') ? TRUE : FALSE; $File->{Opt}->{'Debug'} = $q->param('debug') ? TRUE : FALSE; $File->{Opt}->{'No200'} = $q->param('No200') ? TRUE : FALSE; -# $File->{Opt}->{'Fussy'} = $q->param('fussy') ? TRUE : FALSE; $File->{Opt}->{'Charset'} = $q->param('charset') ? lc $q->param('charset'): ''; $File->{Opt}->{'DOCTYPE'} = $q->param('doctype') ? $q->param('doctype') : ''; $File->{Opt}->{'Output'} = $q->param('output') ? $q->param('output') : 'html'; @@ -381,7 +372,6 @@ # by Apache::Registry's idiotic interference under mod_perl. untie *STDIN; - ############################################################################### #### Output validation results. ############################################### ############################################################################### @@ -415,8 +405,6 @@ $File->{Charset}->{Use} = 'utf-8'; # UTF-8 (image/svg+xml etc.) } - - $File->{Content} = &normalize_newlines($File->{Bytes}, exact_charset($File, $File->{Charset}->{Use})); @@ -451,9 +439,9 @@ $File->{Tentative} |= T_ERROR; $File->{Charset}->{Use} = $File->{Charset}->{Override}; } - else { #actually overriding something + else { #actually overriding something # Warn about Override unless it's the same as the real charset... - + unless ($File->{Charset}->{Override} eq $File->{Charset}->{Use}) { &add_warning('W03', { W03_use => $File->{Charset}->{Use}, @@ -462,11 +450,8 @@ $File->{Tentative} |= T_ERROR; $File->{Charset}->{Use} = $File->{Charset}->{Override}; - } + } } - - - } } @@ -476,12 +461,10 @@ $File->{Charset}->{Use} = 'utf-8'; } - # # Abort if an error was flagged while finding the encoding. &abort_if_error_flagged($File, O_CHARSET|O_DOCTYPE); - # # Check the detected Encoding and transcode. if (&conflict($File->{Charset}->{Use}, 'utf-8')) { @@ -489,7 +472,6 @@ &abort_if_error_flagged($File, O_CHARSET); } - $File = &check_utf8($File); # always check $File = &byte_error($File); @@ -497,8 +479,6 @@ # Abort if an error was flagged during transcoding &abort_if_error_flagged($File, O_SOURCE|O_CHARSET); - - # # Overall parsing algorithm for documents returned as text/html: # @@ -530,13 +510,13 @@ my $cfg = $CFG->{Types}->{$fpi}; my $mode = $cfg->{'Parse Mode'}; - if ($mode eq 'SGML') {$mode = MODE_SGML} - elsif ($mode eq 'XML') {$mode = MODE_XML} - else {$mode = MODE_TBD} + if ($mode eq 'SGML') { $mode = MODE_SGML } + elsif ($mode eq 'XML') { $mode = MODE_XML } + else { $mode = MODE_TBD } if ($File->{Mode} == MODE_TBD) { - if ($mode == MODE_SGML) {$File->{Mode} = MODE_SGML} - elsif ($mode == MODE_XML) {$File->{Mode} = MODE_XML} + if ($mode == MODE_SGML) { $File->{Mode} = MODE_SGML } + elsif ($mode == MODE_XML) { $File->{Mode} = MODE_XML } else { $File->{Mode} = MODE_SGML; &add_warning('W06', {}); @@ -546,9 +526,9 @@ my $dtd = $mode; my $ct = $File->{Mode}; for ($dtd, $ct) { - if ($_ == MODE_SGML) {$_ = 'SGML'} - elsif ($_ == MODE_XML) {$_ = 'XML'} - else {$_ = 'SGML'}; + if ($_ == MODE_SGML) { $_ = 'SGML' } + elsif ($_ == MODE_XML) { $_ = 'XML' } + else { $_ = 'SGML' } } unless ($File->{Mode} == MODE_TBD) { &add_warning('W07', { @@ -565,7 +545,6 @@ } } - # # Sanity check Charset information and add any warnings necessary. $File = &charset_conflicts($File); @@ -595,17 +574,8 @@ if (&is_xml($File)) { $catalog = File::Spec->catfile($CFG->{Paths}->{SGML}->{Library}, 'xml.soc'); push(@spopt, '-wxml'); - } else { # Only add these in SGML mode. -# if ($File->{Opt}->{'Fussy'}) { -# push @spopt, '-wmin-tag'; -# push @spopt, '-wfully-tagged'; -# push @spopt, '-wrefc'; -# push @spopt, '-wmissing-att-name'; -# push @spopt, '-wdata-delim'; -# } } - - + # # Defaults for SP; turn off fixed charset mode and set encoding to UTF-8. $ENV{SP_CHARSET_FIXED} = 'NO'; @@ -620,7 +590,6 @@ # Set the command to execute. my @cmd = ($CFG->{Paths}->{SGML}->{Parser}, '-n', '-c', $catalog, @spopt); - # # Set debug info for HTML report. $T->param(opt_debug => $DEBUG); @@ -673,7 +642,7 @@ # Run it through SP, redirecting output to temporary files. my $pid = do { no warnings 'once'; - local(*SPIN, *SPOUT, *SPERR) = ($spin, $spout, $sperr); + local (*SPIN, *SPOUT, *SPERR) = ($spin, $spout, $sperr); open3("<&SPIN", ">&SPOUT", ">&SPERR", @cmd); }; undef $spin; @@ -708,7 +677,7 @@ } } - next if / IMPLIED$/ && not $DEBUG;; + next if / IMPLIED$/ && not $DEBUG; next if /^ASDAFORM CDATA /; next if /^ASDAPREF CDATA /; chomp; # Removes trailing newlines @@ -758,7 +727,6 @@ $File->{Version} = $prettyver; } - # # Warn about unknown, incorrect, or missing Namespaces. if ($File->{Namespace}) { @@ -790,11 +758,8 @@ } else { &prep_template($File, $T); - - - if (! $File->{Doctype} and ($File->{Version} eq 'unknown' or $File->{Version} eq 'SGML' or (!$File->{Version}))) { - # @@TODO@@ we should try falling back on other version info, such as the ones stored in Version_ESIS + # @@TODO@@ we should try falling back on other version info, such as the ones stored in Version_ESIS $T->param(file_version => '(no Doctype found)'); } else { @@ -844,7 +809,6 @@ undef $File; exit; - ############################################################################# # Subroutine definitions ############################################################################# @@ -891,7 +855,7 @@ # # Tip of the Day... - my $tip = &get_tip; + my $tip = &get_tip(); $T->param(tip_uri => $tip->[0]); $T->param(tip_slug => $tip->[1]); @@ -908,17 +872,16 @@ } my @nss = map({uri => $_}, @{$File->{Namespaces}}); $T->param(file_namespaces => \@nss) if @nss; - + if ($File->{Opt}->{DOCTYPE}) { my $over_doctype_param = "override doctype $File->{Opt}->{DOCTYPE}"; - $T->param($over_doctype_param => TRUE); + $T->param($over_doctype_param => TRUE); } - + if ($File->{Opt}->{Charset}) { my $over_charset_param = "override charset $File->{Opt}->{Charset}"; - $T->param($over_charset_param => TRUE); + $T->param($over_charset_param => TRUE); } - } # @@ -951,7 +914,6 @@ $T->param(file_thispage => $thispage); } - # # Add a waring message to the output. sub add_warning ($$) { @@ -962,7 +924,6 @@ $File->{T}->param(have_warnings => TRUE); } - # # Proxy authentication requests. # Note: expects the third argument to be a hash ref (see HTTP::Headers::Auth). @@ -996,7 +957,6 @@ exit; # Further interaction will be a new HTTP request. } - # # Fetch an URL and return the content and selected meta-info. sub handle_uri { @@ -1015,12 +975,12 @@ unless ($ua->is_protocol_supported($uri)) { $File->{'Error Flagged'} = TRUE; - if (($uri->canonical() eq "1") ) + if (($uri->canonical() eq "1") ) #if uri param is empty (also for empty direct or upload), it's been set to TRUE in sub prepCGI() { - $File->{E}->param(fatal_no_content => TRUE); + $File->{E}->param(fatal_no_content => TRUE); } - else { + else { $File->{E}->param(fatal_uri_error => TRUE); $File->{E}->param(fatal_uri_scheme => $uri->scheme()); } @@ -1061,7 +1021,7 @@ # Enforce Max Recursion level. &check_recursion($File, $res); - my($mode, $ct, $charset) + my ($mode, $ct, $charset) = &parse_content_type( $File, $res->header('Content-Type'), @@ -1089,7 +1049,6 @@ $File->{'Is Upload'} = FALSE; return $File; - } # @@ -1105,7 +1064,7 @@ local $/ = undef; # set line delimiter so that <> reads rest of file $file = <$f>; - my($mode, $ct, $charset) = &parse_content_type($File, $h->{'Content-Type'}); + my ($mode, $ct, $charset) = &parse_content_type($File, $h->{'Content-Type'}); $File->{Bytes} = $file; $File->{Mode} = $mode; @@ -1140,7 +1099,6 @@ return $File; } - # # Parse a Content-Type and parameters. Return document type and charset. sub parse_content_type { @@ -1150,12 +1108,12 @@ my $charset = ''; my $mode = ''; - my($ct, @param) = split /\s*;\s*/, lc $Content_Type; + my ($ct, @param) = split /\s*;\s*/, lc $Content_Type; $mode = $CFG->{MIME}->{$ct} || $ct; foreach my $param (@param) { - my($p, $v) = split /\s*=\s*/, $param; + my ($p, $v) = split /\s*=\s*/, $param; next unless $p =~ m(charset)i; if ($v =~ m/([\'\"]?)(\S+)\1/i) { $charset = lc $2; @@ -1198,8 +1156,6 @@ } } - - # # Normalize newline forms (CRLF/CR/LF) to native newline. sub normalize_newlines { @@ -1237,7 +1193,6 @@ return $exact_charset; } - # # Return $_[0] encoded for HTML entities (cribbed from merlyn). # @@ -1250,7 +1205,6 @@ return $_; } - # # Truncate source lines for report. # @@ -1299,14 +1253,14 @@ return $line, $col; } - # # Suppress any existing DOCTYPE by commenting it out. sub override_doctype { no strict 'vars'; - my $File = shift; - my ($dt) = + my $File = shift; + my ($dt) = grep { $_->{Display} eq $File->{Opt}->{DOCTYPE} } values %{$CFG->{Types}}; + # @@TODO: abort/whine about unrecognized doctype if $dt is undef.; my $pubid = $dt->{PubID}; my $sysid = $dt->{SysID}; @@ -1314,9 +1268,10 @@ local $dtd = qq(<!DOCTYPE $name PUBLIC "$pubid"); $dtd .= qq( "$sysid") if $sysid; # We don't have one for all types. $dtd .= '>'; + local $org_dtd = ''; - local $HTML = ''; - local $seen = FALSE; + local $HTML = ''; + local $seen = FALSE; my $declaration = sub { $seen = TRUE; @@ -1324,7 +1279,8 @@ # No Override if Fallback was requested. if ($File->{Opt}->{FB}->{DOCTYPE}) { $HTML .= $_[0]; # Stash it as is... - } else { # Comment it out and insert the new one... + } else { + # Comment it out and insert the new one... $HTML .= "$dtd\n" . '<!-- ' . $_[0] . ' -->'; $org_dtd = &ent($_[0]); } @@ -1359,7 +1315,6 @@ return $File; } - # # Parse errors reported by SP. sub parse_errors ($$) { @@ -1375,12 +1330,12 @@ push @{$File->{DEBUG}->{Errors}}, $_; chomp; - my($err, @errors); + my ($err, @errors); next if /^<OSFD>0:[0-9]+:[0-9]+:[^A-Z]/; next if /numbers exceeding 65535 not supported/; next if /URL Redirected to/; - my(@_err) = split /:/; + my (@_err) = split /:/; next unless $_err[1] eq '<OSFD>0'; #@@FIXME: This is a polite fiction!; if ($_err[1] =~ m(^<URL>)) { @errors = ($_err[0], join(':', $_err[1], $_err[2]), @_err[3..$#_err]); @@ -1390,6 +1345,7 @@ $err->{src} = $errors[1]; $err->{line} = $errors[2]; $err->{char} = $errors[3]; + # Workaround for onsgmls 1.5 sometimes reporting errors beyond EOL. if ((my $l = length($File->{Content}->[$err->{line}-1])) < $err->{char}) { $err->{char} = $l; @@ -1399,6 +1355,7 @@ if ($err->{type} eq 'E' or $err->{type} eq 'X' or $err->{type} eq 'Q') { $err->{msg} = join ':', @errors[6 .. $#errors]; } elsif ($err->{type} eq 'W') { + #@@FIXME: This is borked after templatification. # &add_warning($File, 'fake', 'Warning:', # "Line $err->{line}, column $err->{char}: " . &ent($errors[6])); @@ -1445,13 +1402,13 @@ if (scalar @{$File->{Errors}}) { foreach my $err (@{$File->{Errors}}) { - my($line, $col) = &truncate_line($File->{Content}->[$err->{line}-1], $err->{char}); + my ($line, $col) = &truncate_line($File->{Content}->[$err->{line}-1], $err->{char}); $line = &mark_error($line, $col); my $explanation; if ($err->{num}) { - my(undef, $num) = split /\./, $err->{num}; + my (undef, $num) = split /\./, $err->{num}; if (exists $Msgs{$num}) { # We've already seen this message... if ($File->{Opt}->{Verbose}) { # ...so only repeat it in Verbose mode. $explanation = qq(\n <div class="hidden mid-$num"></div>\n); @@ -1468,7 +1425,7 @@ { $_msg =~ s/<!--URI-->//g } - else + else { my $escaped_uri = uri_escape($File->{URI}); $_msg =~ s/<!--URI-->/$escaped_uri/g; @@ -1479,7 +1436,7 @@ $err->{src} = $line; $err->{col} = ' ' x $col; $err->{expl} = $explanation; - if ($err->{type} eq 'I') + if ($err->{type} eq 'I') { $err->{class} = 'msg_info'; $err->{err_type_info} = 1; @@ -1490,7 +1447,7 @@ $err->{err_type_err} = 1; $number_of_errors += 1; } - elsif (($err->{type} eq 'W') or ($err->{type} eq 'X') ) + elsif (($err->{type} eq 'W') or ($err->{type} eq 'X') ) { $err->{class} = 'msg_warn'; $err->{err_type_warn} = 1; @@ -1575,7 +1532,6 @@ return $line; } - # # Produce an outline of the document based on Hn elements from the ESIS. sub outline { @@ -1586,7 +1542,7 @@ my $prevlevel = 0; my $level = 0; - for (1 .. $#{$File->{ESIS}}) { + for (1..$#{$File->{ESIS}}) { my $line = $File->{ESIS}->[$_]; next unless ($line && $line =~ /^\(H([1-6])$/i); @@ -1646,7 +1602,6 @@ return $outline; } - # # Create a HTML representation of the document. sub source { @@ -1660,7 +1615,6 @@ return \@source; } - # # Create a HTML Parse Tree of the document for validation report. sub parsetree { @@ -1719,7 +1673,6 @@ return $tree; } - # # Do an initial parse of the Document Entity to extract FPI. sub preparse_doctype { @@ -1732,7 +1685,7 @@ my $dtd = sub { return if $File->{Root}; - ($File->{Root}, $File->{DOCTYPE}) = shift =~ m(<!DOCTYPE\s+(\w+)\s+(?:PUBLIC|SYSTEM)\s+(?:[\'\"])([^\"\']+)(?:[\"\']).*>)si; + ($File->{Root}, $File->{DOCTYPE}) = shift =~ m(<!DOCTYPE\s+(\w+)\s+(?:PUBLIC|SYSTEM)\s+(?:[\'\"])([^\"\']+)(?:[\"\']).*>)si; }; my $start = sub { @@ -1771,11 +1724,10 @@ for (@{shift->{ESIS}}) { s/\\012//g; s/\\n/\n/g; - - $file_esis .= ent $_; - $file_esis .= "\n"; + $file_esis .= ent $_; + $file_esis .= "\n"; } - return $file_esis; + return $file_esis; } # @@ -1788,12 +1740,11 @@ return $file_raw_errors; } - # # Preprocess CGI parameters. sub prepCGI { my $File = shift; - my $q = shift; + my $q = shift; # Avoid CGI.pm's "exists but undef" behaviour. if (scalar $q->param) { @@ -1801,6 +1752,7 @@ next if $param eq 'uploaded_file'; # 'uploaded_file' contains data. next if $param eq 'fragment'; # Ditto 'fragment'. next if $q->param($param) eq '0'; # Keep false-but-set params. + # # Parameters that are given to us without specifying a value get # set to "1" (the "TRUE" constant). This is so we can test for the @@ -1845,6 +1797,7 @@ print redirect &self_url_q($q, $File); exit; } else { + # Redirected from /check/referer to /check?uri=referer because # the browser didn't send a Referer header, or the request was # for /check?uri=referer but no Referer header was found. @@ -1909,7 +1862,6 @@ return $ssi; } - # # Utility sub to tell if mode "is" XML. sub is_xml {shift->{Mode} == MODE_XML}; @@ -1922,7 +1874,7 @@ my $dtd = sub { return if $File->{Root}; - ($File->{Root}, $File->{DOCTYPE}) = shift =~ m(<!DOCTYPE\s+(\w+)\s+PUBLIC\s+(?:[\'\"])([^\"\']+)(?:[\"\']).*>)si; + ($File->{Root}, $File->{DOCTYPE}) = shift =~ m(<!DOCTYPE\s+(\w+)\s+PUBLIC\s+(?:[\'\"])([^\"\']+)(?:[\"\']).*>)si; }; my $start = sub { @@ -1999,7 +1951,6 @@ return $File; } - # # Transcode to UTF-8 sub transcode { @@ -2037,10 +1988,13 @@ $_ = $c->convert($_); # $_ is local!! if ($in ne "" and (!defined($_) || $_ eq "")) { push @{$File->{Lines}}, $line; + # try to decoded as much as possible of the line - my $short = 0; # longest okay - my $long = (length $in) - 1; # longest unknown - while ($long > $short) { # binary search + my $short = 0; # longest okay + my $long = (length $in) - 1; # longest unknown + + while ($long > $short) { + # binary search my $try = int (($long+$short+1) / 2); my $converted = $c->convert(substr($in, 0, $try)); if (!defined($converted) || $converted eq "") { @@ -2108,17 +2062,16 @@ return $File; } - # # Return an XML report for the page. sub report_xml { my $File = shift; my $valid = ($File->{'Is Valid'} ? 'Valid' : 'Invalid'); - my $errs = ($File->{'Is Valid'} ? '0' : scalar @{$File->{Errors}}); + my $errs = ($File->{'Is Valid'} ? '0' : scalar @{$File->{Errors}}); if ($File->{E}->param('fatal_http_error')) { $valid = 'Could not validate'; - } + } print <<".EOF."; Content-Type: application/xml; charset=UTF-8 @@ -2190,13 +2143,13 @@ chomp $err->{msg}; # Find index into the %frag hash for the "explanation..." links. - $err->{idx} = $err->{msg}; + $err->{idx} = $err->{msg}; $err->{idx} =~ s/"[^\"]*"/FOO/g; $err->{idx} =~ s/[^A-Za-z ]//g; - $err->{idx} =~ s/\s+/ /g; # Collapse spaces - $err->{idx} =~ s/(^\s|\s$)//g; # Remove leading and trailing spaces. + $err->{idx} =~ s/\s+/ /g; # Collapse spaces + $err->{idx} =~ s/(^\s|\s$)//g; # Remove leading and trailing spaces. $err->{idx} =~ s/(FOO )+/FOO /g; # Collapse FOOs. - $err->{idx} =~ s/FOO FOO/FOO/g; # Collapse FOOs. + $err->{idx} =~ s/FOO FOO/FOO/g; # Collapse FOOs. my $offset = $File->{Offsets}->[$err->{line} - 1]->[1] + $err->{char}; printf <<".EOF.", &ent($err->{msg}); @@ -2208,18 +2161,16 @@ print qq(</result>\n); } - - # # Return an EARL report for the page. sub report_earl { my $File = shift; my $valid = ($File->{'Is Valid'} ? 'Valid' : 'Invalid'); - my $errs = ($File->{'Is Valid'} ? '0' : scalar @{$File->{Errors}}); + my $errs = ($File->{'Is Valid'} ? '0' : scalar @{$File->{Errors}}); if ($File->{E}->param('fatal_http_error')) { $valid = 'Could not validate'; - } + } print <<".EOF."; Content-Type: application/rdf+xml; charset=UTF-8 @@ -2260,10 +2211,10 @@ $err->{idx} = $err->{msg}; $err->{idx} =~ s/"[^\"]*"/FOO/g; $err->{idx} =~ s/[^A-Za-z ]//g; - $err->{idx} =~ s/\s+/ /g; # Collapse spaces - $err->{idx} =~ s/(^\s|\s\Z)//g; # Remove leading and trailing spaces. + $err->{idx} =~ s/\s+/ /g; # Collapse spaces + $err->{idx} =~ s/(^\s|\s\Z)//g; # Remove leading and trailing spaces. $err->{idx} =~ s/(FOO )+/FOO /g; # Collapse FOOs. - $err->{idx} =~ s/FOO FOO/FOO/g; # Collapse FOOs. + $err->{idx} =~ s/FOO FOO/FOO/g; # Collapse FOOs. my @offsets = ( $File->{Offsets}->[$err->{line} ]->[0], @@ -2305,8 +2256,6 @@ .EOF. } - - # # Return a Notation3 EARL report for the page. # @@ -2318,7 +2267,7 @@ my $errs = ($File->{'Is Valid'} ? '0' : scalar @{$File->{Errors}}); if ($File->{E}->param('fatal_http_error')) { $valid = 'Could not validate'; - } + } print <<".EOF."; Content-Type: text/plain; charset=UTF-8 @@ -2343,10 +2292,10 @@ $err->{idx} = $err->{msg}; $err->{idx} =~ s/"[^\"]*"/FOO/g; $err->{idx} =~ s/[^A-Za-z ]//g; - $err->{idx} =~ s/\s+/ /g; # Collapse spaces - $err->{idx} =~ s/(^\s|\s\Z)//g; # Remove leading and trailing spaces. + $err->{idx} =~ s/\s+/ /g; # Collapse spaces + $err->{idx} =~ s/(^\s|\s\Z)//g; # Remove leading and trailing spaces. $err->{idx} =~ s/(FOO )+/FOO /g; # Collapse FOOs. - $err->{idx} =~ s/FOO FOO/FOO/g; # Collapse FOOs. + $err->{idx} =~ s/FOO FOO/FOO/g; # Collapse FOOs. my @offsets = ( $File->{Offsets}->[$err->{line} ]->[0], @@ -2391,7 +2340,6 @@ print " .\n"; } - # # Autodetection as in Appendix F of the XML 1.0 Recommendation. # <http://www.w3.org/TR/2000/REC-xml-20001006#sec-guessing> @@ -2429,7 +2377,6 @@ # nothing in particular } - # # Find encoding in document according to XML rules # Only meaningful if file contains a BOM, or for well-formed XML! @@ -2439,20 +2386,24 @@ ($File->{Charset}->{Auto}, $File->{BOM}, $CodeUnitSize, $Pattern) = &find_base_encoding($File->{Bytes}); + + # 100 arbitrary, but enough in any case my $someBytes = substr $File->{Bytes}, $File->{BOM}, ($CodeUnitSize * 100); - my $someText = ''; # 100 arbitrary, but enough in any case + my $someText = ''; # translate from guessed encoding to ascii-compatible if ($File->{Charset}->{Auto} eq 'ebcdic') { + # special treatment for EBCDIC, maybe use tr/// # work on this later } elsif (!$Pattern) { $someText = $someBytes; # efficiency shortcut } - else { # generic code for UTF-16/UCS-4 + else { + # generic code for UTF-16/UCS-4 $someBytes =~ /^(($Pattern)*)/s; - $someText = $1; # get initial piece without chars >255 + $someText = $1; # get initial piece without chars >255 $someText =~ s/$Pattern/$1/sg; # select the relevant bytes } @@ -2480,6 +2431,7 @@ print $E->output; exit; } else { + #@@FIXME: This is borked after templatification. # &add_warning($File, 'fatal', 'Fatal Error', <<".EOF."); # A fatal error has occurred while processing the requested document. Processing @@ -2501,19 +2453,18 @@ return $encodingA && $encodingB && ($encodingA ne $encodingB); } - # # Construct a self-referential URL from a CGI.pm $q object. sub self_url_q { my ($q, $File) = @_; my $thispage = $File->{Env}->{'Self URI'}; - $thispage .= '?uri=' . uri_escape($q->param('uri')); - $thispage .= ';ss=1' if $q->param('ss'); - $thispage .= ';sp=1' if $q->param('sp'); - $thispage .= ';noatt=1' if $q->param('noatt'); - $thispage .= ';outline=1' if $q->param('outline'); - $thispage .= ';No200=1' if $q->param('No200'); - $thispage .= ';verbose=1' if $q->param('verbose'); + $thispage .= '?uri=' . uri_escape($q->param('uri')); + $thispage .= ';ss=1' if $q->param('ss'); + $thispage .= ';sp=1' if $q->param('sp'); + $thispage .= ';noatt=1' if $q->param('noatt'); + $thispage .= ';outline=1' if $q->param('outline'); + $thispage .= ';No200=1' if $q->param('No200'); + $thispage .= ';verbose=1' if $q->param('verbose'); if ($q->param('doctype') and not $q->param('doctype') =~ /(Inline|detect)/i) { $thispage .= ';doctype=' . uri_escape($q->param('doctype')); @@ -2528,8 +2479,8 @@ # Return random Tip with it's URL. sub get_tip { my @tipAddrs = keys %{$CFG->{Tips}}; - my $tipAddr = $tipAddrs[rand scalar @tipAddrs]; - my $tipSlug = $CFG->{Tips}->{$tipAddr}; + my $tipAddr = $tipAddrs[rand scalar @tipAddrs]; + my $tipSlug = $CFG->{Tips}->{$tipAddr}; return [$tipAddr, $tipSlug]; } @@ -2552,7 +2503,6 @@ return $thispage; } - ##### package W3C::Validator::UserAgent; @@ -2564,8 +2514,7 @@ use base qw(LWP::UserAgent); -sub new -{ +sub new { my ($proto, $CFG, $File, @rest) = @_; my $class = ref($proto) || $proto; my $self = $class->SUPER::new(@rest); @@ -2574,14 +2523,12 @@ return $self; } -sub redirect_ok -{ +sub redirect_ok { my ($self, $req, $res) = @_; return $self->SUPER::redirect_ok($req, $res) && $self->uri_ok($req->uri()); } -sub uri_ok -{ +sub uri_ok { my ($self, $uri) = @_; return 1 if ($self->{'W3C::Validator::CFG'}->{'Allow Private IPs'} or !$uri->can('host')); @@ -2595,7 +2542,7 @@ } if ($iptype && $iptype ne 'PUBLIC') { my $File = $self->{'W3C::Validator::File'}; - $File->{'Error Flagged'} = 1; + $File->{'Error Flagged'} = 1; $File->{E}->param(fatal_ip_error => 1); $File->{E}->param(fatal_ip_hostname => 1) if $addr and $uri->host() ne $addr;
Received on Monday, 15 August 2005 22:48:02 UTC