- From: Terje Bless via cvs-syncmail <cvsmail@w3.org>
- Date: Mon, 15 Aug 2005 22:47:53 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/validator/httpd/cgi-bin
In directory hutz:/tmp/cvs-serv25103/httpd/cgi-bin
Modified Files:
Tag: validator-0_7-branch
check
Log Message:
Merging changes between validator-0_7_0-release and current HEAD at tag
validator-0_7-branchpoint.
Index: check
===================================================================
RCS file: /sources/public/validator/httpd/cgi-bin/check,v
retrieving revision 1.432
retrieving revision 1.432.2.1
diff -u -d -r1.432 -r1.432.2.1
--- check 29 Jul 2005 06:26:15 -0000 1.432
+++ check 15 Aug 2005 22:47:51 -0000 1.432.2.1
@@ -57,7 +57,6 @@
use URI qw();
use URI::Escape qw(uri_escape);
-
###############################################################################
#### Constant definitions. ####################################################
###############################################################################
@@ -96,7 +95,6 @@
# Define global variables.
use vars qw($DEBUG $CFG $RSRC $VERSION);
-
#
# Things inside BEGIN don't happen on every request in persistent environments
# (such as mod_perl); so let's do the globals, eg. read config, here.
@@ -142,7 +140,7 @@
#
# Check a filesystem path for existance and "readability".
sub pathcheck (@) {
- my %paths = map {$_ => [-d $_, -r _]} @_;
+ my %paths = map { $_ => [-d $_, -r _] } @_;
my @_d = grep {not $paths{$_}->[0]} keys %paths;
my @_r = grep {not $paths{$_}->[1]} keys %paths;
return TRUE if (scalar(@_d) + scalar(@_r) == 0);
@@ -169,7 +167,7 @@
#
# Split allowed protocols into a list.
if (my $allowed = delete($CFG->{Protocols}->{Allow})) {
- $CFG->{Protocols}->{Allow} = [ split(/\s*,\s*/, $allowed) ];
+ $CFG->{Protocols}->{Allow} = [split(/\s*,\s*/, $allowed)];
}
#
@@ -180,7 +178,7 @@
{ # Make types config indexed by FPI.
my $_types = {};
- map {$_types->{$CFG->{Types}->{$_}->{PubID}} = $CFG->{Types}->{$_}}
+ map { $_types->{$CFG->{Types}->{$_}->{PubID}} = $CFG->{Types}->{$_} }
keys %{$CFG->{Types}};
$CFG->{Types} = $_types;
}
@@ -188,10 +186,10 @@
#
# Change strings to internal constants in MIME type mapping.
for (keys %{$CFG->{MIME}}) {
- if ($CFG->{MIME}->{$_} eq 'SGML') {$CFG->{MIME}->{$_} = MODE_SGML}
- elsif ($CFG->{MIME}->{$_} eq 'XML') {$CFG->{MIME}->{$_} = MODE_XML}
- elsif ($CFG->{MIME}->{$_} eq 'TBD') {$CFG->{MIME}->{$_} = MODE_TBD}
- else {$CFG->{MIME}->{$_} = MODE_TBD};
+ if ($CFG->{MIME}->{$_} eq 'SGML') { $CFG->{MIME}->{$_} = MODE_SGML }
+ elsif ($CFG->{MIME}->{$_} eq 'XML') { $CFG->{MIME}->{$_} = MODE_XML }
+ elsif ($CFG->{MIME}->{$_} eq 'TBD') { $CFG->{MIME}->{$_} = MODE_TBD }
+ else { $CFG->{MIME}->{$_} = MODE_TBD }
}
#
@@ -204,8 +202,8 @@
#
# Strings
- $VERSION = q$Revision$;
- $VERSION =~ s/Revision: ([\d\.]+) /$1/;
+ $VERSION = q$Revision$;
+ $VERSION =~ s/Revision: ([\d\.]+) /$1/;
#
# Use passive FTP by default.
@@ -216,7 +214,6 @@
# Get rid of (possibly insecure) $PATH.
delete $ENV{PATH};
-
#@@DEBUG: Dump $CFG datastructure. Used only as a developer aid.
#use Data::Dumper qw(Dumper);
#print Dumper($CFG);
@@ -235,7 +232,6 @@
# The data structure that will hold all session data.
my $File;
-
##############################################
# Populate $File->{Env} -- Session Metadata. #
##############################################
@@ -244,7 +240,6 @@
# The URL to this CGI Script.
$File->{Env}->{'Self URI'} = $q->url(-query => 0);
-
#################################
# Initialize the datastructure. #
#################################
@@ -272,7 +267,6 @@
$File->{Warnings} = []; # Warnings...
$File->{Namespaces} = []; # Other (non-root) Namespaces.
-
###############################################################################
#### Generate Template for Result. ############################################
###############################################################################
@@ -300,14 +294,12 @@
$File->{E} = $E;
$File->{H} = $H;
-
# Read friendly error message file
my $error_messages_list = File::Spec->catfile($CFG->{Paths}->{Templates}, $lang, 'error_messages.cfg');
my %config_opts = (-ConfigFile => $error_messages_list);
my %rsrc = Config::General->new(%config_opts)->getall();
$RSRC = \%rsrc;
-
$T->param(cfg_home_page => $CFG->{'Home Page'});
undef $lang;
@@ -331,7 +323,6 @@
$File->{Opt}->{'Verbose'} = $q->param('verbose') ? TRUE : FALSE;
$File->{Opt}->{'Debug'} = $q->param('debug') ? TRUE : FALSE;
$File->{Opt}->{'No200'} = $q->param('No200') ? TRUE : FALSE;
-# $File->{Opt}->{'Fussy'} = $q->param('fussy') ? TRUE : FALSE;
$File->{Opt}->{'Charset'} = $q->param('charset') ? lc $q->param('charset'): '';
$File->{Opt}->{'DOCTYPE'} = $q->param('doctype') ? $q->param('doctype') : '';
$File->{Opt}->{'Output'} = $q->param('output') ? $q->param('output') : 'html';
@@ -381,7 +372,6 @@
# by Apache::Registry's idiotic interference under mod_perl.
untie *STDIN;
-
###############################################################################
#### Output validation results. ###############################################
###############################################################################
@@ -415,8 +405,6 @@
$File->{Charset}->{Use} = 'utf-8'; # UTF-8 (image/svg+xml etc.)
}
-
-
$File->{Content} = &normalize_newlines($File->{Bytes},
exact_charset($File, $File->{Charset}->{Use}));
@@ -451,9 +439,9 @@
$File->{Tentative} |= T_ERROR;
$File->{Charset}->{Use} = $File->{Charset}->{Override};
}
- else { #actually overriding something
+ else { #actually overriding something
# Warn about Override unless it's the same as the real charset...
-
+
unless ($File->{Charset}->{Override} eq $File->{Charset}->{Use}) {
&add_warning('W03', {
W03_use => $File->{Charset}->{Use},
@@ -462,11 +450,8 @@
$File->{Tentative} |= T_ERROR;
$File->{Charset}->{Use} = $File->{Charset}->{Override};
- }
+ }
}
-
-
-
}
}
@@ -476,12 +461,10 @@
$File->{Charset}->{Use} = 'utf-8';
}
-
#
# Abort if an error was flagged while finding the encoding.
&abort_if_error_flagged($File, O_CHARSET|O_DOCTYPE);
-
#
# Check the detected Encoding and transcode.
if (&conflict($File->{Charset}->{Use}, 'utf-8')) {
@@ -489,7 +472,6 @@
&abort_if_error_flagged($File, O_CHARSET);
}
-
$File = &check_utf8($File); # always check
$File = &byte_error($File);
@@ -497,8 +479,6 @@
# Abort if an error was flagged during transcoding
&abort_if_error_flagged($File, O_SOURCE|O_CHARSET);
-
-
#
# Overall parsing algorithm for documents returned as text/html:
#
@@ -530,13 +510,13 @@
my $cfg = $CFG->{Types}->{$fpi};
my $mode = $cfg->{'Parse Mode'};
- if ($mode eq 'SGML') {$mode = MODE_SGML}
- elsif ($mode eq 'XML') {$mode = MODE_XML}
- else {$mode = MODE_TBD}
+ if ($mode eq 'SGML') { $mode = MODE_SGML }
+ elsif ($mode eq 'XML') { $mode = MODE_XML }
+ else { $mode = MODE_TBD }
if ($File->{Mode} == MODE_TBD) {
- if ($mode == MODE_SGML) {$File->{Mode} = MODE_SGML}
- elsif ($mode == MODE_XML) {$File->{Mode} = MODE_XML}
+ if ($mode == MODE_SGML) { $File->{Mode} = MODE_SGML }
+ elsif ($mode == MODE_XML) { $File->{Mode} = MODE_XML }
else {
$File->{Mode} = MODE_SGML;
&add_warning('W06', {});
@@ -546,9 +526,9 @@
my $dtd = $mode;
my $ct = $File->{Mode};
for ($dtd, $ct) {
- if ($_ == MODE_SGML) {$_ = 'SGML'}
- elsif ($_ == MODE_XML) {$_ = 'XML'}
- else {$_ = 'SGML'};
+ if ($_ == MODE_SGML) { $_ = 'SGML' }
+ elsif ($_ == MODE_XML) { $_ = 'XML' }
+ else { $_ = 'SGML' }
}
unless ($File->{Mode} == MODE_TBD) {
&add_warning('W07', {
@@ -565,7 +545,6 @@
}
}
-
#
# Sanity check Charset information and add any warnings necessary.
$File = &charset_conflicts($File);
@@ -595,17 +574,8 @@
if (&is_xml($File)) {
$catalog = File::Spec->catfile($CFG->{Paths}->{SGML}->{Library}, 'xml.soc');
push(@spopt, '-wxml');
- } else { # Only add these in SGML mode.
-# if ($File->{Opt}->{'Fussy'}) {
-# push @spopt, '-wmin-tag';
-# push @spopt, '-wfully-tagged';
-# push @spopt, '-wrefc';
-# push @spopt, '-wmissing-att-name';
-# push @spopt, '-wdata-delim';
-# }
}
-
-
+
#
# Defaults for SP; turn off fixed charset mode and set encoding to UTF-8.
$ENV{SP_CHARSET_FIXED} = 'NO';
@@ -620,7 +590,6 @@
# Set the command to execute.
my @cmd = ($CFG->{Paths}->{SGML}->{Parser}, '-n', '-c', $catalog, @spopt);
-
#
# Set debug info for HTML report.
$T->param(opt_debug => $DEBUG);
@@ -673,7 +642,7 @@
# Run it through SP, redirecting output to temporary files.
my $pid = do {
no warnings 'once';
- local(*SPIN, *SPOUT, *SPERR) = ($spin, $spout, $sperr);
+ local (*SPIN, *SPOUT, *SPERR) = ($spin, $spout, $sperr);
open3("<&SPIN", ">&SPOUT", ">&SPERR", @cmd);
};
undef $spin;
@@ -708,7 +677,7 @@
}
}
- next if / IMPLIED$/ && not $DEBUG;;
+ next if / IMPLIED$/ && not $DEBUG;
next if /^ASDAFORM CDATA /;
next if /^ASDAPREF CDATA /;
chomp; # Removes trailing newlines
@@ -758,7 +727,6 @@
$File->{Version} = $prettyver;
}
-
#
# Warn about unknown, incorrect, or missing Namespaces.
if ($File->{Namespace}) {
@@ -790,11 +758,8 @@
} else {
&prep_template($File, $T);
-
-
-
if (! $File->{Doctype} and ($File->{Version} eq 'unknown' or $File->{Version} eq 'SGML' or (!$File->{Version}))) {
- # @@TODO@@ we should try falling back on other version info, such as the ones stored in Version_ESIS
+ # @@TODO@@ we should try falling back on other version info, such as the ones stored in Version_ESIS
$T->param(file_version => '(no Doctype found)');
}
else {
@@ -844,7 +809,6 @@
undef $File;
exit;
-
#############################################################################
# Subroutine definitions
#############################################################################
@@ -891,7 +855,7 @@
#
# Tip of the Day...
- my $tip = &get_tip;
+ my $tip = &get_tip();
$T->param(tip_uri => $tip->[0]);
$T->param(tip_slug => $tip->[1]);
@@ -908,17 +872,16 @@
}
my @nss = map({uri => $_}, @{$File->{Namespaces}});
$T->param(file_namespaces => \@nss) if @nss;
-
+
if ($File->{Opt}->{DOCTYPE}) {
my $over_doctype_param = "override doctype $File->{Opt}->{DOCTYPE}";
- $T->param($over_doctype_param => TRUE);
+ $T->param($over_doctype_param => TRUE);
}
-
+
if ($File->{Opt}->{Charset}) {
my $over_charset_param = "override charset $File->{Opt}->{Charset}";
- $T->param($over_charset_param => TRUE);
+ $T->param($over_charset_param => TRUE);
}
-
}
#
@@ -951,7 +914,6 @@
$T->param(file_thispage => $thispage);
}
-
#
# Add a waring message to the output.
sub add_warning ($$) {
@@ -962,7 +924,6 @@
$File->{T}->param(have_warnings => TRUE);
}
-
#
# Proxy authentication requests.
# Note: expects the third argument to be a hash ref (see HTTP::Headers::Auth).
@@ -996,7 +957,6 @@
exit; # Further interaction will be a new HTTP request.
}
-
#
# Fetch an URL and return the content and selected meta-info.
sub handle_uri {
@@ -1015,12 +975,12 @@
unless ($ua->is_protocol_supported($uri)) {
$File->{'Error Flagged'} = TRUE;
- if (($uri->canonical() eq "1") )
+ if (($uri->canonical() eq "1") )
#if uri param is empty (also for empty direct or upload), it's been set to TRUE in sub prepCGI()
{
- $File->{E}->param(fatal_no_content => TRUE);
+ $File->{E}->param(fatal_no_content => TRUE);
}
- else {
+ else {
$File->{E}->param(fatal_uri_error => TRUE);
$File->{E}->param(fatal_uri_scheme => $uri->scheme());
}
@@ -1061,7 +1021,7 @@
# Enforce Max Recursion level.
&check_recursion($File, $res);
- my($mode, $ct, $charset)
+ my ($mode, $ct, $charset)
= &parse_content_type(
$File,
$res->header('Content-Type'),
@@ -1089,7 +1049,6 @@
$File->{'Is Upload'} = FALSE;
return $File;
-
}
#
@@ -1105,7 +1064,7 @@
local $/ = undef; # set line delimiter so that <> reads rest of file
$file = <$f>;
- my($mode, $ct, $charset) = &parse_content_type($File, $h->{'Content-Type'});
+ my ($mode, $ct, $charset) = &parse_content_type($File, $h->{'Content-Type'});
$File->{Bytes} = $file;
$File->{Mode} = $mode;
@@ -1140,7 +1099,6 @@
return $File;
}
-
#
# Parse a Content-Type and parameters. Return document type and charset.
sub parse_content_type {
@@ -1150,12 +1108,12 @@
my $charset = '';
my $mode = '';
- my($ct, @param) = split /\s*;\s*/, lc $Content_Type;
+ my ($ct, @param) = split /\s*;\s*/, lc $Content_Type;
$mode = $CFG->{MIME}->{$ct} || $ct;
foreach my $param (@param) {
- my($p, $v) = split /\s*=\s*/, $param;
+ my ($p, $v) = split /\s*=\s*/, $param;
next unless $p =~ m(charset)i;
if ($v =~ m/([\'\"]?)(\S+)\1/i) {
$charset = lc $2;
@@ -1198,8 +1156,6 @@
}
}
-
-
#
# Normalize newline forms (CRLF/CR/LF) to native newline.
sub normalize_newlines {
@@ -1237,7 +1193,6 @@
return $exact_charset;
}
-
#
# Return $_[0] encoded for HTML entities (cribbed from merlyn).
#
@@ -1250,7 +1205,6 @@
return $_;
}
-
#
# Truncate source lines for report.
#
@@ -1299,14 +1253,14 @@
return $line, $col;
}
-
#
# Suppress any existing DOCTYPE by commenting it out.
sub override_doctype {
no strict 'vars';
- my $File = shift;
- my ($dt) =
+ my $File = shift;
+ my ($dt) =
grep { $_->{Display} eq $File->{Opt}->{DOCTYPE} } values %{$CFG->{Types}};
+
# @@TODO: abort/whine about unrecognized doctype if $dt is undef.;
my $pubid = $dt->{PubID};
my $sysid = $dt->{SysID};
@@ -1314,9 +1268,10 @@
local $dtd = qq(<!DOCTYPE $name PUBLIC "$pubid");
$dtd .= qq( "$sysid") if $sysid; # We don't have one for all types.
$dtd .= '>';
+
local $org_dtd = '';
- local $HTML = '';
- local $seen = FALSE;
+ local $HTML = '';
+ local $seen = FALSE;
my $declaration = sub {
$seen = TRUE;
@@ -1324,7 +1279,8 @@
# No Override if Fallback was requested.
if ($File->{Opt}->{FB}->{DOCTYPE}) {
$HTML .= $_[0]; # Stash it as is...
- } else { # Comment it out and insert the new one...
+ } else {
+ # Comment it out and insert the new one...
$HTML .= "$dtd\n" . '<!-- ' . $_[0] . ' -->';
$org_dtd = &ent($_[0]);
}
@@ -1359,7 +1315,6 @@
return $File;
}
-
#
# Parse errors reported by SP.
sub parse_errors ($$) {
@@ -1375,12 +1330,12 @@
push @{$File->{DEBUG}->{Errors}}, $_;
chomp;
- my($err, @errors);
+ my ($err, @errors);
next if /^<OSFD>0:[0-9]+:[0-9]+:[^A-Z]/;
next if /numbers exceeding 65535 not supported/;
next if /URL Redirected to/;
- my(@_err) = split /:/;
+ my (@_err) = split /:/;
next unless $_err[1] eq '<OSFD>0'; #@@FIXME: This is a polite fiction!;
if ($_err[1] =~ m(^<URL>)) {
@errors = ($_err[0], join(':', $_err[1], $_err[2]), @_err[3..$#_err]);
@@ -1390,6 +1345,7 @@
$err->{src} = $errors[1];
$err->{line} = $errors[2];
$err->{char} = $errors[3];
+
# Workaround for onsgmls 1.5 sometimes reporting errors beyond EOL.
if ((my $l = length($File->{Content}->[$err->{line}-1])) < $err->{char}) {
$err->{char} = $l;
@@ -1399,6 +1355,7 @@
if ($err->{type} eq 'E' or $err->{type} eq 'X' or $err->{type} eq 'Q') {
$err->{msg} = join ':', @errors[6 .. $#errors];
} elsif ($err->{type} eq 'W') {
+
#@@FIXME: This is borked after templatification.
# &add_warning($File, 'fake', 'Warning:',
# "Line $err->{line}, column $err->{char}: " . &ent($errors[6]));
@@ -1445,13 +1402,13 @@
if (scalar @{$File->{Errors}}) {
foreach my $err (@{$File->{Errors}}) {
- my($line, $col) = &truncate_line($File->{Content}->[$err->{line}-1], $err->{char});
+ my ($line, $col) = &truncate_line($File->{Content}->[$err->{line}-1], $err->{char});
$line = &mark_error($line, $col);
my $explanation;
if ($err->{num}) {
- my(undef, $num) = split /\./, $err->{num};
+ my (undef, $num) = split /\./, $err->{num};
if (exists $Msgs{$num}) { # We've already seen this message...
if ($File->{Opt}->{Verbose}) { # ...so only repeat it in Verbose mode.
$explanation = qq(\n <div class="hidden mid-$num"></div>\n);
@@ -1468,7 +1425,7 @@
{
$_msg =~ s/<!--URI-->//g
}
- else
+ else
{
my $escaped_uri = uri_escape($File->{URI});
$_msg =~ s/<!--URI-->/$escaped_uri/g;
@@ -1479,7 +1436,7 @@
$err->{src} = $line;
$err->{col} = ' ' x $col;
$err->{expl} = $explanation;
- if ($err->{type} eq 'I')
+ if ($err->{type} eq 'I')
{
$err->{class} = 'msg_info';
$err->{err_type_info} = 1;
@@ -1490,7 +1447,7 @@
$err->{err_type_err} = 1;
$number_of_errors += 1;
}
- elsif (($err->{type} eq 'W') or ($err->{type} eq 'X') )
+ elsif (($err->{type} eq 'W') or ($err->{type} eq 'X') )
{
$err->{class} = 'msg_warn';
$err->{err_type_warn} = 1;
@@ -1575,7 +1532,6 @@
return $line;
}
-
#
# Produce an outline of the document based on Hn elements from the ESIS.
sub outline {
@@ -1586,7 +1542,7 @@
my $prevlevel = 0;
my $level = 0;
- for (1 .. $#{$File->{ESIS}}) {
+ for (1..$#{$File->{ESIS}}) {
my $line = $File->{ESIS}->[$_];
next unless ($line && $line =~ /^\(H([1-6])$/i);
@@ -1646,7 +1602,6 @@
return $outline;
}
-
#
# Create a HTML representation of the document.
sub source {
@@ -1660,7 +1615,6 @@
return \@source;
}
-
#
# Create a HTML Parse Tree of the document for validation report.
sub parsetree {
@@ -1719,7 +1673,6 @@
return $tree;
}
-
#
# Do an initial parse of the Document Entity to extract FPI.
sub preparse_doctype {
@@ -1732,7 +1685,7 @@
my $dtd = sub {
return if $File->{Root};
- ($File->{Root}, $File->{DOCTYPE}) = shift =~ m(<!DOCTYPE\s+(\w+)\s+(?:PUBLIC|SYSTEM)\s+(?:[\'\"])([^\"\']+)(?:[\"\']).*>)si;
+ ($File->{Root}, $File->{DOCTYPE}) = shift =~ m(<!DOCTYPE\s+(\w+)\s+(?:PUBLIC|SYSTEM)\s+(?:[\'\"])([^\"\']+)(?:[\"\']).*>)si;
};
my $start = sub {
@@ -1771,11 +1724,10 @@
for (@{shift->{ESIS}}) {
s/\\012//g;
s/\\n/\n/g;
-
- $file_esis .= ent $_;
- $file_esis .= "\n";
+ $file_esis .= ent $_;
+ $file_esis .= "\n";
}
- return $file_esis;
+ return $file_esis;
}
#
@@ -1788,12 +1740,11 @@
return $file_raw_errors;
}
-
#
# Preprocess CGI parameters.
sub prepCGI {
my $File = shift;
- my $q = shift;
+ my $q = shift;
# Avoid CGI.pm's "exists but undef" behaviour.
if (scalar $q->param) {
@@ -1801,6 +1752,7 @@
next if $param eq 'uploaded_file'; # 'uploaded_file' contains data.
next if $param eq 'fragment'; # Ditto 'fragment'.
next if $q->param($param) eq '0'; # Keep false-but-set params.
+
#
# Parameters that are given to us without specifying a value get
# set to "1" (the "TRUE" constant). This is so we can test for the
@@ -1845,6 +1797,7 @@
print redirect &self_url_q($q, $File);
exit;
} else {
+
# Redirected from /check/referer to /check?uri=referer because
# the browser didn't send a Referer header, or the request was
# for /check?uri=referer but no Referer header was found.
@@ -1909,7 +1862,6 @@
return $ssi;
}
-
#
# Utility sub to tell if mode "is" XML.
sub is_xml {shift->{Mode} == MODE_XML};
@@ -1922,7 +1874,7 @@
my $dtd = sub {
return if $File->{Root};
- ($File->{Root}, $File->{DOCTYPE}) = shift =~ m(<!DOCTYPE\s+(\w+)\s+PUBLIC\s+(?:[\'\"])([^\"\']+)(?:[\"\']).*>)si;
+ ($File->{Root}, $File->{DOCTYPE}) = shift =~ m(<!DOCTYPE\s+(\w+)\s+PUBLIC\s+(?:[\'\"])([^\"\']+)(?:[\"\']).*>)si;
};
my $start = sub {
@@ -1999,7 +1951,6 @@
return $File;
}
-
#
# Transcode to UTF-8
sub transcode {
@@ -2037,10 +1988,13 @@
$_ = $c->convert($_); # $_ is local!!
if ($in ne "" and (!defined($_) || $_ eq "")) {
push @{$File->{Lines}}, $line;
+
# try to decoded as much as possible of the line
- my $short = 0; # longest okay
- my $long = (length $in) - 1; # longest unknown
- while ($long > $short) { # binary search
+ my $short = 0; # longest okay
+ my $long = (length $in) - 1; # longest unknown
+
+ while ($long > $short) {
+ # binary search
my $try = int (($long+$short+1) / 2);
my $converted = $c->convert(substr($in, 0, $try));
if (!defined($converted) || $converted eq "") {
@@ -2108,17 +2062,16 @@
return $File;
}
-
#
# Return an XML report for the page.
sub report_xml {
my $File = shift;
my $valid = ($File->{'Is Valid'} ? 'Valid' : 'Invalid');
- my $errs = ($File->{'Is Valid'} ? '0' : scalar @{$File->{Errors}});
+ my $errs = ($File->{'Is Valid'} ? '0' : scalar @{$File->{Errors}});
if ($File->{E}->param('fatal_http_error')) {
$valid = 'Could not validate';
- }
+ }
print <<".EOF.";
Content-Type: application/xml; charset=UTF-8
@@ -2190,13 +2143,13 @@
chomp $err->{msg};
# Find index into the %frag hash for the "explanation..." links.
- $err->{idx} = $err->{msg};
+ $err->{idx} = $err->{msg};
$err->{idx} =~ s/"[^\"]*"/FOO/g;
$err->{idx} =~ s/[^A-Za-z ]//g;
- $err->{idx} =~ s/\s+/ /g; # Collapse spaces
- $err->{idx} =~ s/(^\s|\s$)//g; # Remove leading and trailing spaces.
+ $err->{idx} =~ s/\s+/ /g; # Collapse spaces
+ $err->{idx} =~ s/(^\s|\s$)//g; # Remove leading and trailing spaces.
$err->{idx} =~ s/(FOO )+/FOO /g; # Collapse FOOs.
- $err->{idx} =~ s/FOO FOO/FOO/g; # Collapse FOOs.
+ $err->{idx} =~ s/FOO FOO/FOO/g; # Collapse FOOs.
my $offset = $File->{Offsets}->[$err->{line} - 1]->[1] + $err->{char};
printf <<".EOF.", &ent($err->{msg});
@@ -2208,18 +2161,16 @@
print qq(</result>\n);
}
-
-
#
# Return an EARL report for the page.
sub report_earl {
my $File = shift;
my $valid = ($File->{'Is Valid'} ? 'Valid' : 'Invalid');
- my $errs = ($File->{'Is Valid'} ? '0' : scalar @{$File->{Errors}});
+ my $errs = ($File->{'Is Valid'} ? '0' : scalar @{$File->{Errors}});
if ($File->{E}->param('fatal_http_error')) {
$valid = 'Could not validate';
- }
+ }
print <<".EOF.";
Content-Type: application/rdf+xml; charset=UTF-8
@@ -2260,10 +2211,10 @@
$err->{idx} = $err->{msg};
$err->{idx} =~ s/"[^\"]*"/FOO/g;
$err->{idx} =~ s/[^A-Za-z ]//g;
- $err->{idx} =~ s/\s+/ /g; # Collapse spaces
- $err->{idx} =~ s/(^\s|\s\Z)//g; # Remove leading and trailing spaces.
+ $err->{idx} =~ s/\s+/ /g; # Collapse spaces
+ $err->{idx} =~ s/(^\s|\s\Z)//g; # Remove leading and trailing spaces.
$err->{idx} =~ s/(FOO )+/FOO /g; # Collapse FOOs.
- $err->{idx} =~ s/FOO FOO/FOO/g; # Collapse FOOs.
+ $err->{idx} =~ s/FOO FOO/FOO/g; # Collapse FOOs.
my @offsets = (
$File->{Offsets}->[$err->{line} ]->[0],
@@ -2305,8 +2256,6 @@
.EOF.
}
-
-
#
# Return a Notation3 EARL report for the page.
#
@@ -2318,7 +2267,7 @@
my $errs = ($File->{'Is Valid'} ? '0' : scalar @{$File->{Errors}});
if ($File->{E}->param('fatal_http_error')) {
$valid = 'Could not validate';
- }
+ }
print <<".EOF.";
Content-Type: text/plain; charset=UTF-8
@@ -2343,10 +2292,10 @@
$err->{idx} = $err->{msg};
$err->{idx} =~ s/"[^\"]*"/FOO/g;
$err->{idx} =~ s/[^A-Za-z ]//g;
- $err->{idx} =~ s/\s+/ /g; # Collapse spaces
- $err->{idx} =~ s/(^\s|\s\Z)//g; # Remove leading and trailing spaces.
+ $err->{idx} =~ s/\s+/ /g; # Collapse spaces
+ $err->{idx} =~ s/(^\s|\s\Z)//g; # Remove leading and trailing spaces.
$err->{idx} =~ s/(FOO )+/FOO /g; # Collapse FOOs.
- $err->{idx} =~ s/FOO FOO/FOO/g; # Collapse FOOs.
+ $err->{idx} =~ s/FOO FOO/FOO/g; # Collapse FOOs.
my @offsets = (
$File->{Offsets}->[$err->{line} ]->[0],
@@ -2391,7 +2340,6 @@
print " .\n";
}
-
#
# Autodetection as in Appendix F of the XML 1.0 Recommendation.
# <http://www.w3.org/TR/2000/REC-xml-20001006#sec-guessing>
@@ -2429,7 +2377,6 @@
# nothing in particular
}
-
#
# Find encoding in document according to XML rules
# Only meaningful if file contains a BOM, or for well-formed XML!
@@ -2439,20 +2386,24 @@
($File->{Charset}->{Auto}, $File->{BOM}, $CodeUnitSize, $Pattern)
= &find_base_encoding($File->{Bytes});
+
+ # 100 arbitrary, but enough in any case
my $someBytes = substr $File->{Bytes}, $File->{BOM}, ($CodeUnitSize * 100);
- my $someText = ''; # 100 arbitrary, but enough in any case
+ my $someText = '';
# translate from guessed encoding to ascii-compatible
if ($File->{Charset}->{Auto} eq 'ebcdic') {
+
# special treatment for EBCDIC, maybe use tr///
# work on this later
}
elsif (!$Pattern) {
$someText = $someBytes; # efficiency shortcut
}
- else { # generic code for UTF-16/UCS-4
+ else {
+ # generic code for UTF-16/UCS-4
$someBytes =~ /^(($Pattern)*)/s;
- $someText = $1; # get initial piece without chars >255
+ $someText = $1; # get initial piece without chars >255
$someText =~ s/$Pattern/$1/sg; # select the relevant bytes
}
@@ -2480,6 +2431,7 @@
print $E->output;
exit;
} else {
+
#@@FIXME: This is borked after templatification.
# &add_warning($File, 'fatal', 'Fatal Error', <<".EOF.");
# A fatal error has occurred while processing the requested document. Processing
@@ -2501,19 +2453,18 @@
return $encodingA && $encodingB && ($encodingA ne $encodingB);
}
-
#
# Construct a self-referential URL from a CGI.pm $q object.
sub self_url_q {
my ($q, $File) = @_;
my $thispage = $File->{Env}->{'Self URI'};
- $thispage .= '?uri=' . uri_escape($q->param('uri'));
- $thispage .= ';ss=1' if $q->param('ss');
- $thispage .= ';sp=1' if $q->param('sp');
- $thispage .= ';noatt=1' if $q->param('noatt');
- $thispage .= ';outline=1' if $q->param('outline');
- $thispage .= ';No200=1' if $q->param('No200');
- $thispage .= ';verbose=1' if $q->param('verbose');
+ $thispage .= '?uri=' . uri_escape($q->param('uri'));
+ $thispage .= ';ss=1' if $q->param('ss');
+ $thispage .= ';sp=1' if $q->param('sp');
+ $thispage .= ';noatt=1' if $q->param('noatt');
+ $thispage .= ';outline=1' if $q->param('outline');
+ $thispage .= ';No200=1' if $q->param('No200');
+ $thispage .= ';verbose=1' if $q->param('verbose');
if ($q->param('doctype')
and not $q->param('doctype') =~ /(Inline|detect)/i) {
$thispage .= ';doctype=' . uri_escape($q->param('doctype'));
@@ -2528,8 +2479,8 @@
# Return random Tip with it's URL.
sub get_tip {
my @tipAddrs = keys %{$CFG->{Tips}};
- my $tipAddr = $tipAddrs[rand scalar @tipAddrs];
- my $tipSlug = $CFG->{Tips}->{$tipAddr};
+ my $tipAddr = $tipAddrs[rand scalar @tipAddrs];
+ my $tipSlug = $CFG->{Tips}->{$tipAddr};
return [$tipAddr, $tipSlug];
}
@@ -2552,7 +2503,6 @@
return $thispage;
}
-
#####
package W3C::Validator::UserAgent;
@@ -2564,8 +2514,7 @@
use base qw(LWP::UserAgent);
-sub new
-{
+sub new {
my ($proto, $CFG, $File, @rest) = @_;
my $class = ref($proto) || $proto;
my $self = $class->SUPER::new(@rest);
@@ -2574,14 +2523,12 @@
return $self;
}
-sub redirect_ok
-{
+sub redirect_ok {
my ($self, $req, $res) = @_;
return $self->SUPER::redirect_ok($req, $res) && $self->uri_ok($req->uri());
}
-sub uri_ok
-{
+sub uri_ok {
my ($self, $uri) = @_;
return 1 if ($self->{'W3C::Validator::CFG'}->{'Allow Private IPs'} or
!$uri->can('host'));
@@ -2595,7 +2542,7 @@
}
if ($iptype && $iptype ne 'PUBLIC') {
my $File = $self->{'W3C::Validator::File'};
- $File->{'Error Flagged'} = 1;
+ $File->{'Error Flagged'} = 1;
$File->{E}->param(fatal_ip_error => 1);
$File->{E}->param(fatal_ip_hostname => 1)
if $addr and $uri->host() ne $addr;
Received on Monday, 15 August 2005 22:48:02 UTC