--- check.old 2005-08-08 13:48:46.546875000 +0200 +++ check 2005-08-11 17:46:05.734375000 +0200 @@ -16,8 +16,8 @@ $| = 1; # -# We need Perl 5.6.0+. -use 5.006; +# We need Perl 5.8.0+. +use 5.008; ############################################################################### #### Load modules. ############################################################ @@ -108,6 +108,16 @@ } # + # Defaults for SP; turn off fixed charset mode and set encoding to UTF-8. + # These must be in the BEGIN block and the BEGIN block must before using + # SGML::Parser::OpenSP through use/require, otherwise the variables won't + # be visible for OpenSP on Win32. Setting these might become obsolete if + # storage object specifications are used to specify the encoding. + $ENV{SP_CHARSET_FIXED} = 'NO'; + $ENV{SP_ENCODING} = 'UTF-8'; + $ENV{SP_BCTF} = 'UTF-8'; + + # # Read Config Files. eval { my %config_opts = ( @@ -575,57 +585,44 @@ $File = &parse($File); sub parse (\$) { my $File = shift; + + use SGML::Parser::OpenSP 0.99 qw(); + + my $opensp = SGML::Parser::OpenSP->new(); # # By default, use SGML catalog file and SGML Declaration. my $catalog = File::Spec->catfile($CFG->{Paths}->{SGML}->{Library}, 'sgml.soc'); + # - # Note: if you feel the urge to remove -R from here, please understand that - # doing so opens a potential security hole. Don't do that; instead just - # make sure you're running OpenSP 1.5 or later. - my @spopt = qw( - -wvalid - -wnon-sgml-char-ref - -wno-duplicate - ); + my @spopt = qw(valid non-sgml-char-ref no-duplicate); # # Switch to XML semantics if file is XML. if (&is_xml($File)) { $catalog = File::Spec->catfile($CFG->{Paths}->{SGML}->{Library}, 'xml.soc'); - push(@spopt, '-wxml'); - } else { # Only add these in SGML mode. -# if ($File->{Opt}->{'Fussy'}) { -# push @spopt, '-wmin-tag'; -# push @spopt, '-wfully-tagged'; -# push @spopt, '-wrefc'; -# push @spopt, '-wmissing-att-name'; -# push @spopt, '-wdata-delim'; -# } + push(@spopt, 'xml'); } - - # - # Defaults for SP; turn off fixed charset mode and set encoding to UTF-8. - $ENV{SP_CHARSET_FIXED} = 'NO'; - $ENV{SP_ENCODING} = 'UTF-8'; - $ENV{SP_BCTF} = 'UTF-8'; + # -n + $opensp->show_error_numbers(1); + + # -R (0 is OFF, 1 is ON) + $opensp->restrict_file_reading(0); + + # -c + $opensp->catalogs($catalog); # # Tell onsgmls about the SGML Library. - $ENV{SGML_SEARCH_PATH} = $CFG->{Paths}->{SGML}->{Library}; - - # - # Set the command to execute. - my @cmd = ($CFG->{Paths}->{SGML}->{Parser}, '-n', '-c', $catalog, @spopt); - + $opensp->search_dirs($CFG->{Paths}->{SGML}->{Library}); # # Set debug info for HTML report. $T->param(opt_debug => $DEBUG); $T->param(debug => [ - {name => 'Command', value => "@cmd"}, + {name => 'Command', value => "SGML::Parser::OpenSP->new"}, {name => 'SP_CHARSET_FIXED', value => $ENV{SP_CHARSET_FIXED}}, {name => 'SP_ENCODING', value => $ENV{SP_ENCODING}}, {name => 'SP_BCTF', value => $ENV{SP_BCTF}}, @@ -633,112 +630,14 @@ {name => 'Transfer-Encoding', value => $File->{TransferEnc}}, ], ); - - #@@FIXME: This needs a UI and testing! - # - # Set onsgmls' -E switch to the number of errors requested. - if ($File->{Opt}->{'Max Errors'} =~ m(^all$)i) { - push @cmd, '-E0'; - } elsif ($File->{Opt}->{'Max Errors'} =~ m(^(\d+)$)) { - my $numErr = $1; - if ($numErr >= 200) { - $numErr = 200; - } elsif ($numErr <= 0) { - $numErr = 0; #@@FIXME: Should add feature to supress error output in this case.; - } - push @cmd, '-E' . $numErr; - } else { - push @cmd, '-E' . ($CFG->{'Max Errors'} || 0); # "-E0" means "all". - } - #@@FIXME; - - # - # Temporary filehandles. - my $spin = IO::File->new_tmpfile; - my $spout = IO::File->new_tmpfile; - my $sperr = IO::File->new_tmpfile; - - # - # Dump file to a temp file for parsing. - for (@{$File->{Content}}) { - print $spin $_, "\n"; - } - - # - # seek() to beginning of the file. - seek $spin, 0, 0; - - # - # Run it through SP, redirecting output to temporary files. - my $pid = do { - no warnings 'once'; - local(*SPIN, *SPOUT, *SPERR) = ($spin, $spout, $sperr); - open3("<&SPIN", ">&SPOUT", ">&SPERR", @cmd); - }; - undef $spin; - waitpid $pid, 0; - - # - # Rewind temporary filehandles. - seek $_, 0, 0 for $spout, $sperr; - - # - # Proper text mode for Win32 systems - binmode($spout, ':crlf') if $^O eq "MSWin32"; - - $File = &parse_errors($File, $sperr); # Parse error output. - undef $sperr; # Get rid of no longer needed filehandle. - - $File->{ESIS} = []; - my $elements_found = 0; - while (<$spout>) { - $elements_found++ if /^\(/; - - if (/^Axmlns() \w+ (.*)/ or /^Axmlns:([^ ]+) \w+ (.*)/) { - if (not $File->{Namespace}) { - if ($elements_found == 0 and $1 eq "") { - $File->{Namespace} = $2; - } else { - # @@FIXME: should not happen; - push(@{$File->{Namespaces}}, $2); - } - } else { - push(@{$File->{Namespaces}}, $2) if ($2 ne $File->{Namespace}); - } - } - - next if / IMPLIED$/ && not $DEBUG;; - next if /^ASDAFORM CDATA /; - next if /^ASDAPREF CDATA /; - chomp; # Removes trailing newlines - push @{$File->{ESIS}}, $_; - } - undef $spout; - - if (@{$File->{ESIS}} && $File->{ESIS}->[-1] =~ /^C$/) { - pop(@{$File->{ESIS}}); - $File->{'Is Valid'} = TRUE; - } else { - $File->{'Is Valid'} = FALSE; - } + + $opensp->handler(W3C::Validator::ErrorHandler->new($opensp, $File)); + $opensp->parse_string(join"\n",@{$File->{Content}}); # # Set Version to be the FPI initially. $File->{Version} = $File->{DOCTYPE}; - # - # Extract any version attribute from the ESIS. - for (@{$File->{ESIS}}) { - no warnings 'uninitialized'; - next unless /^AVERSION CDATA (.*)/i; - push @{$File->{Version_ESIS}}, $1; - if ($1 =~ '-//W3C//DTD (SGML|XML) Fallback//EN') { - $File->{Tentative} |= (T_ERROR | T_FALL); - my $dtd = $1 eq 'SGML' ? 'HTML 4.01 Transitional' : 'XHTML 1.0 Strict'; - &add_warning('W09', { W09_dtd => $dtd }); - } - } - return $File; } @@ -824,14 +723,9 @@ } $T->param(file_warnings => $File->{Warnings}); - $T->param(file_outline => &outline($File)) - if $T->param('opt_show_outline'); $T->param(file_source => &source($File)) if $T->param('opt_show_source'); - $T->param(file_parsetree => &parsetree($File)) - if $T->param('opt_show_parsetree'); $T->param('opt_show_esis' => TRUE) if $File->{Opt}->{'Show ESIS'}; - $T->param('file_esis' => &show_esis($File)) if $T->param('opt_show_esis'); $T->param('opt_show_raw_errors' => TRUE) if $File->{Opt}->{'Show Errors'}; $T->param('file_raw_errors' => &show_errors($File)) if $T->param('opt_show_raw_errors'); @@ -1360,79 +1254,6 @@ # -# Parse errors reported by SP. -sub parse_errors ($$) { - my $File = shift; - my $fh = shift; - - $File->{Errors} = []; # Initialize to an (empty) anonymous array ref. - for (<$fh>) { - - # remove SGML Parser path if it contains colons - s/^\Q$CFG->{Paths}->{SGML}->{Parser}\E// if - $CFG->{Paths}->{SGML}->{Parser} =~ /:/; - - push @{$File->{DEBUG}->{Errors}}, $_; - chomp; - my($err, @errors); - next if /^0:[0-9]+:[0-9]+:[^A-Z]/; - next if /numbers exceeding 65535 not supported/; - next if /URL Redirected to/; - - my(@_err) = split /:/; - next unless $_err[1] eq '0'; #@@FIXME: This is a polite fiction!; - if ($_err[1] =~ m(^)) { - @errors = ($_err[0], join(':', $_err[1], $_err[2]), @_err[3..$#_err]); - } else { - @errors = @_err; - } - $err->{src} = $errors[1]; - $err->{line} = $errors[2]; - $err->{char} = $errors[3]; - # Workaround for onsgmls 1.5 sometimes reporting errors beyond EOL. - if ((my $l = length($File->{Content}->[$err->{line}-1])) < $err->{char}) { - $err->{char} = $l; - } - $err->{num} = $errors[4] || ''; - $err->{type} = $errors[5] || ''; - if ($err->{type} eq 'E' or $err->{type} eq 'X' or $err->{type} eq 'Q') { - $err->{msg} = join ':', @errors[6 .. $#errors]; - } elsif ($err->{type} eq 'W') { - #@@FIXME: This is borked after templatification. - # &add_warning($File, 'fake', 'Warning:', - # "Line $err->{line}, column $err->{char}: " . &ent($errors[6])); - #@@FIXME; - $err->{msg} = join ':', @errors[6 .. $#errors]; - } else { - $err->{type} = 'I'; - $err->{num} = ''; - $err->{msg} = join ':', @errors[4 .. $#errors]; - } - - # No or unknown FPI and a relative SI. - if ($err->{msg} =~ m(cannot (open|find))) { - $File->{'Error Flagged'} = TRUE; - $File->{E}->param(fatal_parse_extid_error => TRUE); - $File->{E}->param(fatal_parse_extid_msg => $err->{msg}); - } - - # No DOCTYPE. - if ($err->{msg} =~ m(prolog can\'t be omitted)) { - my $dtd = ($File->{Mode} == MODE_SGML ? - 'HTML 4.01 Transitional' : 'XHTML 1.0 Transitional'); - &add_warning('W09', {W09_dtd => $dtd}); - next; # Don't report this as a normal error. - } - - &abort_if_error_flagged($File, O_DOCTYPE); - $err->{msg} =~ s/^\s*//; - push @{$File->{Errors}}, $err; - } - undef $fh; - return $File; -} - -# # Generate a HTML report of detected errors. sub report_errors ($) { my $File = shift; @@ -1450,7 +1271,7 @@ my $explanation; if ($err->{num}) { - my(undef, $num) = split /\./, $err->{num}; + my $num = $err->{num}; if (exists $Msgs{$num}) { # We've already seen this message... if ($File->{Opt}->{Verbose}) { # ...so only repeat it in Verbose mode. $explanation = qq(\n \n); @@ -1574,78 +1395,6 @@ return $line; } - -# -# Produce an outline of the document based on Hn elements from the ESIS. -sub outline { - my $File = shift; - - my $outline = ''; - - my $prevlevel = 0; - my $level = 0; - - for (1 .. $#{$File->{ESIS}}) { - my $line = $File->{ESIS}->[$_]; - next unless ($line && $line =~ /^\(H([1-6])$/i); - - $prevlevel = $level; - $level = $1; - - my $TAB = $level + 2; - - if ($prevlevel == 0) { - $outline .= "
    \n"; - } else { - if ($level < $prevlevel) { - $outline .= "\n"; - for (my $i = $prevlevel; $i > $level; $i--) { - $outline .= " " x ($i + 2) . "
\n"; - $outline .= " " x (($i + 2) - 1) . "\n"; - } - } elsif ($level == $prevlevel) { - $outline .= "\n"; - } elsif ($level > $prevlevel) { - if ($level - $prevlevel > 1) { - foreach my $i (($prevlevel + 1) .. ($level - 1)) { - $outline .= "\n". " " x ($i + 2) . "
    \n" . " " x ($i + 2); - $outline .= qq(
  • A level $i heading is missing!); - } - $outline .= "\n" . " " x $TAB . "
      \n"; - } else { - $outline .= "\n" . " " x $TAB; - $outline .= "
        \n"; - } - } - } - - $line = ''; - my $heading = ''; - until (substr($line, 0, 3) =~ /^\)H$level/i) { - $line = $File->{ESIS}->[$_++]; - if ($line =~ /^-/) { - my $headcont = $line; - substr($headcont, 0, 1) = " "; - $heading .= $headcont; - } elsif ($line =~ /^AALT CDATA( .+)/i) { - my $headcont = $1; - $heading .= $headcont; - } - } - - $heading =~ s/\\011/ /g; - $heading =~ s/\\012/ /g; - $heading =~ s/\\n/ /g; - $heading =~ s/\s+/ /g; - $heading =~ s/^[- ]//; - $heading = &ent($heading); - $outline .= "
      • $heading"; - } - $outline .= "
      \n" x $level; - return $outline; -} - - # # Create a HTML representation of the document. sub source { @@ -1659,66 +1408,6 @@ return \@source; } - -# -# Create a HTML Parse Tree of the document for validation report. -sub parsetree { - my $File = shift; - my $tree = ''; - - $T->param(file_parsetree_noatt => TRUE) if $File->{Opt}->{'No Attributes'}; - - my $indent = 0; - my $prevdata = ''; - - foreach my $line (@{$File->{ESIS}}) { - - next if ($File->{Opt}->{'No Attributes'} && $line =~ /^A/); - - $line =~ s/\\n/ /g; - $line =~ s/\\011/ /g; - $line =~ s/\\012/ /g; - $line =~ s/\s+/ /g; - next if $line =~ /^-\s*$/; - - if ($line =~ /^-/) { - substr($line, 0, 1) = ' '; - $prevdata .= $line; - next; - } elsif ($prevdata) { - $prevdata =~ s/\s+/ /g; - $tree .= &ent(wrap(' ' x $indent, ' ' x $indent, $prevdata)) . "\n"; - undef $prevdata; - } - - $line = &ent($line); - if ($line =~ /^\)/) { - $indent -= 2; - } - - my $printme; - chomp($printme = $line); - if (my ($close, $elem) = $printme =~ /^([()])(.+)/) { - # reformat and add links on HTML elements - $close = ($close eq ')') ? '/' : ''; # ")" -> close-tag - if (my $u = $CFG->{Elements}->{lc($elem)}) { - $elem = '$elem"; - } - $printme = "<$close$elem>"; - } else { - $printme =~ s,^A, A,; # indent attributes a bit - } - - $tree .= ' ' x $indent . $printme . "\n"; - - if ($line =~ /^\(/) { - $indent += 2; - } - } - return $tree; -} - - # # Do an initial parse of the Document Entity to extract FPI. sub preparse_doctype { @@ -1764,20 +1453,6 @@ } # -# Print out the raw ESIS output for debugging. -sub show_esis ($) { - my $file_esis = ""; - for (@{shift->{ESIS}}) { - s/\\012//g; - s/\\n/\n/g; - - $file_esis .= ent $_; - $file_esis .= "\n"; - } - return $file_esis; -} - -# # Print out the raw error output for debugging. sub show_errors ($) { my $file_raw_errors = ""; @@ -1787,7 +1462,6 @@ return $file_raw_errors; } - # # Preprocess CGI parameters. sub prepCGI { @@ -2551,6 +2225,73 @@ return $thispage; } +##### + +package W3C::Validator::ErrorHandler; + +sub new +{ + my $class = shift; + my $parser = shift; + my $File = shift; + + my $self = { _file => $File, _parser => $parser }; + + # ... + $File->{'Is Valid'} = main::TRUE; + $File->{Errors} = []; + + bless $self, $class; +} + +sub error +{ + my $self = shift; + my $error = shift; + my $mess = $self->{_parser}->split_message($error); + my $File = $self->{_file}; + +# use Data::Dumper; +# die Dumper $mess; + + my $err; + + $err->{src} = '...'; # do this with show_open_entities()? + $err->{line} = $mess->{primary_message}{LineNumber}; + $err->{char} = $mess->{primary_message}{ColumnNumber}; + $err->{num} = $mess->{primary_message}{Number}; + $err->{type} = $mess->{primary_message}{Severity}; + $err->{msg} = $mess->{primary_message}{Text}; + + # ... + $File->{'Is Valid'} = main::FALSE if $err->{type} eq 'E'; + + # Workaround for onsgmls 1.5 sometimes reporting errors beyond EOL. + # (How true is that? Test cases please.) + if ((my $l = length($File->{Content}->[$err->{line}-1])) < $err->{char}) { + $err->{char} = $l; + } + + # No or unknown FPI and a relative SI. + if ($err->{msg} =~ m(cannot (open|find))) { + $File->{'Error Flagged'} = main::TRUE; + $File->{E}->param(fatal_parse_extid_error => main::TRUE); + $File->{E}->param(fatal_parse_extid_msg => $err->{msg}); + } + + # No DOCTYPE. + if ($err->{msg} =~ m(prolog can\'t be omitted)) { + my $dtd = ($File->{Mode} == main::MODE_SGML ? + 'HTML 4.01 Transitional' : 'XHTML 1.0 Transitional'); + main::add_warning('W09', {W09_dtd => $dtd}); + next; # Don't report this as a normal error. + } + + main::abort_if_error_flagged($File, main::O_DOCTYPE); + $err->{msg} =~ s/^\s*//; + push @{$File->{Errors}}, $err; + +} #####