validator/httpd/cgi-bin check,1.778,1.779

Update of /sources/public/validator/httpd/cgi-bin
In directory hutz:/tmp/cvs-serv15022/httpd/cgi-bin

Modified Files:
	check 
Log Message:
Bring back support for non-structured XML::LibXML errors.

This was removed in rev 1.769, but according to the docs, XML::LibXML 1.70
may still throw them.


Index: check
===================================================================
RCS file: /sources/public/validator/httpd/cgi-bin/check,v
retrieving revision 1.778
retrieving revision 1.779
diff -u -d -r1.778 -r1.779
--- check	10 Jun 2010 22:19:55 -0000	1.778
+++ check	13 Jun 2010 21:45:40 -0000	1.779
@@ -652,35 +652,152 @@
 
         eval { $xmlparser->parse_string($xml_string); };
         $xml_string = undef;
+        my $xml_parse_errors_line = undef;
+        my @xmlwf_error_list;
 
-        my $err_obj = $@;
-        while ($err_obj) {
-            my $err;
-            $err->{src} = '...';    # do this with show_open_entities()?
-            $err->{line} = $err_obj->line();
-            $err->{char} = $err_obj->column();
-            $err->{num}  = "libxml2-" . $err_obj->code();
-            $err->{type} = "E";
-            $err->{msg}  = $err_obj->message();
+        if (ref($@)) {
 
-            $err_obj = $err_obj->_prev();
+            # handle a structured error (XML::LibXML::Error object)
 
-            # The validator will sometimes fail to dereference entities
-            # files; we're filtering the resulting bogus error for
-            # non-standalone documents. @@@TODO: is this still needed?
-            if (!$standalone &&
-                $err->{msg} =~ /Entity '\w+' not defined/)
-            {
-                $err = undef;
-                next;
+            my $err_obj         = $@;
+            my $num_xmlwf_error = 0;
+            while ($err_obj) {
+                my $err;
+                $err->{src} = '...';    # do this with show_open_entities()?
+                $err->{line} = $err_obj->line();
+                $err->{char} = $err_obj->column();
+                $err->{num}  = "libxml2-" . $err_obj->code();
+                $err->{type} = "E";
+                $err->{msg}  = $err_obj->message();
+
+                $err_obj = $err_obj->_prev();
+
+                # The validator will sometimes fail to dereference entities
+                # files; we're filtering the resulting bogus error for
+                # non-standalone documents. @@@TODO: is this still needed?
+                if (!$standalone &&
+                    $err->{msg} =~ /Entity '\w+' not defined/)
+                {
+                    $err = undef;
+                    next;
+                }
+
+                unshift(@xmlwf_error_list, $err);
+                $num_xmlwf_error++;
             }
+        }
+        elsif ($@) {
+            my $xmlwf_errors      = $@;
+            my $xmlwf_error_line  = undef;
+            my $xmlwf_error_col   = undef;
+            my $xmlwf_error_msg   = undef;
+            my $got_error_message = undef;
+            my $got_quoted_line   = undef;
+            my $num_xmlwf_error   = 0;
+            foreach my $msg_line (split "\n", $xmlwf_errors) {
+
+                $msg_line =~ s{[^\x0d\x0a](:\d+:)}{\n$1}g;
+                $msg_line =~ s{[^\x0d\x0a]+[\x0d\x0a]$}{};
+
+                # first we get the actual error message
+                if (!$got_error_message &&
+                    $msg_line =~ /^(:\d+:)( parser error : .*)/)
+                {
+                    $xmlwf_error_line = $1;
+                    $xmlwf_error_msg  = $2;
+                    $xmlwf_error_line =~ s/:(\d+):/$1/;
+                    $xmlwf_error_msg =~ s/ parser error :/XML Parsing Error: /;
+                    $got_error_message = 1;
+                }
+
+                # then we skip the second line, which shows the context
+                # (we don't use that)
+                elsif ($got_error_message && !$got_quoted_line) {
+                    $got_quoted_line = 1;
+                }
+
+                # we now take the third line, with the pointer to the error's
+                # column
+                elsif (($msg_line =~ /(\s+)\^/) and
+                    $got_error_message and
+                    $got_quoted_line)
+                {
+                    $xmlwf_error_col = length($1);
+                }
+
+                # clean up the column number, working around a number of bugs
+                if (defined($xmlwf_error_col)) {
+                    if ((   my $l =
+                            length($File->{Content}->[$xmlwf_error_line - 1])
+                        ) < $xmlwf_error_col
+                        )
+                    {
+
+                        # http://bugzilla.gnome.org/show_bug.cgi?id=434196
+                        #warn("Warning: reported error column larger than line length " .
+                        #     "($xmlwf_error_col > $l) in $File->{URI} line " .
+                        #     "$xmlwf_error_line, libxml2 bug? Resetting to line length.");
+                        $xmlwf_error_col = $l;
+                    }
+                    elsif ($xmlwf_error_col == 79) {
+
+                        # working around an apparent odd limitation of libxml
+                        # which only gives context for lines up to 80 chars
+                        # http://www.w3.org/Bugs/Public/show_bug.cgi?id=4420
+                        # http://bugzilla.gnome.org/show_bug.cgi?id=424017
+                        $xmlwf_error_col = "> 80";
+
+                        # non-int column number will trigger the proper behavior
+                        # in report_error
+                    }
+                }
+
+                # when we have all the info (one full error message), proceed
+                # and move on to the next error
+                if ((defined $xmlwf_error_line) and
+                    (defined $xmlwf_error_col) and
+                    (defined $xmlwf_error_msg))
+                {
+
+                    # Reinitializing for the next batch of 3 lines
+                    $got_error_message = undef;
+                    $got_quoted_line   = undef;
 
+                    # formatting the error message for output
+                    my $err;
+                    $err->{src} = '...';   # do this with show_open_entities()?
+                    $err->{line} = $xmlwf_error_line;
+                    $err->{char} = $xmlwf_error_col;
+                    $err->{num}  = 'xmlwf';
+                    $err->{type} = "E";
+                    $err->{msg}  = $xmlwf_error_msg;
+
+                    # The validator will sometimes fail to dereference entities
+                    # files; we're filtering the resulting bogus error for
+                    # non-standalone documents. @@@TODO: is this still needed?
+                    if (!$standalone &&
+                        $err->{msg} =~ /Entity '\w+' not defined/)
+                    {
+                        $xmlwf_error_line = undef;
+                        $xmlwf_error_col  = undef;
+                        $xmlwf_error_msg  = undef;
+                        next;
+                    }
+                    push(@xmlwf_error_list, $err);
+                    $xmlwf_error_line = undef;
+                    $xmlwf_error_col  = undef;
+                    $xmlwf_error_msg  = undef;
+                    $num_xmlwf_error++;
+
+                }
+            }
+        }
+        foreach my $errmsg (@xmlwf_error_list) {
             $File->{'Is Valid'} = FALSE;
-            unshift(@{$File->{WF_Errors}}, $err);
+            push @{$File->{WF_Errors}}, $errmsg;
         }
     }
 }
-
 if (($File->{DOCTYPE} eq "HTML5") or ($File->{DOCTYPE} eq "XHTML5")) {
     if ($CFG->{External}->{HTML5}) {
         $File = &html5_validate($File);
@@ -867,6 +984,13 @@
             for my $key (qw(msg expl)) {
                 $msg->{$key} = $json->encode($msg->{$key}) if $msg->{$key};
             }
+
+            # Drop non-numeric char indicators from output, e.g. the
+            # "> 80" set for some XML parse errors (see the non-structured
+            # XML::LibXML code branch in XML preparsing above).
+            if ($msg->{char} && $msg->{char} !~ /^\d+$/) {
+                delete($msg->{char});
+            }
         }
     }
 }

Received on Sunday, 13 June 2010 21:45:44 UTC