- From: Olivier Thereaux via cvs-syncmail <cvsmail@w3.org>
- Date: Thu, 12 Jun 2008 22:13:56 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/perl/modules/W3C/XHTML/HTMLCompatChecker/bin
In directory hutz:/tmp/cvs-serv16831/bin
Modified Files:
appCcheck.pl
Log Message:
modularization and refactoring
Index: appCcheck.pl
===================================================================
RCS file: /sources/public/perl/modules/W3C/XHTML/HTMLCompatChecker/bin/appCcheck.pl,v
retrieving revision 1.40
retrieving revision 1.41
diff -u -d -r1.40 -r1.41
--- appCcheck.pl 10 Oct 2006 08:10:06 -0000 1.40
+++ appCcheck.pl 12 Jun 2008 22:13:54 -0000 1.41
@@ -1,363 +1,87 @@
#!/usr/bin/perl
-# Copyright (c) 2005-2006 the World Wide Web Consortium :
-# Keio University,
-# Institut National de Recherche en Informatique et Automatique,
-# Massachusetts Institute of Technology.
-#
-# Written and maintained by the QA-dev tool development team at W3C:
-# http://www.w3.org/QA/Tools/qa-dev/
+# Copyright (c) 2005-2008 W3C
+
+
use strict;
use warnings;
-use XML::Parser;
+
use CGI qw(param);
require LWP::UserAgent;
use URI;
use CGI::Carp 'fatalsToBrowser';
use HTML::Template 2.6 qw();
+use W3C::XHTML::HTMLCompatChecker;
# Define global constants
use constant TRUE => 1;
use constant FALSE => 0;
-use constant APPC_FOUND_XML_DECL => 1; # http://www.w3.org/TR/xhtml1/#C_1
-use constant APPC_FOUND_XML_PI => 2; # http://www.w3.org/TR/xhtml1/#C_1
-use constant APPC_MISSING_SPACE => 3; # http://www.w3.org/TR/xhtml1/#C_2
-use constant APPC_UNMINIMIZED => 4; # http://www.w3.org/TR/xhtml1/#C_2
-use constant APPC_MINIMIZED => 5; # http://www.w3.org/TR/xhtml1/#C_3
-use constant APPC_MANY_ISINDEX => 6; # http://www.w3.org/TR/xhtml1/#C_6
-use constant APPC_ONLY_LANG => 7; # http://www.w3.org/TR/xhtml1/#C_7
-use constant APPC_ONLY_XML_LANG => 8; # http://www.w3.org/TR/xhtml1/#C_7
-use constant APPC_APOS_IN_ATTR => 9; # http://www.w3.org/TR/xhtml1/#C_16
-use constant APPC_APOS_IN_ELEM => 10; # http://www.w3.org/TR/xhtml1/#C_16
-
-use constant APPC_ERRO => 0; # @@
-use constant APPC_WARN => 1; # @@
-use constant APPC_INFO => 2; # @@
-use constant APPC_HINT => 3; # @@
-
-use constant SEVERITY_NAMES =>
-{
- APPC_ERRO, "Error",
- APPC_WARN, "Warning",
- APPC_INFO, "Info",
- APPC_HINT, "Hint",
-};
-
-use constant CRITERIA =>
-{
- APPC_FOUND_XML_DECL, [ 1, APPC_INFO, "XML declarations are problematic" ],
- APPC_FOUND_XML_PI, [ 1, APPC_INFO, "XML processing instructions are problematic" ],
- APPC_MISSING_SPACE, [ 2, APPC_ERRO, "<example/> shall be written as <example />" ],
- APPC_UNMINIMIZED, [ 2, APPC_ERRO, "For empty elements you shall use <example />" ],
- APPC_MINIMIZED, [ 3, APPC_ERRO, "For non-empty elements, you shall use <example></example>" ],
- APPC_ONLY_LANG, [ 7, APPC_ERRO, "<example lang='en'> shall be written as <example lang='en' xml:lang='en'>" ],
- APPC_ONLY_XML_LANG, [ 7, APPC_ERRO, "<example xml:lang='en'> shall be written as <example lang='en' xml:lang='en'>" ],
- APPC_MANY_ISINDEX, [10, APPC_WARN, "Avoid more than one <isindex> element in the <head> element" ],
- APPC_APOS_IN_ATTR, [16, APPC_ERRO, "You must write ' as e.g. ' for legacy user agents" ],
- APPC_APOS_IN_ELEM, [16, APPC_ERRO, "You must write ' as e.g. ' for legacy user agents" ],
-};
-
-use constant GUIDELINE_TITLES =>
-{
- 1, "Processing Instructions and the XML Declaration",
- 2, "Empty Elements",
- 3, "Element Minimization and Empty Element Content",
- 6, "Isindex",
- 7, "The lang and xml:lang Attributes",
- 16, "The Named Character Reference '",
-};
-
-use constant EMPTY_ELEMENTS => { map { $_ => 1 }
-qw/
- base basefont link area hr img
- meta param input isindex col br
-/ };
-
-# global variables...
-our $ISINDEX = 0;
-our $IS_RELEVANT_DOC = 1; # whether the checker is relevant to the doctype of the document being processed.
-our $IS_RELEVANT_CT = 1; # whether the checker is relevant to the media type of the document being processed.
-our $IS_WF = 1; # whether the document is at least well-formed XML
-our @MESSAGES;
-
-## Helper functions #######################################################
-sub is_empty_element { EMPTY_ELEMENTS->{shift()} }
-sub is_isindex_element { shift eq "isindex" }
-sub is_inside_head { shift->within_element("head") }
-
-sub report_problem
-{
- my $exp = shift;
- my $cod = shift;
- my $loc = shift;
-
- my $str = $exp->recognized_string;
- my $lin = $exp->current_line;
- my $col = $exp->current_column;
- my $off = $exp->current_byte;
-
- # determine position after skipping $loc, e.g. if there is
- #
- # <p lang = "de"
- # xml:lang = "de"
- # class = "a b c d e f g"
- # id = "example"/>
- #
- # the error is the / and it would be unhelpful to point at
- # the < as expat would do in this case.
-
- my $left = substr $str, 0, $loc;
- my $lines = $left =~ y/\n//; # @@ does \n always work?
- $left =~ s/^.*\n//s; # @@ does \n always work?
- my $chars = length $left;
-
- # set new positions
- my $posy = $lin + $lines; # advance pointer
- my $posx = $lines ? $chars : $col + $chars; # advance or replace
- my $posxy = $off + $loc; # advance pointer
-
- my $stext = SEVERITY_NAMES->{CRITERIA->{$cod}->[1]};
- my $mtext = CRITERIA->{$cod}->[2];
- my $cnum = CRITERIA->{$cod}->[0];
- my $gtitle = GUIDELINE_TITLES->{$cnum};
-
- push @MESSAGES, {severity => $stext, line => $posy, column => $posx + 1, cnum => $cnum, message_text => $mtext, guideline_title => $gtitle}
-
-}
-
-
-## Pre-Parsing routines ###################################################
-# make sure we are actually handling XHTML 1.0 documents served as text/html
-# some code taken from W3C Markup Validator Codebase
-
-sub parse_content_type {
- my $Content_Type = shift;
- my ($ct, @others) = split /\s*;\s*/, lc $Content_Type;
- #print p($ct);
- if ($ct ne "text/html") {
- $IS_RELEVANT_CT = 0;
- }
- return $ct;
-}
-
-
-## Handler for XML::Parser ################################################
-
-sub _start
-{
- my $exp = shift;
- my $ele = shift;
- my %att = @_;
- my $str = $exp->recognized_string;
- my $lin = $exp->current_line;
- my $col = $exp->current_column;
- my $off = $exp->current_byte;
- my $end = length($str) - 1;
-
- # check for multiple isindex elements
- if (is_isindex_element($ele) and
- is_inside_head($exp) and
- $ISINDEX++)
- {
- report_problem($exp, APPC_MANY_ISINDEX, 0);
- }
-
- if ($str =~ m|/>$|)
- {
- # check for preceding space in empty element tag
- if ($str !~ m|[ \x0d\x0a\t]/>$|)
- {
- report_problem($exp, APPC_MISSING_SPACE, $end - 1);
- }
-
- # check that empty element tags are used only for
- # elements declared as EMPTY in the DTD
- if (!is_empty_element($ele))
- {
- report_problem($exp, APPC_MINIMIZED, $end - 1);
- }
- }
-
- # check that elements declared as EMPTY use empty element tags
- if (is_empty_element($ele))
- {
- if ($str !~ m|/>$|)
- {
- report_problem($exp, APPC_UNMINIMIZED, $end);
- }
- }
-
- # check for ' in attribute values
- if ($str =~ m|'|)
- {
- local $_ = $str;
- my $len = 0;
-
- while(s/^(.*?)'//)
- {
- $len += length $1;
- report_problem($exp, APPC_APOS_IN_ATTR, $len);
-
- }
- }
-
- # check for <p lang="de">...</p>
- if (exists $att{'lang'} && not exists $att{'xml:lang'})
- {
- report_problem($exp, APPC_ONLY_LANG, $end);
- }
-
- # check for <p xml:lang="de">...</p>
- if (exists $att{'xml:lang'} && not exists $att{'lang'})
- {
- report_problem($exp, APPC_ONLY_XML_LANG, $end);
- }
-}
-
-sub _char
-{
- my $exp = shift;
- my $txt = shift;
- my $str = $exp->recognized_string;
- my $lin = $exp->current_line;
- my $col = $exp->current_column;
- my $off = $exp->current_byte;
-
- # check for ' in parsed character data
- if ($str =~ /'/)
- {
- local $_ = $str;
- my $len = 0;
-
- while(s/^(.*?)'//)
- {
- $len += length $1;
- report_problem($exp, APPC_APOS_IN_ELEM, $len);
-
- }
- }
-}
-
-sub _proc
-{
- # check for XML processing instructions
- report_problem(shift, APPC_FOUND_XML_PI, 0);
-}
-
-sub _xmldecl
-{
- # check for XML declaration
- report_problem(shift, APPC_FOUND_XML_DECL, 0);
-}
-
-sub _doctype
-{
- my $exp = shift;
- my $doctypename = shift;
- my $doctypesys = shift;
- my $doctypepub = shift;
- my $doctypeint = shift;
- if (defined $doctypename) {
- $IS_RELEVANT_DOC = 0 if ($doctypename ne "html");
- }
- if(defined $doctypesys) {
- $_ = $doctypesys;
- $IS_RELEVANT_DOC = 0 if (not /http:\/\/www.w3.org\/TR\/xhtml1\/DTD\/xhtml1-(strict|transitional|frameset).dtd/);
- }
- if (defined $doctypepub) {
- $_ = $doctypepub;
- $IS_RELEVANT_DOC = 0 if (not /-\/\/W3C\/\/DTD XHTML 1.0 (Strict|Transitional|Frameset)\/\/EN/);
- }
- if (defined $doctypeint) # there should be no internal subset
- {
- $IS_RELEVANT_DOC = 0 if (length $doctypeint);
- }
- $IS_RELEVANT_DOC = 0 if ((not defined $doctypesys) and (not defined $doctypepub)); # should not happen with XHTML 1.0
-}
-
## Output routines ########################################################
sub prep_output {
my $output_param = shift;
my $html_output_template_text = CGI::header(). '
-<!DOCTYPE html
- PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en-US" xml:lang="en-US">
<head>
-<title>Experimental XHTML 1.0 Appendix C checker</title>
-<link rel="stylesheet" type="text/css" href="/unicorn/style/base.css" />
-<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+<title>W3C HTML Compatibility Checker for HTML</title>
+<style type="text/css" media="all">
+ @import "http://validator.w3.org/style/base.css";
+ p.submit_button { margin-top: 1em; }
+</style>
+
</head>
<body>
-<div id="banner"><h1 id="title"><a href="http://www.w3.org"><img src="http://www.w3.org/Icons/WWW/w3c_home.gif" alt="W3C" /></a> XHTML 1.0 Appendix C checker</h1></div>
-<p id="tagline">Checks XHTML 1.0 documents against HTML compatibility guidelines</p>
+ <div id="banner">
+ <h1 id="title">
+ <a href="http://www.w3.org/"><img alt="W3C" width="110" height="61" id="logo" src="http://validator.w3.org/images/w3c.png" /></a>
+ <a href="./"><span>HTML Compatibility Checker for HTML</span></a>
+ </h1>
+ <p id="tagline">Check XHTML documents against HTML compatibility guidelines</p>
+ </div>
-<form method="get" action="/appc/" enctype="multipart/form-data">
-<div><input name="uri" size="60" <TMPL_IF NAME="uri">value="<TMPL_VAR NAME="uri" ESCAPE="HTML">"</TMPL_IF> /><input type="submit" value="Check" />
+
+
+
+<div id="frontforms">
+<ul id="tabset_tabs">
+ <li class="selected"><a href="#validate-by-uri"><span>Check online</span> document</a></li>
+</ul>
+ <div id="fields">
+
+<fieldset id="validate-by-uri" class="tabset_content front">
+<form method="get" action="">
+<p>
+ <label title="Address of page to Validate" for="uri">Address:</label>
+ <input type="text" name="uri" id="uri" size="60" <TMPL_IF NAME="uri">value="<TMPL_VAR NAME="uri" ESCAPE="HTML">"</TMPL_IF>/>
+</p>
+<p class="submit_button"><input title="Submit for validation" type="submit" value="Check" />
+</p>
+
+
+</fieldset>
+</form>
+</div>
<dl>
<TMPL_LOOP NAME="message_loop">
<dt><strong><TMPL_VAR NAME="severity" ESCAPE="HTML"></strong> Line <TMPL_VAR NAME="line" ESCAPE="HTML"> column <TMPL_VAR NAME="column" ESCAPE="HTML"></dt>
<dd><a href="http://www.w3.org/TR/xhtml1/#C_<TMPL_VAR NAME="cnum" ESCAPE="HTML">"><TMPL_VAR NAME="message_text" ESCAPE="HTML"></a></dd>
</TMPL_LOOP>
-
</dl>
<TMPL_UNLESS NAME="uri"><p>Enter the URI of an XHTML 1.0 document which you would like to check against the
<a href="http://www.w3.org/TR/xhtml1/#guidelines">HTML Compatibility Guidelines</a> .</p></TMPL_UNLESS>
-<TMPL_IF NAME="uri">
-<TMPL_UNLESS NAME="is_wf">
-<p>The document appears to not be well-formed XML. Either it is not XHTML (e.g HTML up to version 4.01) or it has XML well-formedness errors.
-HTML Compatibility guidelines checking does not apply.
+<TMPL_IF NAME="Abort">
+<p>The document was not checked against HTML Compatibility Guidelines.
+Reason: <em><TMPL_VAR NAME="Abort_Message"></em>.
</p>
-<TMPL_IF NAME="wf_errors">
-<p>WF errors: <TMPL_VAR NAME="wf_errors" ESCAPE="HTML"> </p>
-</TMPL_IF>
-</TMPL_UNLESS>
-<TMPL_UNLESS NAME="is_relevant_ct">
-<p>The document is not served with a <code>text/html</code> media type (<TMPL_VAR NAME="content_type" ESCAPE="HTML">). HTML Compatibility guidelines checking does not apply.
-</TMPL_UNLESS>
-<TMPL_UNLESS NAME="is_relevant_doctype">
-<p>The document does not appear to be XHTML 1.0. HTML Compatibility guidelines checking does not apply.<p>
-</TMPL_UNLESS>
</TMPL_IF>
</div>
-</form>
-
- <ul class="navbar" id="menu">
- <li><a href="" title="About this service">About</a> <span class="hideme">|</span></li>
- </ul>
- <p id="activity_logos">
-
- <a href="http://www.w3.org/QA/" title="W3C\'s Quality Assurance Activity, bringing you free Web quality tools and more"><img src="http://www.w3.org/QA/2002/12/qa-small.png" alt="QA" /></a>
- </p>
- <p id="support_logo">
-
- Support this tool, become a<br />
- <a href="http://www.w3.org/Consortium/supporters"><img src="http://www.w3.org/Consortium/supporter-logos/csupporter.png" alt="W3C Supporter" /></a>
- </p>
- <p class="copyright">
- <a rel="Copyright" href="http://www.w3.org/Consortium/Legal/ipr-notice#Copyright">Copyright</a> © 1994-2006
- <a href="http://www.w3.org/"><acronym title="World Wide Web Consortium">W3C</acronym></a>®
-
- (<a href="http://www.csail.mit.edu/"><acronym title="Massachusetts Institute of Technology">MIT</acronym></a>,
- <a href="http://www.ercim.org/"><acronym title="European Research Consortium for Informatics and Mathematics">ERCIM</acronym></a>,
- <a href="http://www.keio.ac.jp/">Keio</a>),
- All Rights Reserved.
- W3C <a href="http://www.w3.org/Consortium/Legal/ipr-notice#Legal_Disclaimer">liability</a>,
- <a href="http://www.w3.org/Consortium/Legal/ipr-notice#W3C_Trademarks">trademark</a>,
- <a rel="Copyright" href="http://www.w3.org/Consortium/Legal/copyright-documents">document use</a>
- and <a rel="Copyright" href="http://www.w3.org/Consortium/Legal/copyright-software">software licensing</a>
-
- rules apply. Your interactions with this site are in accordance
- with our <a href="http://www.w3.org/Consortium/Legal/privacy-statement#Public">public</a> and
- <a href="http://www.w3.org/Consortium/Legal/privacy-statement#Members">Member</a> privacy
- statements.
- </p>
</body>
</html>
';
@@ -470,77 +194,34 @@
$output->param(warning_count => 0);
$output->param(error_count => 0);
-if (defined $uri and length $uri and URI->new($uri)->scheme eq "http")
-{
- $output->param(uri => $uri);
- my $ua = LWP::UserAgent->new;
- my $response = $ua->get($uri);
- my $xml = undef;
- my $ct = undef;
- my @content_type_values = undef;
- if ($response->is_success) {
- $xml = $response->content;
- @content_type_values = $response->header('Content-Type');
- $ct = $content_type_values[0];
- }
- if (defined $ct and length $ct) {
- $ct = &parse_content_type($ct);
- $output->param(content_type => $ct);
+my $compat_parser = W3C::XHTML::HTMLCompatChecker->new();
+my @checker_messages = $compat_parser->check_uri($uri);
+if (exists $checker_messages[0]) {
+ if ($checker_messages[0]{"severity"} eq "Abort"){
+ if ($checker_messages[0]{"message_text"} != "Bad URI") { $output->param(uri => $uri);}
+ $output->param(Abort=>1);
+ $output->param(Abort_Message=>$checker_messages[0]{"message_text"});
}
-
- if (defined $xml and length $xml)
- {
- my $p = XML::Parser->new;
- $p->setHandlers(Doctype => \&_doctype);
-
- eval { $p->parsestring($xml); };
- $output->param(is_relevant_ct => $IS_RELEVANT_CT);
- $output->param(is_relevant_doctype => $IS_RELEVANT_DOC);
-
- if ($@) # not well-formed
- {
- $IS_WF = 0;
- my $wf_errors = join '', $@;
- $output->param(info_count => 1);
- $output->param(wf_errors => $wf_errors);
- }
- elsif ((not $IS_RELEVANT_CT) or (not $IS_RELEVANT_DOC)) # not relevant to this checker
- {
- #pass
- }
- else # woot, Well-formed, and relevant. Let's get to work.
+ else {
+ $output->param(uri => $uri);
+ $output->param(message_loop => \@checker_messages);
+ my (@ERRORS, @WARNINGS, @INFOS);
+ for (my $i=0; $i < scalar @checker_messages; $i++)
{
- my $p = XML::Parser->new;
- $p->setHandlers(Char => \&_char,
- Proc => \&_proc,
- Start => \&_start,
- XMLDecl => \&_xmldecl);
- eval { $p->parsestring($xml); };
- my (@ERRORS, @WARNINGS, @INFOS);
- for (my $i=0; $i < scalar @MESSAGES; $i++)
- {
- if ($MESSAGES[$i]{'severity'} eq "Error") { push @ERRORS, $MESSAGES[$i];}
- elsif ($MESSAGES[$i]{'severity'} eq "Warning") {push @WARNINGS, $MESSAGES[$i];}
- else {push @INFOS, $MESSAGES[$i];}
- }
- $output->param(message_loop => \@MESSAGES);
- if (@ERRORS) {$output->param(passed => 0) } else {$output->param(passed => 1)}
- $output->param(info_loop => \@INFOS);
- $output->param(info_count => scalar @INFOS);
- $output->param(warning_loop => \@WARNINGS);
- $output->param(warning_count => scalar @WARNINGS);
- $output->param(error_loop => \@ERRORS);
- $output->param(error_count => scalar @ERRORS);
+ if ($checker_messages[$i]{'severity'} eq "Error") { push @ERRORS, $checker_messages[$i];}
+ elsif ($checker_messages[$i]{'severity'} eq "Warning") {push @WARNINGS, $checker_messages[$i];}
+ else {push @INFOS, $checker_messages[$i];}
}
- $output->param(is_wf => $IS_WF);
+ if (@ERRORS) {$output->param(passed => 0) } else {$output->param(passed => 1)}
+ $output->param(info_loop => \@INFOS);
+ $output->param(info_count => scalar @INFOS);
+ $output->param(warning_loop => \@WARNINGS);
+ $output->param(warning_count => scalar @WARNINGS);
+ $output->param(error_loop => \@ERRORS);
+ $output->param(error_count => scalar @ERRORS);
}
- else # houston, we had a problem
- {
- # TODO some fault code
- }
-
-}
+} else {$output->param(uri => $uri);}
print $output->output();
Received on Thursday, 12 June 2008 22:14:30 UTC