- From: Ville Skytta via cvs-syncmail <cvsmail@w3.org>
- Date: Thu, 25 May 2006 15:21:48 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/2006/ack
In directory hutz:/tmp/cvs-serv25775
Modified Files:
ack.pl
Log Message:
New checks: system IDs in doctype declarations, P3P policyrefs, PICS-Labels,
Link HTTP headers.
Index: ack.pl
===================================================================
RCS file: /sources/public/2006/ack/ack.pl,v
retrieving revision 1.7
retrieving revision 1.8
diff -u -d -r1.7 -r1.8
--- ack.pl 25 May 2006 13:45:29 -0000 1.7
+++ ack.pl 25 May 2006 15:21:46 -0000 1.8
@@ -21,6 +21,7 @@
use Digest::MD5 qw(md5_hex);
use File::Temp ();
use HTML::Entities qw(encode_entities);
+use HTTP::Headers::Util qw(split_header_words);
use HTML::LinkExtor ();
use LWP::UserAgent ();
use URI ();
@@ -38,12 +39,44 @@
)->get($url, ":content_file" => "$file");
$url = $res->request()->uri()->canonical();
- my $p = HTML::LinkExtor->new(undef, $res->base());
+ my $base = $res->base();
+
+ my @extra_links;
+
+ # P3P policyrefs:
+ for (split_header_words($res->header("P3P"))) {
+ my %p3p = (@$_);
+ push(@extra_links, ["http", "p3p", URI->new_abs($p3p{policyref}, $base)])
+ if $p3p{policyref};
+ }
+
+ # PICS labels:
+ for my $label ($res->header("PICS-Label")) {
+ push(@extra_links, ["http", "pics-label", URI->new_abs($1, $base)])
+ if ($label =~ /^\(PICS-1\.\d+\s+"([^"]+)"/);
+ }
+
+ # Link headers:
+ for my $link ($res->header("Link")) {
+ push(@extra_links, ["http", "link", URI->new_abs($1, $base)])
+ if ($link =~ /^<([^>]+)>/);
+ }
+
+ my $p = HTML::LinkExtor->new(undef, $base);
+
+ # System ID's in doctype declarations:
+ $p->handler(declaration => sub {
+ (my ($self, $tag), local $_) = @_;
+ return unless $_;
+ if (/^<!DOCTYPE\s+html\s+(?:PUBLIC\s+"[^"]*"|SYSTEM)\s+"([^"]+)"/i) {
+ push(@extra_links, [$tag, system => URI->new_abs($1, $base)]);
+ }}, "self, tagname, text");
+
$p->parse_file($file);
- foreach ($p->links()) {
- my ($tag, %foo) = @$_;
- map { $links{$_->canonical()}++ } values(%foo);
+ foreach ($p->links(), @extra_links) {
+ my ($tag, %attrs) = @$_;
+ map { $links{$_->canonical()}++ } values(%attrs);
}
}
Received on Thursday, 25 May 2006 15:21:54 UTC