- From: Ville Skytta via cvs-syncmail <cvsmail@w3.org>
- Date: Thu, 25 May 2006 15:21:48 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/2006/ack In directory hutz:/tmp/cvs-serv25775 Modified Files: ack.pl Log Message: New checks: system IDs in doctype declarations, P3P policyrefs, PICS-Labels, Link HTTP headers. Index: ack.pl =================================================================== RCS file: /sources/public/2006/ack/ack.pl,v retrieving revision 1.7 retrieving revision 1.8 diff -u -d -r1.7 -r1.8 --- ack.pl 25 May 2006 13:45:29 -0000 1.7 +++ ack.pl 25 May 2006 15:21:46 -0000 1.8 @@ -21,6 +21,7 @@ use Digest::MD5 qw(md5_hex); use File::Temp (); use HTML::Entities qw(encode_entities); +use HTTP::Headers::Util qw(split_header_words); use HTML::LinkExtor (); use LWP::UserAgent (); use URI (); @@ -38,12 +39,44 @@ )->get($url, ":content_file" => "$file"); $url = $res->request()->uri()->canonical(); - my $p = HTML::LinkExtor->new(undef, $res->base()); + my $base = $res->base(); + + my @extra_links; + + # P3P policyrefs: + for (split_header_words($res->header("P3P"))) { + my %p3p = (@$_); + push(@extra_links, ["http", "p3p", URI->new_abs($p3p{policyref}, $base)]) + if $p3p{policyref}; + } + + # PICS labels: + for my $label ($res->header("PICS-Label")) { + push(@extra_links, ["http", "pics-label", URI->new_abs($1, $base)]) + if ($label =~ /^\(PICS-1\.\d+\s+"([^"]+)"/); + } + + # Link headers: + for my $link ($res->header("Link")) { + push(@extra_links, ["http", "link", URI->new_abs($1, $base)]) + if ($link =~ /^<([^>]+)>/); + } + + my $p = HTML::LinkExtor->new(undef, $base); + + # System ID's in doctype declarations: + $p->handler(declaration => sub { + (my ($self, $tag), local $_) = @_; + return unless $_; + if (/^<!DOCTYPE\s+html\s+(?:PUBLIC\s+"[^"]*"|SYSTEM)\s+"([^"]+)"/i) { + push(@extra_links, [$tag, system => URI->new_abs($1, $base)]); + }}, "self, tagname, text"); + $p->parse_file($file); - foreach ($p->links()) { - my ($tag, %foo) = @$_; - map { $links{$_->canonical()}++ } values(%foo); + foreach ($p->links(), @extra_links) { + my ($tag, %attrs) = @$_; + map { $links{$_->canonical()}++ } values(%attrs); } }
Received on Thursday, 25 May 2006 15:21:54 UTC