- From: Ville Skytta via cvs-syncmail <cvsmail@w3.org>
- Date: Sat, 09 Feb 2008 14:49:16 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/perl/modules/W3C/LinkChecker/bin In directory hutz:/tmp/cvs-serv28708 Modified Files: checklink Log Message: Review and explicitly list things treated as links. New cases checked: * BLOCKQUOTE,DEL,INS,Q: cite * BODY: background * FRAME,IFRAME,IMG: longdesc * HEAD: profile * EMBED (proprietary): pluginspage, pluginurl. Cases no longer checked: * (any unknown element): src, href * (any element whose src attribute is unknown to me): src * (any element whose href attribute is unknown to me): href. Theoretically, we could use %HTML::Tagset::linkElements, but it contains things that are not necessarily dereferenceable URIs and/or require special treatment so maintaining our own lists is simpler. Index: checklink =================================================================== RCS file: /sources/public/perl/modules/W3C/LinkChecker/bin/checklink,v retrieving revision 4.80 retrieving revision 4.81 diff -u -d -r4.80 -r4.81 --- checklink 24 Jan 2008 01:29:38 -0000 4.80 +++ checklink 9 Feb 2008 14:49:13 -0000 4.81 @@ -195,6 +195,46 @@ use constant MP2 => (exists($ENV{MOD_PERL_API_VERSION}) && $ENV{MOD_PERL_API_VERSION} >= 2); +# Tag=>attribute mapping of things we treat as links. +# Note: base/@href gets a special treatment, see start() for details. +use constant LINK_ATTRS => { + a => ['href'], + area => ['href'], + blockquote => ['cite'], + body => ['background'], + del => ['cite'], + embed => ['href', 'pluginspage', 'pluginurl', 'src'], # proprietary + frame => ['longdesc', 'src'], + iframe => ['longdesc', 'src'], + img => ['longdesc', 'src'], + ins => ['cite'], + input => ['src'], + link => ['href'], + object => ['data'], + q => ['cite'], + script => ['src'], +}; + +# Tag=>attribute mapping of things we treat as space separated lists of links. +use constant LINK_LIST_ATTRS => { + head => ['profile'], +}; + +# TBD/TODO: +# - applet/@archive, @code? 
+# - applet/@codebase: does not need to be dereferenceable but HTML 4 spec says +# it may only point to subdirectories of the directory containing the current +# document +# - bgsound/@src? +# - object/@classid? +# - object/@archive? +# - object/@codebase: base URI for @classid, @data, @archive +# - isindex/@action? +# - layer/@background,@src? +# - ilayer/@background? +# - table,tr,td,th/@background? +# - xmp/@href? + @W3C::LinkChecker::ISA = qw(HTML::Parser); BEGIN @@ -1437,19 +1477,34 @@ # Links if (!$self->{only_anchors}) { - # Here, we are checking too many things - # The right thing to do is to parse the DTD... + + # Special case: base/@href if ($tag eq 'base') { # Treat <base> (without href) or <base href=""> as if it didn't exist. if (defined($attr->{href}) && $attr->{href} ne '') { $self->{base} = $attr->{href}; } - } else { - $self->add_link($attr->{href}, $line); + # Note: base/@href intentionally not treated as a dereferenceable link: + # http://www.w3.org/mid/200802091439.27764.ville.skytta%40iki.fi + } + + # Link attributes: + if (my $link_attrs = LINK_ATTRS()->{$tag}) { + for my $la (@$link_attrs) { + $self->add_link($attr->{$la}, $line); + } + } + + # List of links attributes: + if (my $link_attrs = LINK_LIST_ATTRS()->{$tag}) { + for my $la (@$link_attrs) { + if (defined(my $value = $attr->{$la})) { + for my $link (split(/\s+/, $value)) { + $self->add_link($link, $line); + } + } + } } - $self->add_link($attr->{src}, $line); - $self->add_link($attr->{data}, $line) if ($tag eq 'object'); - $self->add_link($attr->{cite}, $line) if ($tag eq 'blockquote'); } $self->parse_progress($line) if $Opts{Progress};
Received on Saturday, 9 February 2008 14:49:25 UTC