- From: Ville Skytta via cvs-syncmail <cvsmail@w3.org>
- Date: Sat, 09 Feb 2008 14:49:16 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/perl/modules/W3C/LinkChecker/bin
In directory hutz:/tmp/cvs-serv28708
Modified Files:
checklink
Log Message:
Review and explicitly list things treated as links.
New cases checked:
* BLOCKQUOTE,DEL,INS,Q: cite
* BODY: background
* FRAME,IFRAME,IMG: longdesc
* HEAD: profile
* EMBED (proprietary): pluginspage, pluginurl.
Cases no longer checked:
* (any unknown element): src, href
* (any element whose src attribute is unknown to me): src
* (any element whose href attribute is unknown to me): href.
Theoretically, we could use %HTML::Tagset::linkElements, but it contains things
that are not necessarily dereferenceable URIs and/or require special treatment,
so maintaining our own lists is simpler.
Index: checklink
===================================================================
RCS file: /sources/public/perl/modules/W3C/LinkChecker/bin/checklink,v
retrieving revision 4.80
retrieving revision 4.81
diff -u -d -r4.80 -r4.81
--- checklink 24 Jan 2008 01:29:38 -0000 4.80
+++ checklink 9 Feb 2008 14:49:13 -0000 4.81
@@ -195,6 +195,46 @@
use constant MP2 =>
(exists($ENV{MOD_PERL_API_VERSION}) && $ENV{MOD_PERL_API_VERSION} >= 2);
+# Tag=>attribute mapping of things we treat as links.
+# Note: base/@href gets a special treatment, see start() for details.
+use constant LINK_ATTRS => {
+ a => ['href'],
+ area => ['href'],
+ blockquote => ['cite'],
+ body => ['background'],
+ del => ['cite'],
+ embed => ['href', 'pluginspage', 'pluginurl', 'src'], # proprietary
+ frame => ['longdesc', 'src'],
+ iframe => ['longdesc', 'src'],
+ img => ['longdesc', 'src'],
+ ins => ['cite'],
+ input => ['src'],
+ link => ['href'],
+ object => ['data'],
+ q => ['cite'],
+ script => ['src'],
+};
+
+# Tag=>attribute mapping of things we treat as space-separated lists of links.
+use constant LINK_LIST_ATTRS => {
+ head => ['profile'],
+};
+
+# TBD/TODO:
+# - applet/@archive, @code?
+# - applet/@codebase: does not need to be dereferenceable but HTML 4 spec says
+# it may only point to subdirectories of the directory containing the current
+# document
+# - bgsound/@src?
+# - object/@classid?
+# - object/@archive?
+# - object/@codebase: base URI for @classid, @data, @archive
+# - isindex/@action?
+# - layer/@background,@src?
+# - ilayer/@background?
+# - table,tr,td,th/@background?
+# - xmp/@href?
+
@W3C::LinkChecker::ISA = qw(HTML::Parser);
BEGIN
@@ -1437,19 +1477,34 @@
# Links
if (!$self->{only_anchors}) {
- # Here, we are checking too many things
- # The right thing to do is to parse the DTD...
+
+ # Special case: base/@href
if ($tag eq 'base') {
# Treat <base> (without href) or <base href=""> as if it didn't exist.
if (defined($attr->{href}) && $attr->{href} ne '') {
$self->{base} = $attr->{href};
}
- } else {
- $self->add_link($attr->{href}, $line);
+ # Note: base/@href intentionally not treated as a dereferenceable link:
+ # http://www.w3.org/mid/200802091439.27764.ville.skytta%40iki.fi
+ }
+
+ # Link attributes:
+ if (my $link_attrs = LINK_ATTRS()->{$tag}) {
+ for my $la (@$link_attrs) {
+ $self->add_link($attr->{$la}, $line);
+ }
+ }
+
+ # Attributes holding lists of links:
+ if (my $link_attrs = LINK_LIST_ATTRS()->{$tag}) {
+ for my $la (@$link_attrs) {
+ if (defined(my $value = $attr->{$la})) {
+ for my $link (split(/\s+/, $value)) {
+ $self->add_link($link, $line);
+ }
+ }
+ }
}
- $self->add_link($attr->{src}, $line);
- $self->add_link($attr->{data}, $line) if ($tag eq 'object');
- $self->add_link($attr->{cite}, $line) if ($tag eq 'blockquote');
}
$self->parse_progress($line) if $Opts{Progress};
Received on Saturday, 9 February 2008 14:49:25 UTC