- From: Mercurial notifier <nobody@w3.org>
- Date: Thu, 05 Aug 2010 14:47:11 +0000
- To: link-checker updates <www-validator-cvs@w3.org>
changeset: 210:ecec50b52a17
user: ville
date: Sat Feb 09 14:49:13 2008 +0000
files: bin/checklink
description:
Review and explicitly list things treated as links.
New cases checked:
* BLOCKQUOTE,DEL,INS,Q: cite
* BODY: background
* FRAME,IFRAME,IMG: longdesc
* HEAD: profile
* EMBED (proprietary): pluginspage, pluginurl.
Cases no longer checked:
* (any unknown element): src, href
* (any element whose src attribute is unknown to me): src
* (any element whose href attribute is unknown to me): href.
Theoretically, we could use %HTML::Tagset::linkElements, but it contains things
that are not necessarily dereferenceable URIs and/or require special treatment,
so maintaining our own lists is simpler.
diff -r 8d6b29d9faba -r ecec50b52a17 bin/checklink
--- a/bin/checklink Thu Jan 24 18:19:56 2008 +0000
+++ b/bin/checklink Sat Feb 09 14:49:13 2008 +0000
@@ -5,7 +5,7 @@
# (c) 1999-2007 World Wide Web Consortium
# based on Renaud Bruyeron's checklink.pl
#
-# $Id: checklink,v 4.80 2008-01-24 01:29:38 ot Exp $
+# $Id: checklink,v 4.81 2008-02-09 14:49:13 ville Exp $
#
# This program is licensed under the W3C(r) Software License:
# http://www.w3.org/Consortium/Legal/copyright-software
@@ -195,6 +195,46 @@
use constant MP2 =>
(exists($ENV{MOD_PERL_API_VERSION}) && $ENV{MOD_PERL_API_VERSION} >= 2);
+# Tag=>attribute mapping of things we treat as links.
+# Note: base/@href gets a special treatment, see start() for details.
+use constant LINK_ATTRS => {
+ a => ['href'],
+ area => ['href'],
+ blockquote => ['cite'],
+ body => ['background'],
+ del => ['cite'],
+ embed => ['href', 'pluginspage', 'pluginurl', 'src'], # proprietary
+ frame => ['longdesc', 'src'],
+ iframe => ['longdesc', 'src'],
+ img => ['longdesc', 'src'],
+ ins => ['cite'],
+ input => ['src'],
+ link => ['href'],
+ object => ['data'],
+ q => ['cite'],
+ script => ['src'],
+};
+
+# Tag=>attribute mapping of things we treat as space-separated lists of links.
+use constant LINK_LIST_ATTRS => {
+ head => ['profile'],
+};
+
+# TBD/TODO:
+# - applet/@archive, @code?
+# - applet/@codebase: does not need to be dereferenceable but HTML 4 spec says
+# it may only point to subdirectories of the directory containing the current
+# document
+# - bgsound/@src?
+# - object/@classid?
+# - object/@archive?
+# - object/@codebase: base URI for @classid, @data, @archive
+# - isindex/@action?
+# - layer/@background,@src?
+# - ilayer/@background?
+# - table,tr,td,th/@background?
+# - xmp/@href?
+
@W3C::LinkChecker::ISA = qw(HTML::Parser);
BEGIN
@@ -204,7 +244,7 @@
$PROGRAM = 'W3C-checklink';
$VERSION = '4.3';
$REVISION = sprintf('version %s (c) 1999-2007 W3C', $VERSION);
- my ($cvsver) = q$Revision: 4.80 $ =~ /(\d+[\d\.]*\.\d+)/;
+ my ($cvsver) = q$Revision: 4.81 $ =~ /(\d+[\d\.]*\.\d+)/;
$AGENT = sprintf('%s/%s [%s] %s',
$PROGRAM, $VERSION, $cvsver, LWP::RobotUA->_agent());
@@ -1437,19 +1477,34 @@
# Links
if (!$self->{only_anchors}) {
- # Here, we are checking too many things
- # The right thing to do is to parse the DTD...
+
+ # Special case: base/@href
if ($tag eq 'base') {
# Treat <base> (without href) or <base href=""> as if it didn't exist.
if (defined($attr->{href}) && $attr->{href} ne '') {
$self->{base} = $attr->{href};
}
- } else {
- $self->add_link($attr->{href}, $line);
+ # Note: base/@href intentionally not treated as a dereferenceable link:
+ # http://www.w3.org/mid/200802091439.27764.ville.skytta%40iki.fi
}
- $self->add_link($attr->{src}, $line);
- $self->add_link($attr->{data}, $line) if ($tag eq 'object');
- $self->add_link($attr->{cite}, $line) if ($tag eq 'blockquote');
+
+ # Link attributes:
+ if (my $link_attrs = LINK_ATTRS()->{$tag}) {
+ for my $la (@$link_attrs) {
+ $self->add_link($attr->{$la}, $line);
+ }
+ }
+
+ # Attributes whose value is a list of links:
+ if (my $link_attrs = LINK_LIST_ATTRS()->{$tag}) {
+ for my $la (@$link_attrs) {
+ if (defined(my $value = $attr->{$la})) {
+ for my $link (split(/\s+/, $value)) {
+ $self->add_link($link, $line);
+ }
+ }
+ }
+ }
}
$self->parse_progress($line) if $Opts{Progress};
Received on Thursday, 5 August 2010 14:47:20 UTC