- From: Ville Skytta via cvs-syncmail <cvsmail@w3.org>
- Date: Mon, 09 Apr 2007 22:18:24 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/perl/modules/W3C/LinkChecker/bin
In directory hutz:/tmp/cvs-serv1201/bin
Modified Files:
checklink checklink.pod
Log Message:
Add --exclude option for excluding links that match a given regexp (#689).
Index: checklink
===================================================================
RCS file: /sources/public/perl/modules/W3C/LinkChecker/bin/checklink,v
retrieving revision 4.51
retrieving revision 4.52
diff -u -d -r4.51 -r4.52
--- checklink 30 Mar 2007 20:47:54 -0000 4.51
+++ checklink 9 Apr 2007 22:18:21 -0000 4.52
@@ -219,6 +219,7 @@
User => undef,
Password => undef,
Base_Locations => [],
+ Exclude => undef,
Exclude_Docs => undef,
Masquerade => 0,
Masquerade_From => '',
@@ -262,6 +263,10 @@
&parse_arguments() if $Opts{Command_Line};
# Precompile/error-check regular expressions.
+if (defined($Opts{Exclude})) {
+ eval { $Opts{Exclude} = qr/$Opts{Exclude}/o; };
+ &usage(1, "Error in exclude regexp: $@") if $@;
+}
if (defined($Opts{Exclude_Docs})) {
eval { $Opts{Exclude_Docs} = qr/$Opts{Exclude_Docs}/o; };
&usage(1, "Error in exclude-docs regexp: $@") if $@;
@@ -449,6 +454,7 @@
'r|recursive' => sub { $Opts{Depth} = -1
if $Opts{Depth} == 0; },
'l|location=s' => \@locs,
+ 'X|exclude=s', => \$Opts{Exclude},
'exclude-docs=s', => \$Opts{Exclude_Docs},
'u|user=s' => \$Opts{User},
'p|password=s' => \$Opts{Password},
@@ -537,6 +543,9 @@
times. If not specified, the default eg. for
http://www.w3.org/TR/html4/Overview.html
would be http://www.w3.org/TR/html4/
+ -X, --exclude REGEXP Do not check links whose full, canonical URIs
+ match REGEXP; also limits recursion the same way
+ as --exclude-docs with the same regexp would.
--exclude-docs REGEXP In recursive mode, do not check links in documents
whose full, canonical URIs match REGEXP.
-L, --languages LANGS Accept-Language header to send. The special value
@@ -692,6 +701,8 @@
- Accept-Language: %s
- Sleeping %d second%s between requests to each server
EOF
+ printf("- Excluding links matching %s\n", $Opts{Exclude})
+ if defined($Opts{Exclude});
printf("- Excluding links in documents whose URIs match %s\n",
$Opts{Exclude_Docs}) if defined($Opts{Exclude_Docs});
}
@@ -786,10 +797,10 @@
};
}
foreach my $lines (keys %{$p->{Links}{$link}}) {
- my $canonical = URI->new($abs_link_uri->canonical());
- my $url = $canonical->scheme().':'.$canonical->opaque();
- my $fragment = $canonical->fragment();
- if (! $fragment) {
+ my $url = URI->new($abs_link_uri->canonical());
+ my $fragment = $url->fragment(undef);
+ next if (defined($Opts{Exclude}) && $url =~ $Opts{Exclude});
+ if (!defined($fragment) || $fragment eq '') {
# Document without fragment
$links{$url}{location}{$lines} = 1;
} else {
@@ -992,7 +1003,8 @@
my $candidate = URI->new($uri)->canonical();
return 0
- if (defined($Opts{Exclude_Docs}) && $candidate =~ $Opts{Exclude_Docs});
+ if ((defined($Opts{Exclude}) && $candidate =~ $Opts{Exclude}) ||
+ (defined($Opts{Exclude_Docs}) && $candidate =~ $Opts{Exclude_Docs}));
foreach my $base (@{$Opts{Base_Locations}}) {
my $rel = $candidate->rel($base);
Index: checklink.pod
===================================================================
RCS file: /sources/public/perl/modules/W3C/LinkChecker/bin/checklink.pod,v
retrieving revision 1.15
retrieving revision 1.16
diff -u -d -r1.15 -r1.16
--- checklink.pod 30 Mar 2007 20:47:55 -0000 1.15
+++ checklink.pod 9 Apr 2007 22:18:21 -0000 1.16
@@ -68,6 +68,12 @@
L<http://www.w3.org/TR/html4/Overview.html> it would be
L<http://www.w3.org/TR/html4/>.
+=item B<-X, --exclude> I<regexp>
+
+Do not check links whose full, canonical URIs match I<regexp>. Note that
+this option also limits recursion, exactly as B<--exclude-docs> would if
+it were given the same regular expression.
+
=item B<--exclude-docs> I<regexp>
In recursive mode, do not check links in documents whose full, canonical
Received on Monday, 9 April 2007 22:18:26 UTC