- From: Ville Skytta via cvs-syncmail <cvsmail@w3.org>
- Date: Thu, 15 Jun 2006 17:53:16 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/perl/modules/W3C/LinkChecker/bin
In directory hutz:/tmp/cvs-serv30242
Modified Files:
checklink checklink.pod
Log Message:
Allow specifying --location multiple times, make it imply --recursive.
Index: checklink
===================================================================
RCS file: /sources/public/perl/modules/W3C/LinkChecker/bin/checklink,v
retrieving revision 4.34
retrieving revision 4.35
diff -u -d -r4.34 -r4.35
--- checklink 25 May 2006 15:13:22 -0000 4.34
+++ checklink 15 Jun 2006 17:53:14 -0000 4.35
@@ -215,7 +215,7 @@
Max_Documents => 150, # For the online version.
User => undef,
Password => undef,
- Base_Location => '.',
+ Base_Locations => [],
Exclude_Docs => undef,
Masquerade => 0,
Masquerade_From => '',
@@ -423,6 +423,7 @@
Getopt::Long->import('GetOptions');
Getopt::Long::Configure('bundling', 'no_ignore_case');
my $masq = '';
+ my @locs = ();
GetOptions('help|h|?' => sub { usage(0) },
'q|quiet' => sub { $Opts{Quiet} = 1;
@@ -438,7 +439,7 @@
'H|html' => \$Opts{HTML},
'r|recursive' => sub { $Opts{Depth} = -1
if $Opts{Depth} == 0; },
- 'l|location=s' => \$Opts{Base_Location},
+ 'l|location=s' => \@locs,
'exclude-docs=s', => \$Opts{Exclude_Docs},
'u|user=s' => \$Opts{User},
'p|password=s' => \$Opts{Password},
@@ -486,6 +487,10 @@
warn("*** Warning: minimum allowed sleep time is 1 second, resetting.\n");
$Opts{Sleep_Time} = 1;
}
+
+ push(@{$Opts{Base_Locations}}, map { URI->new($_)->canonical() } @locs);
+
+ $Opts{Depth} = -1 if ($Opts{Depth} == 0 && @locs);
}
sub version ()
@@ -516,10 +521,11 @@
-r, --recursive Check the documents linked from the first one.
-D, --depth N Check the documents linked from the first one to
depth N (implies --recursive).
- -l, --location URI Scope of the documents checked in recursive mode.
- By default, for example for
+ -l, --location URI Scope of the documents checked in recursive mode
+ (implies --recursive). Can be specified multiple
+ times. If not specified, the default eg. for
http://www.w3.org/TR/html4/Overview.html
- it would be http://www.w3.org/TR/html4/
+ would be http://www.w3.org/TR/html4/
--exclude-docs REGEXP In recursive mode, do not check links in documents
whose URIs match REGEXP.
-L, --languages LANGS Accept-Language header to send. The special value
@@ -642,9 +648,8 @@
if ($first) {
# Use the first URI as the recursion base unless specified otherwise.
- $Opts{Base_Location} = ($Opts{Base_Location} eq '.')
- ? $response->{absolute_uri}->canonical() :
- URI->new($Opts{Base_Location})->canonical();
+ push(@{$Opts{Base_Locations}}, $response->{absolute_uri}->canonical())
+ unless @{$Opts{Base_Locations}};
} else {
# Before fetching the document, we don't know if we'll be within the
# recursion scope or not (think redirects).
@@ -964,14 +969,19 @@
my ($uri) = @_;
return undef unless $uri;
- my $current = URI->new($uri)->canonical();
- my $rel = $current->rel($Opts{Base_Location}); # base -> current!
+ my $candidate = URI->new($uri)->canonical();
- return undef if ($current eq $rel); # Relative path not possible?
- return undef if ($rel =~ m|^(\.\.)?/|); # Relative path starts with ../ or /?
- return undef if (defined($Opts{Exclude_Docs}) &&
- $current =~ $Opts{Exclude_Docs});
- return 1;
+ return undef
+ if (defined($Opts{Exclude_Docs}) && $candidate =~ $Opts{Exclude_Docs});
+
+ foreach my $base (@{$Opts{Base_Locations}}) {
+ my $rel = $candidate->rel($base);
+ next if ($candidate eq $rel); # Relative path not possible?
+ next if ($rel =~ m|^(\.\.)?/|); # Relative path upwards?
+ return 1;
+ }
+
+ return undef; # We always have at least one base location.
}
##################################################
Index: checklink.pod
===================================================================
RCS file: /sources/public/perl/modules/W3C/LinkChecker/bin/checklink.pod,v
retrieving revision 1.13
retrieving revision 1.14
diff -u -d -r1.13 -r1.14
--- checklink.pod 7 Nov 2004 11:17:55 -0000 1.13
+++ checklink.pod 15 Jun 2006 17:53:14 -0000 1.14
@@ -60,8 +60,12 @@
=item B<-l, --location> I<uri>
-Scope of the documents checked in recursive mode. By default, for
-L<http://www.w3.org/TR/html4/Overview.html> for example, it would be
+Scope of the documents checked (implies B<--recursive>).
+Can be specified multiple times in order to specify multiple recursion
+bases. If the URI of a candidate document is downwards relative to any of
+the bases, it is considered to be within the scope. If not specified, the
+default is the base URI of the initial document, for example for
+L<http://www.w3.org/TR/html4/Overview.html> it would be
L<http://www.w3.org/TR/html4/>.
=item B<--exclude-docs> I<regexp>
Received on Thursday, 15 June 2006 17:53:26 UTC