- From: Mercurial notifier <nobody@w3.org>
- Date: Thu, 05 Aug 2010 14:47:05 +0000
- To: link-checker updates <www-validator-cvs@w3.org>
changeset:   144:92ac276d7eab
user:        ville
date:        Thu Jun 15 17:53:14 2006 +0000
files:       bin/checklink bin/checklink.pod
description:
Allow specifying --location multiple times, make it imply --recursive.

diff -r 2f530988cd47 -r 92ac276d7eab bin/checklink
--- a/bin/checklink Thu May 25 15:13:22 2006 +0000
+++ b/bin/checklink Thu Jun 15 17:53:14 2006 +0000
@@ -5,7 +5,7 @@
 # (c) 1999-2006 World Wide Web Consortium
 # based on Renaud Bruyeron's checklink.pl
 #
-# $Id: checklink,v 4.34 2006-05-25 15:13:22 ville Exp $
+# $Id: checklink,v 4.35 2006-06-15 17:53:14 ville Exp $
 #
 # This program is licensed under the W3C(r) Software License:
 # http://www.w3.org/Consortium/Legal/copyright-software
@@ -125,7 +125,7 @@
   $PROGRAM = 'W3C-checklink';
   $VERSION = '4.2.1';
   $REVISION = sprintf('version %s (c) 1999-2006 W3C', $VERSION);
-  my ($cvsver) = q$Revision: 4.34 $ =~ /(\d+[\d\.]*\.\d+)/;
+  my ($cvsver) = q$Revision: 4.35 $ =~ /(\d+[\d\.]*\.\d+)/;
   $AGENT = sprintf('%s/%s [%s] %s',
                    $PROGRAM, $VERSION, $cvsver, LWP::RobotUA->_agent());

@@ -215,7 +215,7 @@
     Max_Documents => 150, # For the online version.
     User => undef,
     Password => undef,
-    Base_Location => '.',
+    Base_Locations => [],
     Exclude_Docs => undef,
     Masquerade => 0,
     Masquerade_From => '',
@@ -423,6 +423,7 @@
   Getopt::Long->import('GetOptions');
   Getopt::Long::Configure('bundling', 'no_ignore_case');
   my $masq = '';
+  my @locs = ();
   GetOptions('help|h|?' => sub { usage(0) },
              'q|quiet' => sub { $Opts{Quiet} = 1;
@@ -438,7 +439,7 @@
              'H|html' => \$Opts{HTML},
              'r|recursive' => sub { $Opts{Depth} = -1
                                       if $Opts{Depth} == 0; },
-             'l|location=s' => \$Opts{Base_Location},
+             'l|location=s' => \@locs,
              'exclude-docs=s', => \$Opts{Exclude_Docs},
              'u|user=s' => \$Opts{User},
              'p|password=s' => \$Opts{Password},
@@ -486,6 +487,10 @@
     warn("*** Warning: minimum allowed sleep time is 1 second, resetting.\n");
     $Opts{Sleep_Time} = 1;
   }
+
+  push(@{$Opts{Base_Locations}}, map { URI->new($_)->canonical() } @locs);
+
+  $Opts{Depth} = -1 if ($Opts{Depth} == 0 && @locs);
 }

 sub version ()
@@ -516,10 +521,11 @@
  -r, --recursive Check the documents linked from the first one.
  -D, --depth N Check the documents linked from the first one to
                depth N (implies --recursive).
- -l, --location URI Scope of the documents checked in recursive mode.
-               By default, for example for
+ -l, --location URI Scope of the documents checked in recursive mode
+               (implies --recursive). Can be specified multiple
+               times. If not specified, the default eg. for
                http://www.w3.org/TR/html4/Overview.html
-               it would be http://www.w3.org/TR/html4/
+               would be http://www.w3.org/TR/html4/
  --exclude-docs REGEXP In recursive mode, do not check links in documents
                whose URIs match REGEXP.
  -L, --languages LANGS Accept-Language header to send. The special value
@@ -642,9 +648,8 @@

   if ($first) {
     # Use the first URI as the recursion base unless specified otherwise.
-    $Opts{Base_Location} = ($Opts{Base_Location} eq '.')
-      ? $response->{absolute_uri}->canonical() :
-        URI->new($Opts{Base_Location})->canonical();
+    push(@{$Opts{Base_Locations}}, $response->{absolute_uri}->canonical())
+      unless @{$Opts{Base_Locations}};
   } else {
     # Before fetching the document, we don't know if we'll be within the
     # recursion scope or not (think redirects).
@@ -964,14 +969,19 @@
   my ($uri) = @_;
   return undef unless $uri;

-  my $current = URI->new($uri)->canonical();
-  my $rel = $current->rel($Opts{Base_Location}); # base -> current!
+  my $candidate = URI->new($uri)->canonical();

-  return undef if ($current eq $rel); # Relative path not possible?
-  return undef if ($rel =~ m|^(\.\.)?/|); # Relative path starts with ../ or /?
-  return undef if (defined($Opts{Exclude_Docs}) &&
-                   $current =~ $Opts{Exclude_Docs});
-  return 1;
+  return undef
+    if (defined($Opts{Exclude_Docs}) && $candidate =~ $Opts{Exclude_Docs});
+
+  foreach my $base (@{$Opts{Base_Locations}}) {
+    my $rel = $candidate->rel($base);
+    next if ($candidate eq $rel); # Relative path not possible?
+    next if ($rel =~ m|^(\.\.)?/|); # Relative path upwards?
+    return 1;
+  }
+
+  return undef; # We always have at least one base location.
 }

 ##################################################
diff -r 2f530988cd47 -r 92ac276d7eab bin/checklink.pod
--- a/bin/checklink.pod Thu May 25 15:13:22 2006 +0000
+++ b/bin/checklink.pod Thu Jun 15 17:53:14 2006 +0000
@@ -1,4 +1,4 @@
-$Id: checklink.pod,v 1.13 2004-11-07 11:17:55 ville Exp $
+$Id: checklink.pod,v 1.14 2006-06-15 17:53:14 ville Exp $

 =head1 NAME

@@ -60,8 +60,12 @@

 =item B<-l, --location> I<uri>

-Scope of the documents checked in recursive mode. By default, for
-L<http://www.w3.org/TR/html4/Overview.html> for example, it would be
+Scope of the documents checked (implies B<--recursive>).
+Can be specified multiple times in order to specify multiple recursion
+bases. If the URI of a candidate document is downwards relative to any of
+the bases, it is considered to be within the scope. If not specified, the
+default is the base URI of the initial document, for example for
+L<http://www.w3.org/TR/html4/Overview.html> it would be
 L<http://www.w3.org/TR/html4/>.

 =item B<--exclude-docs> I<regexp>
Received on Thursday, 5 August 2010 14:47:24 UTC