- From: Mercurial notifier <nobody@w3.org>
- Date: Thu, 05 Aug 2010 14:47:07 +0000
- To: link-checker updates <www-validator-cvs@w3.org>
changeset: 163:3a6812a7c653 user: ville date: Mon Apr 09 22:18:21 2007 +0000 files: bin/checklink bin/checklink.pod description: Add --exclude option for excluding links matching given regexp (#689). diff -r 2137beef7996 -r 3a6812a7c653 bin/checklink --- a/bin/checklink Fri Mar 30 20:47:55 2007 +0000 +++ b/bin/checklink Mon Apr 09 22:18:21 2007 +0000 @@ -5,7 +5,7 @@ # (c) 1999-2007 World Wide Web Consortium # based on Renaud Bruyeron's checklink.pl # -# $Id: checklink,v 4.51 2007-03-30 20:47:54 ville Exp $ +# $Id: checklink,v 4.52 2007-04-09 22:18:21 ville Exp $ # # This program is licensed under the W3C(r) Software License: # http://www.w3.org/Consortium/Legal/copyright-software @@ -128,7 +128,7 @@ $PROGRAM = 'W3C-checklink'; $VERSION = '4.3'; $REVISION = sprintf('version %s (c) 1999-2007 W3C', $VERSION); - my ($cvsver) = q$Revision: 4.51 $ =~ /(\d+[\d\.]*\.\d+)/; + my ($cvsver) = q$Revision: 4.52 $ =~ /(\d+[\d\.]*\.\d+)/; $AGENT = sprintf('%s/%s [%s] %s', $PROGRAM, $VERSION, $cvsver, LWP::RobotUA->_agent()); @@ -219,6 +219,7 @@ User => undef, Password => undef, Base_Locations => [], + Exclude => undef, Exclude_Docs => undef, Masquerade => 0, Masquerade_From => '', @@ -262,6 +263,10 @@ &parse_arguments() if $Opts{Command_Line}; # Precompile/error-check regular expressions. +if (defined($Opts{Exclude})) { + eval { $Opts{Exclude} = qr/$Opts{Exclude}/o; }; + &usage(1, "Error in exclude regexp: $@") if $@; +} if (defined($Opts{Exclude_Docs})) { eval { $Opts{Exclude_Docs} = qr/$Opts{Exclude_Docs}/o; }; &usage(1, "Error in exclude-docs regexp: $@") if $@; @@ -449,6 +454,7 @@ 'r|recursive' => sub { $Opts{Depth} = -1 if $Opts{Depth} == 0; }, 'l|location=s' => \@locs, + 'X|exclude=s', => \$Opts{Exclude}, 'exclude-docs=s', => \$Opts{Exclude_Docs}, 'u|user=s' => \$Opts{User}, 'p|password=s' => \$Opts{Password}, @@ -537,6 +543,9 @@ times. If not specified, the default eg. for http://www.w3.org/TR/html4/Overview.html would be http://www.w3.org/TR/html4/ + -X, --exclude REGEXP Do not check links whose full, canonical URIs + match REGEXP; also limits recursion the same way + as --exclude-docs with the same regexp would. --exclude-docs REGEXP In recursive mode, do not check links in documents whose full, canonical URIs match REGEXP. -L, --languages LANGS Accept-Language header to send. The special value @@ -692,6 +701,8 @@ - Accept-Language: %s - Sleeping %d second%s between requests to each server EOF + printf("- Excluding links matching %s\n", $Opts{Exclude}) + if defined($Opts{Exclude}); printf("- Excluding links in documents whose URIs match %s\n", $Opts{Exclude_Docs}) if defined($Opts{Exclude_Docs}); } @@ -786,10 +797,10 @@ }; } foreach my $lines (keys %{$p->{Links}{$link}}) { - my $canonical = URI->new($abs_link_uri->canonical()); - my $url = $canonical->scheme().':'.$canonical->opaque(); - my $fragment = $canonical->fragment(); - if (! $fragment) { + my $url = URI->new($abs_link_uri->canonical()); + my $fragment = $url->fragment(undef); + next if (defined($Opts{Exclude}) && $url =~ $Opts{Exclude}); + if (!defined($fragment) || $fragment eq '') { # Document without fragment $links{$url}{location}{$lines} = 1; } else { @@ -992,7 +1003,8 @@ my $candidate = URI->new($uri)->canonical(); return 0 - if (defined($Opts{Exclude_Docs}) && $candidate =~ $Opts{Exclude_Docs}); + if ((defined($Opts{Exclude}) && $candidate =~ $Opts{Exclude}) || + (defined($Opts{Exclude_Docs}) && $candidate =~ $Opts{Exclude_Docs})); foreach my $base (@{$Opts{Base_Locations}}) { my $rel = $candidate->rel($base); diff -r 2137beef7996 -r 3a6812a7c653 bin/checklink.pod --- a/bin/checklink.pod Fri Mar 30 20:47:55 2007 +0000 +++ b/bin/checklink.pod Mon Apr 09 22:18:21 2007 +0000 @@ -1,4 +1,4 @@ -$Id: checklink.pod,v 1.15 2007-03-30 20:47:55 ville Exp $ +$Id: checklink.pod,v 1.16 2007-04-09 22:18:21 ville Exp $ =head1 NAME @@ -67,6 +67,12 @@ default is the base URI of the initial document, for example for L<http://www.w3.org/TR/html4/Overview.html> it would be L<http://www.w3.org/TR/html4/>. + +=item B<-X, --exclude> I<regexp> + +Do not check links whose full, canonical URIs match I<regexp>. Note that +this option limits recursion the same way as B<--exclude-docs> with the same +regular expression would. =item B<--exclude-docs> I<regexp>
Received on Thursday, 5 August 2010 14:47:27 UTC