- From: Ville Skytta via cvs-syncmail <cvsmail@w3.org>
- Date: Mon, 09 Apr 2007 22:18:24 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/perl/modules/W3C/LinkChecker/bin
In directory hutz:/tmp/cvs-serv1201/bin

Modified Files:
 checklink checklink.pod
Log Message:
Add --exclude option for excluding links matching given regexp (#689).

Index: checklink
===================================================================
RCS file: /sources/public/perl/modules/W3C/LinkChecker/bin/checklink,v
retrieving revision 4.51
retrieving revision 4.52
diff -u -d -r4.51 -r4.52
--- checklink 30 Mar 2007 20:47:54 -0000 4.51
+++ checklink 9 Apr 2007 22:18:21 -0000 4.52
@@ -219,6 +219,7 @@
 User => undef,
 Password => undef,
 Base_Locations => [],
+ Exclude => undef,
 Exclude_Docs => undef,
 Masquerade => 0,
 Masquerade_From => '',
@@ -262,6 +263,10 @@
 &parse_arguments() if $Opts{Command_Line};

 # Precompile/error-check regular expressions.
+if (defined($Opts{Exclude})) {
+ eval { $Opts{Exclude} = qr/$Opts{Exclude}/o; };
+ &usage(1, "Error in exclude regexp: $@") if $@;
+}
 if (defined($Opts{Exclude_Docs})) {
 eval { $Opts{Exclude_Docs} = qr/$Opts{Exclude_Docs}/o; };
 &usage(1, "Error in exclude-docs regexp: $@") if $@;
@@ -449,6 +454,7 @@
 'r|recursive' => sub { $Opts{Depth} = -1
 if $Opts{Depth} == 0; },
 'l|location=s' => \@locs,
+ 'X|exclude=s', => \$Opts{Exclude},
 'exclude-docs=s', => \$Opts{Exclude_Docs},
 'u|user=s' => \$Opts{User},
 'p|password=s' => \$Opts{Password},
@@ -537,6 +543,9 @@
 times. If not specified, the default eg. for
 http://www.w3.org/TR/html4/Overview.html
 would be http://www.w3.org/TR/html4/
+ -X, --exclude REGEXP Do not check links whose full, canonical URIs
+ match REGEXP; also limits recursion the same way
+ as --exclude-docs with the same regexp would.
 --exclude-docs REGEXP In recursive mode, do not check links in
 documents whose full, canonical URIs match REGEXP.
 -L, --languages LANGS Accept-Language header to send. The special value
@@ -692,6 +701,8 @@
 - Accept-Language: %s
 - Sleeping %d second%s between requests to each server
 EOF
+ printf("- Excluding links matching %s\n", $Opts{Exclude})
+ if defined($Opts{Exclude});
 printf("- Excluding links in documents whose URIs match %s\n",
 $Opts{Exclude_Docs}) if defined($Opts{Exclude_Docs});
 }
@@ -786,10 +797,10 @@
 };
 }
 foreach my $lines (keys %{$p->{Links}{$link}}) {
- my $canonical = URI->new($abs_link_uri->canonical());
- my $url = $canonical->scheme().':'.$canonical->opaque();
- my $fragment = $canonical->fragment();
- if (! $fragment) {
+ my $url = URI->new($abs_link_uri->canonical());
+ my $fragment = $url->fragment(undef);
+ next if (defined($Opts{Exclude}) && $url =~ $Opts{Exclude});
+ if (!defined($fragment) || $fragment eq '') {
 # Document without fragment
 $links{$url}{location}{$lines} = 1;
 } else {
@@ -992,7 +1003,8 @@
 my $candidate = URI->new($uri)->canonical();

 return 0
- if (defined($Opts{Exclude_Docs}) && $candidate =~ $Opts{Exclude_Docs});
+ if ((defined($Opts{Exclude}) && $candidate =~ $Opts{Exclude}) ||
+ (defined($Opts{Exclude_Docs}) && $candidate =~ $Opts{Exclude_Docs}));

 foreach my $base (@{$Opts{Base_Locations}}) {
 my $rel = $candidate->rel($base);

Index: checklink.pod
===================================================================
RCS file: /sources/public/perl/modules/W3C/LinkChecker/bin/checklink.pod,v
retrieving revision 1.15
retrieving revision 1.16
diff -u -d -r1.15 -r1.16
--- checklink.pod 30 Mar 2007 20:47:55 -0000 1.15
+++ checklink.pod 9 Apr 2007 22:18:21 -0000 1.16
@@ -68,6 +68,12 @@
 L<http://www.w3.org/TR/html4/Overview.html> it would be
 L<http://www.w3.org/TR/html4/>.

+=item B<-X, --exclude> I<regexp>
+
+Do not check links whose full, canonical URIs match I<regexp>. Note that
+this option limits recursion the same way as B<--exclude-docs> with the same
+regular expression would.
+
 =item B<--exclude-docs> I<regexp>

 In recursive mode, do not check links in documents whose full, canonical
Received on Monday, 9 April 2007 22:18:26 UTC