W3C home > Mailing lists > Public > www-validator-cvs@w3.org > April 2007

perl/modules/W3C/LinkChecker/bin checklink,4.51,4.52 checklink.pod,1.15,1.16

From: Ville Skytta via cvs-syncmail <cvsmail@w3.org>
Date: Mon, 09 Apr 2007 22:18:24 +0000
To: www-validator-cvs@w3.org
Message-Id: <E1Hb2Bs-0000Jf-Bf@lionel-hutz.w3.org>

Update of /sources/public/perl/modules/W3C/LinkChecker/bin
In directory hutz:/tmp/cvs-serv1201/bin

Modified Files:
	checklink checklink.pod 
Log Message:
Add --exclude option for excluding links matching given regexp (#689).

Index: checklink
===================================================================
RCS file: /sources/public/perl/modules/W3C/LinkChecker/bin/checklink,v
retrieving revision 4.51
retrieving revision 4.52
diff -u -d -r4.51 -r4.52
--- checklink	30 Mar 2007 20:47:54 -0000	4.51
+++ checklink	9 Apr 2007 22:18:21 -0000	4.52
@@ -219,6 +219,7 @@
     User              => undef,
     Password          => undef,
     Base_Locations    => [],
+    Exclude           => undef,
     Exclude_Docs      => undef,
     Masquerade        => 0,
     Masquerade_From   => '',
@@ -262,6 +263,10 @@
 &parse_arguments() if $Opts{Command_Line};
 
 # Precompile/error-check regular expressions.
+if (defined($Opts{Exclude})) {
+  eval { $Opts{Exclude} = qr/$Opts{Exclude}/o; };
+  &usage(1, "Error in exclude regexp: $@") if $@;
+}
 if (defined($Opts{Exclude_Docs})) {
   eval { $Opts{Exclude_Docs} = qr/$Opts{Exclude_Docs}/o; };
   &usage(1, "Error in exclude-docs regexp: $@") if $@;
@@ -449,6 +454,7 @@
              'r|recursive'     => sub { $Opts{Depth} = -1
                                           if $Opts{Depth} == 0; },
              'l|location=s'    => \@locs,
+             'X|exclude=s',    => \$Opts{Exclude},
              'exclude-docs=s', => \$Opts{Exclude_Docs},
              'u|user=s'        => \$Opts{User},
              'p|password=s'    => \$Opts{Password},
@@ -537,6 +543,9 @@
                             times.  If not specified, the default eg. for
                             http://www.w3.org/TR/html4/Overview.html
                             would be http://www.w3.org/TR/html4/
+ -X, --exclude REGEXP       Do not check links whose full, canonical URIs
+                            match REGEXP; also limits recursion the same way
+                            as --exclude-docs with the same regexp would.
  --exclude-docs REGEXP      In recursive mode, do not check links in documents
                             whose full, canonical URIs match REGEXP.
  -L, --languages LANGS      Accept-Language header to send.  The special value
@@ -692,6 +701,8 @@
 - Accept-Language: %s
 - Sleeping %d second%s between requests to each server
 EOF
+    printf("- Excluding links matching %s\n", $Opts{Exclude})
+      if defined($Opts{Exclude});
     printf("- Excluding links in documents whose URIs match %s\n",
       $Opts{Exclude_Docs}) if defined($Opts{Exclude_Docs});
   }
@@ -786,10 +797,10 @@
       };
     }
     foreach my $lines (keys %{$p->{Links}{$link}}) {
-      my $canonical = URI->new($abs_link_uri->canonical());
-      my $url = $canonical->scheme().':'.$canonical->opaque();
-      my $fragment = $canonical->fragment();
-      if (! $fragment) {
+      my $url = URI->new($abs_link_uri->canonical());
+      my $fragment = $url->fragment(undef);
+      next if (defined($Opts{Exclude}) && $url =~ $Opts{Exclude});
+      if (!defined($fragment) || $fragment eq '') {
         # Document without fragment
         $links{$url}{location}{$lines} = 1;
       } else {
@@ -992,7 +1003,8 @@
   my $candidate = URI->new($uri)->canonical();
 
   return 0
-    if (defined($Opts{Exclude_Docs}) && $candidate =~ $Opts{Exclude_Docs});
+      if ((defined($Opts{Exclude}) && $candidate =~ $Opts{Exclude}) ||
+          (defined($Opts{Exclude_Docs}) && $candidate =~ $Opts{Exclude_Docs}));
 
   foreach my $base (@{$Opts{Base_Locations}}) {
     my $rel = $candidate->rel($base);

Index: checklink.pod
===================================================================
RCS file: /sources/public/perl/modules/W3C/LinkChecker/bin/checklink.pod,v
retrieving revision 1.15
retrieving revision 1.16
diff -u -d -r1.15 -r1.16
--- checklink.pod	30 Mar 2007 20:47:55 -0000	1.15
+++ checklink.pod	9 Apr 2007 22:18:21 -0000	1.16
@@ -68,6 +68,12 @@
 L<http://www.w3.org/TR/html4/Overview.html> it would be
 L<http://www.w3.org/TR/html4/>.
 
+=item B<-X, --exclude> I<regexp>
+
+Do not check links whose full, canonical URIs match I<regexp>.  Note that
+this option limits recursion the same way as B<--exclude-docs> with the same
+regular expression would.
+
 =item B<--exclude-docs> I<regexp>
 
 In recursive mode, do not check links in documents whose full, canonical
Received on Monday, 9 April 2007 22:18:26 GMT

This archive was generated by hypermail 2.2.0+W3C-0.50 : Thursday, 26 April 2012 12:54:56 GMT