link-checker commit: User bug report (hash key was Suppress..., shourd have been Suppress...)

changeset:   319:def35ad5f837
user:        mernst
date:        Sat Jul 04 03:45:57 2009 +0000
files:       bin/checklink
description:
User bug report (hash key was Suppress..., shourd have been Suppress...)
led me to rewrites, shorten, & optimize redirect checking.


diff -r 2aa66ff72527 -r def35ad5f837 bin/checklink
--- a/bin/checklink	Mon Jun 29 20:14:38 2009 +0000
+++ b/bin/checklink	Sat Jul 04 03:45:57 2009 +0000
@@ -5,7 +5,7 @@
 # (c) 1999-2009 World Wide Web Consortium
 # based on Renaud Bruyeron's checklink.pl
 #
-# $Id: checklink,v 4.161 2009-06-29 20:14:38 mernst Exp $
+# $Id: checklink,v 4.162 2009-07-04 03:45:57 mernst Exp $
 #
 # This program is licensed under the W3C(r) Software License:
 #       http://www.w3.org/Consortium/Legal/copyright-software
@@ -275,7 +275,7 @@
   $PROGRAM     = 'W3C-checklink';
   $VERSION     = '4.5';
   $REVISION    = sprintf('version %s (c) 1999-2009 W3C', $VERSION);
-  my ($cvsver) = q$Revision: 4.161 $ =~ /(\d+[\d\.]*\.\d+)/;
+  my ($cvsver) = q$Revision: 4.162 $ =~ /(\d+[\d\.]*\.\d+)/;
   $AGENT       = sprintf('%s/%s [%s] %s',
                          $PROGRAM, $VERSION, $cvsver,
                          (W3C::UserAgent::USE_ROBOT_UA
@@ -395,6 +395,7 @@
     Exclude_Docs      => undef,
     Suppress_Redirect => [],
     Suppress_Redirect_Prefix => [],
+    Suppress_Redirect_Regexp => [],
     Suppress_Broken   => [],
     Suppress_Fragment => [],
     Masquerade        => 0,
@@ -701,9 +702,11 @@
     }
   }
   for my $srp_arg (@{$Opts{Suppress_Redirect_Prefix}}) {
-    if ($srp_arg !~ /.->./) {
+    if ($srp_arg !~ /^(.*)->(.*)$/) {
       &usage(1, "Bad suppress-redirect-prefix argument, should contain \"->\": $srp_arg");
     }
+    # Turn prefixes into a regexp.
+    push @{$Opts{Suppress_Redirect_Regexp}}, qr/^\Q$1\E(.*)->\Q$2\E\1$/ism;
   }
   for my $sb_arg (@{$Opts{Suppress_Broken}}) {
     if ($sb_arg !~ /^(-1|[0-9]+):./) {
@@ -1932,37 +1935,12 @@
     my $to = $response->request()->url(); # same on every loop iteration
     my $from_to = $from . '->' . $to;
     my $match = grep { $_ eq $from_to } @{$Opts{Suppress_Redirect}};
-    # print STDERR "Result $match of checking $from_to\n";
+    # print STDERR "Result $match of redirect checking $from_to\n";
     if ($match) { next; }
 
-    # Check for redirect_prefix match
-    my $prefix_match = 0;
-    my $from_len = length($from);
-    my $to_len = length($to);
-    for my $redir_prefix (@{$Opts{Suppresss_Redirect_Prefix}}) {
-      if ($redir_prefix !~ /^(.*)->(.*)$/) {
-        die "Bad suppress-redirect-prefix: $redir_prefix";
-      }
-      my $from_prefix = $1;
-      my $to_prefix = $2;
-      my $from_prefix_len = length($from_prefix);
-      my $to_prefix_len = length($to_prefix);
-      if (($from eq $from_prefix) && ($to eq $to_prefix)) {
-        $prefix_match = 1;
-        last;
-      } elsif (($from_prefix_len < $from_len)
-                 && ($to_prefix_len < $to_len)
-                 && ($from_prefix eq substr($from, 0, $from_prefix_len))
-                 && ($to_prefix eq substr($to, 0, $to_prefix_len))
-                 && (substr($from, $from_prefix_len) eq substr($to, $to_prefix_len))) {
-        $prefix_match = 1;
-        last;
-      }
-    }
-    if ($prefix_match) {
-      # print STDERR "A SUPPRESSED REDIRECT:\n  $from\n  $to\n";
-      next;
-    }
+    $match = grep { $from_to =~ /$_/ } @{$Opts{Suppress_Redirect_Regexp}};
+    # print STDERR "Result $match of regexp checking $from_to\n";
+    if ($match) { next; }
 
     $redirects->{$prev->request()->url()} = $response->request()->url();
   }

Received on Thursday, 5 August 2010 14:47:40 UTC