- From: Mercurial notifier <nobody@w3.org>
- Date: Thu, 05 Aug 2010 14:47:22 +0000
- To: link-checker updates <www-validator-cvs@w3.org>
changeset: 319:def35ad5f837
user: mernst
date: Sat Jul 04 03:45:57 2009 +0000
files: bin/checklink
description:
User bug report (hash key was Suppress..., shourd have been Suppress...)
led me to rewrites, shorten, & optimize redirect checking.
diff -r 2aa66ff72527 -r def35ad5f837 bin/checklink
--- a/bin/checklink Mon Jun 29 20:14:38 2009 +0000
+++ b/bin/checklink Sat Jul 04 03:45:57 2009 +0000
@@ -5,7 +5,7 @@
# (c) 1999-2009 World Wide Web Consortium
# based on Renaud Bruyeron's checklink.pl
#
-# $Id: checklink,v 4.161 2009-06-29 20:14:38 mernst Exp $
+# $Id: checklink,v 4.162 2009-07-04 03:45:57 mernst Exp $
#
# This program is licensed under the W3C(r) Software License:
# http://www.w3.org/Consortium/Legal/copyright-software
@@ -275,7 +275,7 @@
$PROGRAM = 'W3C-checklink';
$VERSION = '4.5';
$REVISION = sprintf('version %s (c) 1999-2009 W3C', $VERSION);
- my ($cvsver) = q$Revision: 4.161 $ =~ /(\d+[\d\.]*\.\d+)/;
+ my ($cvsver) = q$Revision: 4.162 $ =~ /(\d+[\d\.]*\.\d+)/;
$AGENT = sprintf('%s/%s [%s] %s',
$PROGRAM, $VERSION, $cvsver,
(W3C::UserAgent::USE_ROBOT_UA
@@ -395,6 +395,7 @@
Exclude_Docs => undef,
Suppress_Redirect => [],
Suppress_Redirect_Prefix => [],
+ Suppress_Redirect_Regexp => [],
Suppress_Broken => [],
Suppress_Fragment => [],
Masquerade => 0,
@@ -701,9 +702,11 @@
}
}
for my $srp_arg (@{$Opts{Suppress_Redirect_Prefix}}) {
- if ($srp_arg !~ /.->./) {
+ if ($srp_arg !~ /^(.*)->(.*)$/) {
&usage(1, "Bad suppress-redirect-prefix argument, should contain \"->\": $srp_arg");
}
+ # Turn prefixes into a regexp.
+ push @{$Opts{Suppress_Redirect_Regexp}}, qr/^\Q$1\E(.*)->\Q$2\E\1$/ism;
}
for my $sb_arg (@{$Opts{Suppress_Broken}}) {
if ($sb_arg !~ /^(-1|[0-9]+):./) {
@@ -1932,37 +1935,12 @@
my $to = $response->request()->url(); # same on every loop iteration
my $from_to = $from . '->' . $to;
my $match = grep { $_ eq $from_to } @{$Opts{Suppress_Redirect}};
- # print STDERR "Result $match of checking $from_to\n";
+ # print STDERR "Result $match of redirect checking $from_to\n";
if ($match) { next; }
- # Check for redirect_prefix match
- my $prefix_match = 0;
- my $from_len = length($from);
- my $to_len = length($to);
- for my $redir_prefix (@{$Opts{Suppresss_Redirect_Prefix}}) {
- if ($redir_prefix !~ /^(.*)->(.*)$/) {
- die "Bad suppress-redirect-prefix: $redir_prefix";
- }
- my $from_prefix = $1;
- my $to_prefix = $2;
- my $from_prefix_len = length($from_prefix);
- my $to_prefix_len = length($to_prefix);
- if (($from eq $from_prefix) && ($to eq $to_prefix)) {
- $prefix_match = 1;
- last;
- } elsif (($from_prefix_len < $from_len)
- && ($to_prefix_len < $to_len)
- && ($from_prefix eq substr($from, 0, $from_prefix_len))
- && ($to_prefix eq substr($to, 0, $to_prefix_len))
- && (substr($from, $from_prefix_len) eq substr($to, $to_prefix_len))) {
- $prefix_match = 1;
- last;
- }
- }
- if ($prefix_match) {
- # print STDERR "A SUPPRESSED REDIRECT:\n $from\n $to\n";
- next;
- }
+ $match = grep { $from_to =~ /$_/ } @{$Opts{Suppress_Redirect_Regexp}};
+ # print STDERR "Result $match of regexp checking $from_to\n";
+ if ($match) { next; }
$redirects->{$prev->request()->url()} = $response->request()->url();
}
Received on Thursday, 5 August 2010 14:47:40 UTC