- From: Mercurial notifier <nobody@w3.org>
- Date: Thu, 05 Aug 2010 14:47:26 +0000
- To: link-checker updates <www-validator-cvs@w3.org>
changeset:   355:f18b7c157c3a
user:        ville
date:        Sun Mar 07 21:07:03 2010 +0000
files:       bin/checklink
description:
Canonicalize input URI earlier, avoids dupe output sections when recursing.


diff -r a78683677c8b -r f18b7c157c3a bin/checklink
--- a/bin/checklink Sun Mar 07 20:50:02 2010 +0000
+++ b/bin/checklink Sun Mar 07 21:07:03 2010 +0000
@@ -5,7 +5,7 @@
 # (c) 1999-2010 World Wide Web Consortium
 # based on Renaud Bruyeron's checklink.pl
 #
-# $Id: checklink,v 4.189 2010-03-07 20:50:02 ville Exp $
+# $Id: checklink,v 4.190 2010-03-07 21:07:03 ville Exp $
 #
 # This program is licensed under the W3C(r) Software License:
 # http://www.w3.org/Consortium/Legal/copyright-software
@@ -306,7 +306,7 @@
     $PROGRAM = 'W3C-checklink';
     $VERSION = '4.5';
     $REVISION = sprintf('version %s (c) 1999-2010 W3C', $VERSION);
-    my ($cvsver) = q$Revision: 4.189 $ =~ /(\d+[\d\.]*\.\d+)/;
+    my ($cvsver) = q$Revision: 4.190 $ =~ /(\d+[\d\.]*\.\d+)/;
     $AGENT = sprintf(
         '%s/%s [%s] %s',
         $PROGRAM, $VERSION, $cvsver,
@@ -663,21 +663,25 @@
         } if (MP2() && !$ENV{HTTP_AUTHORIZATION});
 
         $uri =~ s/^\s+//g;
-        if ($uri !~ m/:/) {
+        if ($uri =~ /:/) {
+            $uri = URI->new($uri);
+        }
+        else {
             if ($uri =~ m|^//|) {
-                $uri = 'http:' . $uri;
+                $uri = URI->new("http:$uri");
             }
             else {
                 local $ENV{URL_GUESS_PATTERN} = '';
                 my $guess = URI::Heuristic::uf_uri($uri);
                 if ($guess->scheme() && $ua->is_protocol_supported($guess)) {
-                    $uri = $guess->as_string();
+                    $uri = $guess;
                 }
                 else {
-                    $uri = 'http://' . $uri;
+                    $uri = URI->new("http://$uri");
                 }
             }
         }
+        $uri = $uri->canonical()->as_string();
 
         &check_uri(scalar($query->Vars()), $uri, 1, $Opts{Depth}, $cookie);
         undef $query;    # Not needed any more.
@@ -1024,7 +1028,7 @@
             $uri = URI::file->new_abs($uri) unless $uri->scheme();
         }
     }
-    return $uri->as_string();
+    return $uri->canonical()->as_string();
 }
 
 ########################################
Received on Thursday, 5 August 2010 14:47:39 UTC