- From: Mercurial notifier <nobody@w3.org>
- Date: Thu, 05 Aug 2010 14:47:25 +0000
- To: link-checker updates <www-validator-cvs@w3.org>
changeset: 344:db78fb7c2ce7 user: ville date: Sun Feb 21 17:17:47 2010 +0000 files: bin/checklink description: Improve heuristics when passed non-absolute URIs. diff -r 4754ddab123b -r db78fb7c2ce7 bin/checklink --- a/bin/checklink Sun Feb 21 16:57:43 2010 +0000 +++ b/bin/checklink Sun Feb 21 17:17:47 2010 +0000 @@ -5,7 +5,7 @@ # (c) 1999-2010 World Wide Web Consortium # based on Renaud Bruyeron's checklink.pl # -# $Id: checklink,v 4.180 2010-02-21 16:57:43 ville Exp $ +# $Id: checklink,v 4.181 2010-02-21 17:17:47 ville Exp $ # # This program is licensed under the W3C(r) Software License: # http://www.w3.org/Consortium/Legal/copyright-software @@ -219,6 +219,7 @@ use Time::HiRes qw(); use URI 1.31 qw(); # >= 1.31 for sip: abs/rel use URI::Escape qw(); +use URI::Heuristic qw(); # @@@ Needs also W3C::UserAgent but can't use() it here. use constant RC_ROBOTS_TXT => -1; @@ -284,7 +285,7 @@ $PROGRAM = 'W3C-checklink'; $VERSION = '4.5'; $REVISION = sprintf('version %s (c) 1999-2010 W3C', $VERSION); - my ($cvsver) = q$Revision: 4.180 $ =~ /(\d+[\d\.]*\.\d+)/; + my ($cvsver) = q$Revision: 4.181 $ =~ /(\d+[\d\.]*\.\d+)/; $AGENT = sprintf('%s/%s [%s] %s', $PROGRAM, $VERSION, $cvsver, (W3C::UserAgent::USE_ROBOT_UA @@ -619,7 +620,13 @@ if ($uri =~ m|^//|) { $uri = 'http:'.$uri; } else { - $uri = 'http://'.$uri; + local $ENV{URL_GUESS_PATTERN} = ''; + my $guess = URI::Heuristic::uf_uri($uri); + if ($guess->scheme() && $ua->is_protocol_supported($guess)) { + $uri = $guess->as_string(); + } else { + $uri = 'http://'.$uri; + } } } @@ -904,9 +911,9 @@ return $res; } -########################################### -# Transform foo into file://localhost/foo # -########################################### +############################ +# Transform foo into a URI # +############################ sub urize ($) { @@ -925,8 +932,17 @@ } # return dir itself if an index file was not found $uri ||= URI::file->new_abs($uarg); + } elsif ($uarg =~ /^[.\/\\]/ || -e $uarg) { + $uri = URI::file->new_abs($uarg); } else { - $uri = URI->new_abs($arg, URI::file->cwd()); + my $newuri = URI->new($arg); + if ($newuri->scheme()) { + $uri = $newuri; + } else { + local $ENV{URL_GUESS_PATTERN} = ''; + $uri = URI::Heuristic::uf_uri($arg); + $uri = URI::file->new_abs($uri) unless $uri->scheme(); + } } return $uri->as_string(); }
Received on Thursday, 5 August 2010 14:47:40 UTC