- From: Ville Skytta via cvs-syncmail <cvsmail@w3.org>
- Date: Sun, 21 Feb 2010 17:17:49 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/perl/modules/W3C/LinkChecker/bin In directory hutz:/tmp/cvs-serv16010/bin Modified Files: checklink Log Message: Improve heuristics when passed non-absolute URIs. Index: checklink =================================================================== RCS file: /sources/public/perl/modules/W3C/LinkChecker/bin/checklink,v retrieving revision 4.180 retrieving revision 4.181 diff -u -d -r4.180 -r4.181 --- checklink 21 Feb 2010 16:57:43 -0000 4.180 +++ checklink 21 Feb 2010 17:17:47 -0000 4.181 @@ -219,6 +219,7 @@ use Time::HiRes qw(); use URI 1.31 qw(); # >= 1.31 for sip: abs/rel use URI::Escape qw(); +use URI::Heuristic qw(); # @@@ Needs also W3C::UserAgent but can't use() it here. use constant RC_ROBOTS_TXT => -1; @@ -619,7 +620,13 @@ if ($uri =~ m|^//|) { $uri = 'http:'.$uri; } else { - $uri = 'http://'.$uri; + local $ENV{URL_GUESS_PATTERN} = ''; + my $guess = URI::Heuristic::uf_uri($uri); + if ($guess->scheme() && $ua->is_protocol_supported($guess)) { + $uri = $guess->as_string(); + } else { + $uri = 'http://'.$uri; + } } } @@ -904,9 +911,9 @@ return $res; } -########################################### -# Transform foo into file://localhost/foo # -########################################### +############################ +# Transform foo into a URI # +############################ sub urize ($) { @@ -925,8 +932,17 @@ } # return dir itself if an index file was not found $uri ||= URI::file->new_abs($uarg); + } elsif ($uarg =~ /^[.\/\\]/ || -e $uarg) { + $uri = URI::file->new_abs($uarg); } else { - $uri = URI->new_abs($arg, URI::file->cwd()); + my $newuri = URI->new($arg); + if ($newuri->scheme()) { + $uri = $newuri; + } else { + local $ENV{URL_GUESS_PATTERN} = ''; + $uri = URI::Heuristic::uf_uri($arg); + $uri = URI::file->new_abs($uri) unless $uri->scheme(); + } } return $uri->as_string(); }
Received on Sunday, 21 February 2010 17:17:50 UTC