- From: Mercurial notifier <nobody@w3.org>
- Date: Thu, 05 Aug 2010 14:47:25 +0000
- To: link-checker updates <www-validator-cvs@w3.org>
changeset: 344:db78fb7c2ce7
user: ville
date: Sun Feb 21 17:17:47 2010 +0000
files: bin/checklink
description:
Improve heuristics when passed non-absolute URIs.
diff -r 4754ddab123b -r db78fb7c2ce7 bin/checklink
--- a/bin/checklink Sun Feb 21 16:57:43 2010 +0000
+++ b/bin/checklink Sun Feb 21 17:17:47 2010 +0000
@@ -5,7 +5,7 @@
# (c) 1999-2010 World Wide Web Consortium
# based on Renaud Bruyeron's checklink.pl
#
-# $Id: checklink,v 4.180 2010-02-21 16:57:43 ville Exp $
+# $Id: checklink,v 4.181 2010-02-21 17:17:47 ville Exp $
#
# This program is licensed under the W3C(r) Software License:
# http://www.w3.org/Consortium/Legal/copyright-software
@@ -219,6 +219,7 @@
use Time::HiRes qw();
use URI 1.31 qw(); # >= 1.31 for sip: abs/rel
use URI::Escape qw();
+use URI::Heuristic qw();
# @@@ Needs also W3C::UserAgent but can't use() it here.
use constant RC_ROBOTS_TXT => -1;
@@ -284,7 +285,7 @@
$PROGRAM = 'W3C-checklink';
$VERSION = '4.5';
$REVISION = sprintf('version %s (c) 1999-2010 W3C', $VERSION);
- my ($cvsver) = q$Revision: 4.180 $ =~ /(\d+[\d\.]*\.\d+)/;
+ my ($cvsver) = q$Revision: 4.181 $ =~ /(\d+[\d\.]*\.\d+)/;
$AGENT = sprintf('%s/%s [%s] %s',
$PROGRAM, $VERSION, $cvsver,
(W3C::UserAgent::USE_ROBOT_UA
@@ -619,7 +620,13 @@
if ($uri =~ m|^//|) {
$uri = 'http:'.$uri;
} else {
- $uri = 'http://'.$uri;
+ local $ENV{URL_GUESS_PATTERN} = '';
+ my $guess = URI::Heuristic::uf_uri($uri);
+ if ($guess->scheme() && $ua->is_protocol_supported($guess)) {
+ $uri = $guess->as_string();
+ } else {
+ $uri = 'http://'.$uri;
+ }
}
}
@@ -904,9 +911,9 @@
return $res;
}
-###########################################
-# Transform foo into file://localhost/foo #
-###########################################
+############################
+# Transform foo into a URI #
+############################
sub urize ($)
{
@@ -925,8 +932,17 @@
}
# return dir itself if an index file was not found
$uri ||= URI::file->new_abs($uarg);
+ } elsif ($uarg =~ /^[.\/\\]/ || -e $uarg) {
+ $uri = URI::file->new_abs($uarg);
} else {
- $uri = URI->new_abs($arg, URI::file->cwd());
+ my $newuri = URI->new($arg);
+ if ($newuri->scheme()) {
+ $uri = $newuri;
+ } else {
+ local $ENV{URL_GUESS_PATTERN} = '';
+ $uri = URI::Heuristic::uf_uri($arg);
+ $uri = URI::file->new_abs($uri) unless $uri->scheme();
+ }
}
return $uri->as_string();
}
Received on Thursday, 5 August 2010 14:47:40 UTC