W3C home > Mailing lists > Public > www-validator-cvs@w3.org > February 2010

perl/modules/W3C/LinkChecker/bin checklink,4.180,4.181

From: Ville Skytta via cvs-syncmail <cvsmail@w3.org>
Date: Sun, 21 Feb 2010 17:17:49 +0000
To: www-validator-cvs@w3.org
Message-Id: <E1NjFRF-0004Aa-3r@lionel-hutz.w3.org>
Update of /sources/public/perl/modules/W3C/LinkChecker/bin
In directory hutz:/tmp/cvs-serv16010/bin

Modified Files:
	checklink 
Log Message:
Improve heuristics when passed non-absolute URIs.


Index: checklink
===================================================================
RCS file: /sources/public/perl/modules/W3C/LinkChecker/bin/checklink,v
retrieving revision 4.180
retrieving revision 4.181
diff -u -d -r4.180 -r4.181
--- checklink	21 Feb 2010 16:57:43 -0000	4.180
+++ checklink	21 Feb 2010 17:17:47 -0000	4.181
@@ -219,6 +219,7 @@
 use Time::HiRes          qw();
 use URI             1.31 qw(); # >= 1.31 for sip: abs/rel
 use URI::Escape          qw();
+use URI::Heuristic       qw();
 # @@@ Needs also W3C::UserAgent but can't use() it here.
 
 use constant RC_ROBOTS_TXT          => -1;
@@ -619,7 +620,13 @@
     if ($uri =~ m|^//|) {
       $uri = 'http:'.$uri;
     } else {
-      $uri = 'http://'.$uri;
+      local $ENV{URL_GUESS_PATTERN} = '';
+      my $guess = URI::Heuristic::uf_uri($uri);
+      if ($guess->scheme() && $ua->is_protocol_supported($guess)) {
+        $uri = $guess->as_string();
+      } else {
+        $uri = 'http://'.$uri;
+      }
     }
   }
 
@@ -904,9 +911,9 @@
   return $res;
 }
 
-###########################################
-# Transform foo into file://localhost/foo #
-###########################################
+############################
+# Transform foo into a URI #
+############################
 
 sub urize ($)
 {
@@ -925,8 +932,17 @@
     }
     # return dir itself if an index file was not found
     $uri ||= URI::file->new_abs($uarg);
+  } elsif ($uarg =~ /^[.\/\\]/ || -e $uarg) {
+    $uri = URI::file->new_abs($uarg);
   } else {
-    $uri = URI->new_abs($arg, URI::file->cwd());
+    my $newuri = URI->new($arg);
+    if ($newuri->scheme()) {
+      $uri = $newuri;
+    } else {
+      local $ENV{URL_GUESS_PATTERN} = '';
+      $uri = URI::Heuristic::uf_uri($arg);
+      $uri = URI::file->new_abs($uri) unless $uri->scheme();
+    }
   }
   return $uri->as_string();
 }
Received on Sunday, 21 February 2010 17:17:50 UTC

This archive was generated by hypermail 2.3.1 : Wednesday, 7 January 2015 15:17:41 UTC