W3C home > Mailing lists > Public > www-validator-cvs@w3.org > March 2010

perl/modules/W3C/LinkChecker/bin checklink,4.189,4.190

From: Ville Skytta via cvs-syncmail <cvsmail@w3.org>
Date: Sun, 07 Mar 2010 21:07:05 +0000
To: www-validator-cvs@w3.org
Message-Id: <E1NoNgn-0004q9-ED@lionel-hutz.w3.org>
Update of /sources/public/perl/modules/W3C/LinkChecker/bin
In directory hutz:/tmp/cvs-serv18459/bin

Modified Files:
	checklink 
Log Message:
Canonicalize the input URI earlier, which avoids duplicate output sections when recursing.


Index: checklink
===================================================================
RCS file: /sources/public/perl/modules/W3C/LinkChecker/bin/checklink,v
retrieving revision 4.189
retrieving revision 4.190
diff -u -d -r4.189 -r4.190
--- checklink	7 Mar 2010 20:50:02 -0000	4.189
+++ checklink	7 Mar 2010 21:07:03 -0000	4.190
@@ -663,21 +663,25 @@
     } if (MP2() && !$ENV{HTTP_AUTHORIZATION});
 
     $uri =~ s/^\s+//g;
-    if ($uri !~ m/:/) {
+    if ($uri =~ /:/) {
+        $uri = URI->new($uri);
+    }
+    else {
         if ($uri =~ m|^//|) {
-            $uri = 'http:' . $uri;
+            $uri = URI->new("http:$uri");
         }
         else {
             local $ENV{URL_GUESS_PATTERN} = '';
             my $guess = URI::Heuristic::uf_uri($uri);
             if ($guess->scheme() && $ua->is_protocol_supported($guess)) {
-                $uri = $guess->as_string();
+                $uri = $guess;
             }
             else {
-                $uri = 'http://' . $uri;
+                $uri = URI->new("http://$uri");
             }
         }
     }
+    $uri = $uri->canonical()->as_string();
 
     &check_uri(scalar($query->Vars()), $uri, 1, $Opts{Depth}, $cookie);
     undef $query;    # Not needed any more.
@@ -1024,7 +1028,7 @@
             $uri = URI::file->new_abs($uri) unless $uri->scheme();
         }
     }
-    return $uri->as_string();
+    return $uri->canonical()->as_string();
 }
 
 ########################################
Received on Sunday, 7 March 2010 21:07:06 GMT

This archive was generated by hypermail 2.2.0+W3C-0.50 : Thursday, 26 April 2012 12:55:19 GMT