link-checker commit: Weed out duplicate canonical URLs before distributing links.

changeset:   396:2f338aa935af
tag:         tip
user:        Ville Skyttä <ville.skytta@iki.fi>
date:        Tue Mar 29 22:13:03 2011 +0300
files:       bin/checklink
description:
Weed out duplicate canonical URLs before distributing links.


diff -r db4d0fe63d96 -r 2f338aa935af bin/checklink
--- a/bin/checklink	Mon Mar 28 23:49:07 2011 +0300
+++ b/bin/checklink	Tue Mar 29 22:13:03 2011 +0300
@@ -1233,9 +1233,12 @@
         my $canon_uri = URI->new($abs_link_uri->canonical());
         my $fragment  = $canon_uri->fragment(undef);
         if (!defined($Opts{Exclude}) || $canon_uri !~ $Opts{Exclude}) {
-            my $hostport =
-                $canon_uri->can('host_port') ? $canon_uri->host_port() : '';
-            push(@{$hostlinks{$hostport}}, $canon_uri);
+            if (!exists($links{$canon_uri})) {
+                my $hostport =
+                    $canon_uri->can('host_port') ? $canon_uri->host_port() :
+                                                   '';
+                push(@{$hostlinks{$hostport}}, $canon_uri);
+            }
             for my $line_num (keys(%$lines)) {
                 if (!defined($fragment) || !length($fragment)) {
 

Received on Tuesday, 29 March 2011 19:13:12 UTC