link-checker commit: Code cleanups. from Mercurial notifier on 2011-03-22 (www-validator-cvs@w3.org from March 2011)

From: Mercurial notifier <nobody@w3.org>
Date: Tue, 22 Mar 2011 20:34:01 +0000
To: link-checker updates <www-validator-cvs@w3.org>
Message-Id: <E1Q28HB-0005cI-Dz@mcbain.w3.org>
changeset:   390:20e62a9e944f
user:        Ville Skyttä <ville.skytta@iki.fi>
date:        Tue Mar 22 21:01:43 2011 +0200
files:       bin/checklink
description:
Code cleanups.


diff -r 4407b1a4c6b8 -r 20e62a9e944f bin/checklink
--- a/bin/checklink	Tue Mar 22 20:46:42 2011 +0200
+++ b/bin/checklink	Tue Mar 22 21:01:43 2011 +0200
@@ -388,7 +388,7 @@
 EOF
 
     # Trusted environment variables that need laundering in taint mode.
-    foreach (qw(NNTPSERVER NEWSHOST)) {
+    for (qw(NNTPSERVER NEWSHOST)) {
         ($ENV{$_}) = ($ENV{$_} =~ /^(.*)$/) if $ENV{$_};
     }
 
@@ -533,7 +533,7 @@
 
     my $check_num = 1;
     my @bases     = @{$Opts{Base_Locations}};
-    foreach my $uri (@ARGV) {
+    for my $uri (@ARGV) {
 
         # Reset base locations so that previous URI's given on the command line
         # won't affect the recursion scope for this URI (see check_uri())
@@ -550,7 +550,7 @@
     if ($Opts{HTML}) {
         &html_footer();
     }
-    elsif (($doc_count > 0) && !$Opts{Summary_Only}) {
+    elsif ($doc_count > 0 && !$Opts{Summary_Only}) {
         printf("\n%s\n", &global_stats());
     }
 
@@ -1247,7 +1247,7 @@
         my $canon_uri = URI->new($abs_link_uri->canonical());
         my $fragment  = $canon_uri->fragment(undef);
         if (!defined($Opts{Exclude}) || $canon_uri !~ $Opts{Exclude}) {
-            foreach my $line_num (keys(%$lines)) {
+            for my $line_num (keys(%$lines)) {
                 if (!defined($fragment) || !length($fragment)) {
 
                     # Document without fragment
@@ -1330,7 +1330,7 @@
             $broken{$u}{location} = 1;
 
             # All the fragments associated are hence broken
-            foreach my $fragment (keys %{$ulinks->{fragments}}) {
+            for my $fragment (keys %{$ulinks->{fragments}}) {
                 $broken{$u}{fragments}{$fragment}++;
             }
         }
@@ -1357,7 +1357,7 @@
     # Do we want to process other documents?
     if ($depth != 0) {
 
-        foreach my $u (map { URI->new($_) } keys %links) {
+        for my $u (map { URI->new($_) } keys %links) {
 
             next unless $results{$u}{location}{success};    # Broken link?
 
@@ -1457,7 +1457,7 @@
     # Get the resource
     my $response;
     if (defined($results{$uri}{response}) &&
-        !(($method eq 'GET') && ($results{$uri}{method} eq 'HEAD')))
+        !($method eq 'GET' && $results{$uri}{method} eq 'HEAD'))
     {
         $response = $results{$uri}{response};
     }
@@ -1543,7 +1543,7 @@
         return 0 if ($candidate =~ $excluded_doc);
     }
 
-    foreach my $base (@{$Opts{Base_Locations}}) {
+    for my $base (@{$Opts{Base_Locations}}) {
         my $rel = $candidate->rel($base);
         next if ($candidate eq $rel);    # Relative path not possible?
         next if ($rel =~ m|^(\.\.)?/|);  # Relative path upwards?
@@ -1754,8 +1754,8 @@
 
     # What type of broken link is it? (stored in {record} - the {display}
     #              information is just for visual use only)
-    if (($results{$uri}{location}{display} == 401) &&
-        ($results{$uri}{location}{code} == 404))
+    if ($results{$uri}{location}{display} == 401 &&
+        $results{$uri}{location}{code} == 404)
     {
         $results{$uri}{location}{record} = 404;
     }
@@ -2110,9 +2110,9 @@
 
     # Extract the doctype
     my @declaration = split(/\s+/, $text, 4);
-    if (($#declaration >= 3) &&
-        ($declaration[0] eq 'DOCTYPE') &&
-        (lc($declaration[1]) eq 'html'))
+    if ($#declaration >= 3 &&
+        $declaration[0] eq 'DOCTYPE' &&
+        lc($declaration[1]) eq 'html')
     {
 
         # Parse the doctype declaration
@@ -2168,13 +2168,13 @@
     # Get the document with the appropriate method
     # Only use GET if there are fragments. HEAD is enough if it's not the
     # case.
-    my @fragments = keys %{$links->{$uri}{fragments}};
-    my $method = scalar(@fragments) ? 'GET' : 'HEAD';
+    my $fragments = $links->{$uri}{fragments} || {};
+    my $method = scalar(%$fragments) ? 'GET' : 'HEAD';
 
     my $response;
     my $being_processed = 0;
-    if ((!defined($results{$uri})) ||
-        (($method eq 'GET') && ($results{$uri}{method} eq 'HEAD')))
+    if (!defined($results{$uri}) ||
+        ($method eq 'GET' && $results{$uri}{method} eq 'HEAD'))
     {
         $being_processed = 1;
         $response = &get_uri($method, $uri, $referer);
@@ -2225,7 +2225,7 @@
     }
 
     # Check that the fragments exist
-    foreach my $fragment (keys %{$links->{$uri}{fragments}}) {
+    for my $fragment (keys %$fragments) {
         if (defined($p->{Anchors}{$fragment}) ||
             &escape_match($fragment, $p->{Anchors}) ||
             grep { $_ eq "$uri#$fragment" } @{$Opts{Suppress_Fragment}})
@@ -2242,7 +2242,7 @@
 sub escape_match ($\%)
 {
     my ($a, $hash) = (URI::Escape::uri_unescape($_[0]), $_[1]);
-    foreach my $b (keys %$hash) {
+    for my $b (keys %$hash) {
         return 1 if ($a eq URI::Escape::uri_unescape($b));
     }
     return 0;
@@ -2472,7 +2472,7 @@
 EOF
     print("\n");
 
-    foreach my $anchor (@errors) {
+    for my $anchor (@errors) {
         my $format;
         my @unique = &sort_unique(
             map { line_number($_) }
@@ -2504,7 +2504,7 @@
 
     # Process each URL
     my ($c, $previous_c);
-    foreach my $u (@$urls) {
+    for my $u (@$urls) {
         my @fragments = keys %{$broken->{$u}{fragments}};
 
         # Did we get a redirect?
@@ -2513,7 +2513,7 @@
         # List of lines
         my @total_lines;
         push(@total_lines, keys(%{$links->{$u}{location}}));
-        foreach my $f (@fragments) {
+        for my $f (@fragments) {
             push(@total_lines, keys(%{$links->{$u}{fragments}{$f}}))
                 unless ($f eq $u && defined($links->{$u}{$u}{LINE_UNKNOWN()}));
         }
@@ -2692,7 +2692,7 @@
         }
 
         # Fragments
-        foreach my $f (@fragments) {
+        for my $f (@fragments) {
             my @unique_lines =
                 &sort_unique(keys %{$links->{$u}{fragments}{$f}});
             my $plural = (scalar(@unique_lines) > 1) ? 's' : '';
@@ -2772,10 +2772,8 @@
         RC_ROBOTS_TXT() => sprintf(
             'The link was not checked due to %srobots exclusion rules%s. Check the link manually, and see also the link checker %sdocumentation on robots exclusion%s.',
             $Opts{HTML} ? (
-                '<a href="http://www.robotstxt.org/robotstxt.html">',
-                '</a>',
-                "<a href=\"$Cfg{Doc_URI}#bot\">",
-                '</a>'
+                '<a href="http://www.robotstxt.org/robotstxt.html">', '</a>',
+                "<a href=\"$Cfg{Doc_URI}#bot\">",                     '</a>'
                 ) : ('') x 4
         ),
         RC_DNS_ERROR() =>
@@ -2845,7 +2843,7 @@
         # Sort the URI's by HTTP Code
         my %code_summary;
         my @idx;
-        foreach my $u (@urls) {
+        for my $u (@urls) {
             if (defined($results->{$u}{location}{record})) {
                 my $c = &code_shown($u, $results);
                 $code_summary{$c}++;
@@ -2883,7 +2881,7 @@
 </thead>
 <tbody>
 EOF
-            foreach my $code (sort(keys(%code_summary))) {
+            for my $code (sort(keys(%code_summary))) {
                 printf('<tr%s>', &bgcolor($code));
                 printf('<td><a href="#d%scode_%s">%s</a></td>',
                     $doc_count, $code, http_rc($code));
Received on Tuesday, 22 March 2011 20:34:02 UTC