W3C home > Mailing lists > Public > www-validator@w3.org > February 2004

checklink: patch to make "--quiet" option quiet

From: Michael Ernst <mernst@csail.mit.edu>
Date: Sun, 8 Feb 2004 14:03:48 -0500
Message-Id: <200402081903.i18J3ms19355@manioc.lcs.mit.edu>
To: www-validator@w3.org

Even if a page has no errors, checklink.pl --quiet still prints "Processing
<URL>" (along with a horizontal line and vertical whitespace).  This makes
it hard to scan the output looking for problems, especially if very many
pages (say, thousands or tens of thousands) have just been processed.

The patch below changes the behavior of --quiet so that it prints the
document header (the separator, vertical space, and "Processing <URL>") only
if some other message regarding the URL is also printed.

Users who want an indication of each page that is processed can
still use the --summary switch, as before.

					-Michael Ernst
					 mernst@csail.mit.edu



cd ~/bin/share/
diff -u -b -r /g2/users/mernst/bin/share/checklink.pl-orig /g2/users/mernst/bin/share/checklink.pl
--- /g2/users/mernst/bin/share/checklink.pl-orig	Fri Feb  6 09:36:19 2004
+++ /g2/users/mernst/bin/share/checklink.pl	Fri Feb  6 11:54:10 2004
@@ -187,6 +187,8 @@
 my $doc_count = 0;
 # Time stamp
 my $timestamp = &get_timestamp();
+# Per-document header; undefined if already printed.  See print_doc_header().
+my $doc_header;
 
 if ($Opts{Command_Line}) {
 
@@ -503,31 +505,44 @@
     $Opts{Base_Location} = ($Opts{Base_Location} eq '.')
       ? $response->{absolute_uri}->canonical() :
         URI->new($Opts{Base_Location})->canonical();
+    &html_header($uri, 0, $cookie) if ($Opts{HTML});
   } else {
     # Before fetching the document, we don't know if we'll be within the
     # recursion scope or not (think redirects).
     return -1 unless &in_recursion_scope($response->{absolute_uri});
+  }
 
-    print $Opts{HTML} ? '<hr>' : '-' x 40, "\n";
+  # Define the document header, and perhaps print it.
+  # (It might still be defined if the previous document had no errors;
+  # just redefine it in that case.)
+  if (! $first) {
+    if ($Opts{HTML}) {
+      $doc_header = "\n<hr>\n";
+    } else {
+      $doc_header = "\n" . ('-' x 40) . "\n";
+    }
   }
 
-  # We are checking a new document
-  $doc_count++;
+  my $absolute_uri = $response->{absolute_uri}->as_string();
 
   if ($Opts{HTML}) {
-    &html_header($uri, 0, $cookie) if $first;
-    print('<h2>');
+    $doc_header .= ("<h2>\nProcessing\t"
+                    . &show_url($absolute_uri)
+                    . "\n</h2>\n\n");
+  } else {
+    $doc_header .= "\nProcessing\t$absolute_uri\n\n";
   }
 
-  my $absolute_uri = $response->{absolute_uri}->as_string();
+  if (! $Opts{Quiet}) {
+    print_doc_header();
+  }
 
-  my $result_anchor = 'results'.$doc_count;
+  # We are checking a new document
+  $doc_count++;
 
-  printf("\nProcessing\t%s\n\n",
-         $Opts{HTML} ? &show_url($absolute_uri) : $absolute_uri);
+  my $result_anchor = 'results'.$doc_count;
 
   if ($Opts{HTML}) {
-    print("</h2>\n");
     if (! $Opts{Summary_Only}) {
       printf("<p>Go to <a href=\"#%s\">the results</a>.</p>\n",
              $result_anchor);
@@ -577,6 +592,7 @@
     my $abs_link_uri = URI->new_abs($link_uri, $base);
     if ($Opts{Masquerade}) {
       if ($abs_link_uri =~ m|^$Opts{Masquerade_From}|) {
+        print_doc_header();
         printf("processing %s in base %s\n",
                $abs_link_uri, $Opts{Masquerade_To});
         my $nlink = $abs_link_uri;
@@ -672,12 +688,12 @@
       next if &already_processed($u);
 
       # Do the job
-      print "\n";
+      print "\n" unless $Opts{Quiet};
       if ($Opts{HTML}) {
         # For the online version, wait for a while to avoid abuses
         if (!$Opts{Command_Line}) {
           if ($doc_count == $Opts{Max_Documents}) {
-            print("<hr>\n<p><strong>Maximum number of documents reached!</strong></p>\n");
+            print("<hr>\n<p><strong>Maximum number of documents ($Opts{Max_Documents}) reached!</strong></p>\n");
           }
           if ($doc_count >= $Opts{Max_Documents}) {
             $doc_count++;
@@ -1388,6 +1404,7 @@
   }
   undef $n;
 
+  print_doc_header();
   print('<p>') if $Opts{HTML};
   print('List of duplicate and empty anchors');
   print <<EOF if $Opts{HTML};
@@ -1732,6 +1749,7 @@
       print "\n";
     }
   } else {
+    print_doc_header();
     print('<h3>') if $Opts{HTML};
     print("\nList of broken links");
     print(' and redirects') if $Opts{Redirects};
@@ -2063,6 +2081,7 @@
 
 sub hprintf (@)
 {
+  print_doc_header();
   if (! $Opts{HTML}) {
     printf(@_);
   } else {
@@ -2070,6 +2089,19 @@
   }
 }
 
+# Print the document header, if it hasn't been printed already.
+# This is invoked before most other output operations, in order
+# to enable quiet processing that doesn't clutter the output with
+# "Processing..." messages when nothing else will be reported.
+sub print_doc_header ()
+{
+  if (defined($doc_header)) {
+    print $doc_header;
+    undef($doc_header);
+  }
+}
+
+
 =head1 NAME
 
 checklink - check the validity of links in an HTML or XHTML document

Diff finished at Sun Feb  8 09:04:04
Received on Sunday, 8 February 2004 15:15:04 GMT

This archive was generated by hypermail 2.2.0+W3C-0.50 : Wednesday, 25 April 2012 12:14:10 GMT