- From: Michael Ernst <mernst@csail.mit.edu>
- Date: Sun, 8 Feb 2004 14:03:48 -0500
- To: www-validator@w3.org
Even if a page has no errors, checklink.pl --quiet still prints "Processing <URL>" (along with a horizontal line and vertical whitespace). This makes it hard to scan the output looking for problems, especially if very many pages (say, thousands or tens of thousands) have just been processed. The patch below changes the behavior of --quiet so that it prints the document header (the separator, vertical space, and "Processing <URL>") only if some other message regarding the URL is also printed. Users who wish an indication regarding each page that is processed can still use the --summary switch, as before. -Michael Ernst mernst@csail.mit.edu cd ~/bin/share/ diff -u -b -r /g2/users/mernst/bin/share/checklink.pl-orig /g2/users/mernst/bin/share/checklink.pl --- /g2/users/mernst/bin/share/checklink.pl-orig Fri Feb 6 09:36:19 2004 +++ /g2/users/mernst/bin/share/checklink.pl Fri Feb 6 11:54:10 2004 @@ -187,6 +187,8 @@ my $doc_count = 0; # Time stamp my $timestamp = &get_timestamp(); +# Per-document header; undefined if already printed. See print_doc_header(). +my $doc_header; if ($Opts{Command_Line}) { @@ -503,31 +505,44 @@ $Opts{Base_Location} = ($Opts{Base_Location} eq '.') ? $response->{absolute_uri}->canonical() : URI->new($Opts{Base_Location})->canonical(); + &html_header($uri, 0, $cookie) if ($Opts{HTML}); } else { # Before fetching the document, we don't know if we'll be within the # recursion scope or not (think redirects). return -1 unless &in_recursion_scope($response->{absolute_uri}); + } - print $Opts{HTML} ? '<hr>' : '-' x 40, "\n"; + # Define the document header, and perhaps print it. + # (It might still be defined if the previous document had no errors; + # just redefine it in that case.) + if (! $first) { + if ($Opts{HTML}) { + $doc_header = "\n<hr>\n"; + } else { + $doc_header = "\n" . ('-' x 40) . 
"\n"; + } } - # We are checking a new document - $doc_count++; + my $absolute_uri = $response->{absolute_uri}->as_string(); if ($Opts{HTML}) { - &html_header($uri, 0, $cookie) if $first; - print('<h2>'); + $doc_header .= ("<h2>\nProcessing\t" + . &show_url($absolute_uri) + . "\n</h2>\n\n"); + } else { + $doc_header .= "\nProcessing\t$absolute_uri\n\n"; } - my $absolute_uri = $response->{absolute_uri}->as_string(); + if (! $Opts{Quiet}) { + print_doc_header(); + } - my $result_anchor = 'results'.$doc_count; + # We are checking a new document + $doc_count++; - printf("\nProcessing\t%s\n\n", - $Opts{HTML} ? &show_url($absolute_uri) : $absolute_uri); + my $result_anchor = 'results'.$doc_count; if ($Opts{HTML}) { - print("</h2>\n"); if (! $Opts{Summary_Only}) { printf("<p>Go to <a href=\"#%s\">the results</a>.</p>\n", $result_anchor); @@ -577,6 +592,7 @@ my $abs_link_uri = URI->new_abs($link_uri, $base); if ($Opts{Masquerade}) { if ($abs_link_uri =~ m|^$Opts{Masquerade_From}|) { + print_doc_header(); printf("processing %s in base %s\n", $abs_link_uri, $Opts{Masquerade_To}); my $nlink = $abs_link_uri; @@ -672,12 +688,12 @@ next if &already_processed($u); # Do the job - print "\n"; + print "\n" unless $Opts{Quiet}; if ($Opts{HTML}) { # For the online version, wait for a while to avoid abuses if (!$Opts{Command_Line}) { if ($doc_count == $Opts{Max_Documents}) { - print("<hr>\n<p><strong>Maximum number of documents reached!</strong></p>\n"); + print("<hr>\n<p><strong>Maximum number of documents ($Opts{Max_Documents}) reached!</strong></p>\n"); } if ($doc_count >= $Opts{Max_Documents}) { $doc_count++; @@ -1388,6 +1404,7 @@ } undef $n; + print_doc_header(); print('<p>') if $Opts{HTML}; print('List of duplicate and empty anchors'); print <<EOF if $Opts{HTML}; @@ -1732,6 +1749,7 @@ print "\n"; } } else { + print_doc_header(); print('<h3>') if $Opts{HTML}; print("\nList of broken links"); print(' and redirects') if $Opts{Redirects}; @@ -2063,6 +2081,7 @@ sub hprintf (@) { + 
print_doc_header(); if (! $Opts{HTML}) { printf(@_); } else { @@ -2070,6 +2089,19 @@ } } +# Print the document header, if it hasn't been printed already. +# This is invoked before most other output operations, in order +# to enable quiet processing that doesn't clutter the output with +# "Processing..." messages when nothing else will be reported. +sub print_doc_header () +{ + if (defined($doc_header)) { + print $doc_header; + undef($doc_header); + } +} + + =head1 NAME checklink - check the validity of links in an HTML or XHTML document Diff finished at Sun Feb 8 09:04:04
Received on Sunday, 8 February 2004 15:15:04 UTC