- From: Michael Ernst <mernst@csail.mit.edu>
- Date: Sun, 29 Jun 2008 20:10:27 +0200
- To: www-validator@w3.org
The implementation of the -q flag to the checklink program has several problems. You can reproduce these by running, for example:

  checklink -q -r -e http://pag.csail.mit.edu/~mernst/

1. checklink omits the "processing http://foo.com/my/webpage.html" output, so the output gives no indication of which page the errors are on.

2. checklink isn't actually quiet: it always prints the "----------------------------------------" separator even if there is no other output. This makes it hard to scan the output looking for problems, especially if very many pages are being processed.

The separator and the "processing" line should be output if, and only if, checklink outputs other information regarding the given webpage. Users who wish an indication regarding each page that is processed can still use the --summary switch, as before.

The patch below corrects both problems.

-Michael Ernst

diff -u -b -r --exclude=CVS --exclude=.hg --exclude=.svn /DS/home-0/mernst/bin/src/perl/W3C-LinkChecker/perl/modules/W3C/LinkChecker/bin/checklink-orig /DS/home-0/mernst/bin/src/perl/W3C-LinkChecker/perl/modules/W3C/LinkChecker/bin/checklink --- /DS/home-0/mernst/bin/src/perl/W3C-LinkChecker/perl/modules/W3C/LinkChecker/bin/checklink-orig 2008-05-04 18:59:22.000000000 +0200 +++ /DS/home-0/mernst/bin/src/perl/W3C-LinkChecker/perl/modules/W3C/LinkChecker/bin/checklink 2008-06-28 14:56:35.240690771 +0200 @@ -365,6 +365,8 @@ my $doc_count = 0; # Time stamp my $timestamp = &get_timestamp(); +# Per-document header; undefined if already printed. See print_doc_header(). +my $doc_header; &parse_arguments() if $Opts{Command_Line}; @@ -802,18 +804,42 @@ # Before fetching the document, we don't know if we'll be within the # recursion scope or not (think redirects). if (!&in_recursion_scope($response->{absolute_uri})) { + hprintf("Not in recursion scope: %s\n") + if ($Opts{Verbose}); $response->content(""); return; } + } + + # Define the document header, and perhaps print it.
+ # (It might still be defined if the previous document had no errors; + # just redefine it in that case.) + + if ($check_num != 1) { + if ($Opts{HTML}) { + $doc_header = "\n<hr>\n"; + } else { + $doc_header = "\n" . ('-' x 40) . "\n"; + } + } + + my $absolute_uri = $response->{absolute_uri}->as_string(); + + if ($Opts{HTML}) { + $doc_header .= ("<h2>\nProcessing\t" + . &show_url($absolute_uri) + . "\n</h2>\n\n"); + } else { + $doc_header .= "\nProcessing\t$absolute_uri\n\n"; + } - print $Opts{HTML} ? '<hr />' : '-' x 40, "\n"; + if (! $Opts{Quiet}) { + print_doc_header(); } # We are checking a new document $doc_count++; - my $absolute_uri = $response->{absolute_uri}->as_string(); - my $result_anchor = 'results'.$doc_count; if ($check_num == 1 && !$Opts{HTML} && !$Opts{Summary_Only}) { @@ -918,6 +944,7 @@ if ($Opts{Masquerade}) { if ($abs_link_uri =~ m|^\Q$Opts{Masquerade_From}\E|) { + print_doc_header(); printf("processing %s in base %s\n", $abs_link_uri, $Opts{Masquerade_To}); my $nlink = $abs_link_uri; @@ -1027,7 +1054,7 @@ next if &already_processed($u, $uri); # Do the job - print "\n"; + print "\n" unless $Opts{Quiet}; if ($Opts{HTML}) { if (!$Opts{Command_Line}) { if ($doc_count == $Opts{Max_Documents}) { @@ -1834,9 +1861,8 @@ } else { print("Anchors\n\n"); } - &hprintf("Found %d anchor%s.", $n, ($n == 1) ? '' : 's'); - print('</p>') if $Opts{HTML}; - print("\n"); + &hprintf("Found %d anchor%s.\n", $n, ($n == 1) ? '' : 's'); + print('</p>\n') if $Opts{HTML}; } # List of the duplicates, if any. my @errors = keys %{$errors}; @@ -1846,6 +1872,7 @@ } undef $n; + print_doc_header(); print('<p>') if $Opts{HTML}; print('List of duplicate and empty anchors'); print <<EOF if $Opts{HTML}; @@ -1886,7 +1913,7 @@ my ($links, $results, $broken, $redirects, $urls, $codes, $todo) = @_; print("\n<dl class=\"report\">") if $Opts{HTML}; - print("\n"); + print("\n") if (! 
$Opts{Quiet}); # Process each URL my ($c, $previous_c); @@ -2136,14 +2163,15 @@ # Broken links and redirects if ($#urls < 0) { if (! $Opts{Quiet}) { + print_doc_header(); if ($Opts{HTML}) { - print "<h3>Links</h3>\n<p>Valid links!</p>"; + print "<h3>Links</h3>\n<p>Valid links!</p>\n"; } else { - print "\nValid links."; + print "\nValid links.\n"; } - print "\n"; } } else { + print_doc_header(); print('<h3>') if $Opts{HTML}; print("\nList of broken links"); #print(' and redirects') if $Opts{Redirects}; @@ -2207,6 +2235,7 @@ # Show directory redirects if ($Opts{Dir_Redirects} && ($#dir_redirect_urls > -1)) { + print_doc_header(); print('<h3>') if $Opts{HTML}; print("\nList of redirects"); print("</h3>\n<p>The links below are not broken, but the document does not use the exact URL, and the links were redirected. It may be a good idea to link to the final location, for the sake of speed.</p>") if $Opts{HTML}; @@ -2492,6 +2521,7 @@ sub hprintf (@) { + print_doc_header(); if (! $Opts{HTML}) { printf(@_); } else { @@ -2500,6 +2530,19 @@ return; } +# Print the document header, if it hasn't been printed already. +# This is invoked before most other output operations, in order +# to enable quiet processing that doesn't clutter the output with +# "Processing..." messages when nothing else will be reported. +sub print_doc_header () +{ + if (defined($doc_header)) { + print $doc_header; + undef($doc_header); + } +} + + # Local Variables: # mode: perl # indent-tabs-mode: nil Diff finished. Sun Jun 29 20:01:01 2008
Received on Sunday, 29 June 2008 18:11:11 UTC