- From: Mercurial notifier <nobody@w3.org>
- Date: Thu, 05 Aug 2010 14:46:57 +0000
- To: link-checker updates <www-validator-cvs@w3.org>
changeset: 56:f804a818cf3c user: ville date: Sun Apr 11 19:33:41 2004 +0000 files: bin/checklink bin/checklink.pod docs/checklink.html description: Make sleep time between requests to each server configurable in command line use (-S/--sleep, defaults to 1 second), remove old "sleep 3 seconds between documents" feature, and show used settings in the results. diff -r 81a8546f3614 -r f804a818cf3c bin/checklink --- a/bin/checklink Sun Apr 11 16:19:36 2004 +0000 +++ b/bin/checklink Sun Apr 11 19:33:41 2004 +0000 @@ -5,7 +5,7 @@ # (c) 1999-2004 World Wide Web Consortium # based on Renaud Bruyeron's checklink.pl # -# $Id: checklink,v 3.31 2004-04-11 16:19:36 ville Exp $ +# $Id: checklink,v 3.32 2004-04-11 19:33:39 ville Exp $ # # This program is licensed under the W3C(r) Software License: # http://www.w3.org/Consortium/Legal/copyright-software @@ -106,7 +106,7 @@ $PACKAGE = 'W3C Link Checker'; $PROGRAM = 'W3C-checklink'; $VERSION = '3.9.3-dev'; - my ($cvsver) = q$Revision: 3.31 $ =~ /(\d+[\d\.]*\.\d+)/; + my ($cvsver) = q$Revision: 3.32 $ =~ /(\d+[\d\.]*\.\d+)/; $REVISION = sprintf('version %s [%s] (c) 1999-2004 W3C', $VERSION, $cvsver); $AGENT = sprintf('%s/%s [%s] %s', @@ -187,8 +187,8 @@ HTTP_Proxy => undef, Hide_Same_Realm => 0, Depth => 0, # -1 means unlimited recursion. - Sleep_Time => 3, # For the online version. - Max_Documents => 150, # Ditto. + Sleep_Time => 1, + Max_Documents => 150, # For the online version. User => undef, Password => undef, Base_Location => '.', @@ -398,6 +398,7 @@ 'u|user=s' => \$Opts{User}, 'p|password=s' => \$Opts{Password}, 't|timeout=i' => \$Opts{Timeout}, + 'S|sleep=i' => \$Opts{Sleep_Time}, 'L|languages=s' => \$Opts{Accept_Language}, 'n|noacclanguage' => sub { warn("*** Warning: The " . "-n/--noacclanguage option is " . @@ -427,6 +428,11 @@ if ($Opts{Accept_Language} && $Opts{Accept_Language} eq 'auto') { $Opts{Accept_Language} = &guess_language(); + } + + if (($Opts{Sleep_Time} || 0) < 1) { + warn("*** Warning: minimum allowed sleep time is 1 second, resetting.\n"); + $Opts{Sleep_Time} = 1; } } @@ -473,6 +479,8 @@ -p, --password PASSWORD Specify a password. --hide-same-realm Hide 401's that are in the same realm as the document checked. + -S, --sleep SECS Sleep SECS seconds between requests to each server + (default and minimum: 1 second). -t, --timeout SECS Timeout for requests (in seconds). -d, --domain DOMAIN Regular expression describing the domain to which authentication information will be sent @@ -611,6 +619,19 @@ if ($Opts{HTML}) { print("</h2>\n"); if (! $Opts{Summary_Only}) { + my $accept = &encode($Accept); + my $acclang = &encode($Opts{Accept_Language} || '(not sent)'); + my $s = $Opts{Sleep_Time} == 1 ? '' : 's'; + printf(<<'EOF', $accept, $acclang, $Opts{Sleep_Time}, $s); +<div id="settings"> +Settings used: + <ul> + <li><tt><a href="http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.1">Accept</a></tt>: %s</li> + <li><tt><a href="http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.4">Accept-Language</a></tt>: %s</li> + <li>Sleeping %d second%s between requests to each server</li> + </ul> +</div> +EOF printf("<p>Go to <a href=\"#%s\">the results</a>.</p>\n", $result_anchor); my $esc_uri = URI::Escape::uri_escape($absolute_uri, "^A-Za-z0-9."); @@ -623,6 +644,16 @@ &encode($Opts{_Self_URI})); print("<pre>\n"); } + } elsif (! $Opts{Summary_Only}) { + my $s = $Opts{Sleep_Time} == 1 ? '' : 's'; + my $acclang = $Opts{Accept_Language} || '(not sent)'; + printf(<<'EOF', $Accept, $acclang, $Opts{Sleep_Time}, $s); +Settings used: +- Accept: %s +- Accept-Language: %s +- Sleeping %d second%s between requests to each server + +EOF } # Record that we have processed this resource @@ -758,7 +789,6 @@ # Do the job print "\n"; if ($Opts{HTML}) { - # For the online version, wait for a while to avoid abuses if (!$Opts{Command_Line}) { if ($doc_count == $Opts{Max_Documents}) { print("<hr>\n<p><strong>Maximum number of documents reached!</strong></p>\n"); @@ -770,7 +800,6 @@ next; } } - sleep($Opts{Sleep_Time}); } if ($depth < 0) { &check_uri($u, 0, -1); @@ -912,9 +941,9 @@ # Prepare the query my $ua = W3C::UserAgent->new($AGENT); # @@@ TODO: admin address - # @@@ make number of keep-alive connections and delay customizable + # @@@ make number of keep-alive connections customizable $ua->conn_cache({ total_capacity => 1}); # 1 keep-alive connection - $ua->delay(1/60); # 1 second + $ua->delay($Opts{Sleep_Time}/60); $ua->timeout($Opts{Timeout}); $ua->proxy('http', 'http://' . $Opts{HTTP_Proxy}) if $Opts{HTTP_Proxy}; @@ -1967,6 +1996,9 @@ border-bottom: 1px solid black; padding: .25em; } +h2 { + margin-bottom: 0.5em; +} address { padding: 1ex; border-top: 1px solid black; @@ -2000,6 +2032,14 @@ } .multiple { background-color: fuchsia; +} +div#settings { + font-size: smaller; + float: right; +} +ul { + margin: 0; + padding-left: 1.5em; } </style>", $script, " </head> @@ -2116,7 +2156,7 @@ <br> <label for=\"no_accept_language\"><input type=\"checkbox\" id=\"no_accept_language\" name=\"no_accept_language\" value=\"on\"", $acc, "> Don't send <tt><a href=\"http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.4\">Accept-Language</a></tt> headers</label> <br> - <label title=\"Check linked documents recursively (maximum: ", $Opts{Max_Documents}, " documents; sleeping ", $Opts{Sleep_Time}, " seconds between each document)\" for=\"recursive\"><input type=\"checkbox\" id=\"recursive\" name=\"recursive\" value=\"on\"", $rec, "> Check linked documents recursively</label>, + <label title=\"Check linked documents recursively (maximum: ", $Opts{Max_Documents}, " documents)\" for=\"recursive\"><input type=\"checkbox\" id=\"recursive\" name=\"recursive\" value=\"on\"", $rec, "> Check linked documents recursively</label>, <label title=\"Depth of the recursion (-1 is the default and means unlimited)\" for=\"depth\">recursion depth: <input type=\"text\" size=\"3\" maxlength=\"3\" id=\"depth\" name=\"depth\" value=\"", $dep, "\"></label> <br><br>", $cookie_options, " </p> diff -r 81a8546f3614 -r f804a818cf3c bin/checklink.pod --- a/bin/checklink.pod Sun Apr 11 16:19:36 2004 +0000 +++ b/bin/checklink.pod Sun Apr 11 19:33:41 2004 +0000 @@ -1,4 +1,4 @@ -$Id: checklink.pod,v 1.8 2004-04-04 16:13:39 ville Exp $ +$Id: checklink.pod,v 1.9 2004-04-11 19:33:39 ville Exp $ =head1 NAME @@ -100,6 +100,11 @@ =item B<--hide-same-realm> Hide 401's that are in the same realm as the document checked. + +=item B<-S, --sleep> I<secs> + +Sleep the specified number of seconds between requests to each server. +Defaults to 1 second, which is also the minimum allowed. =item B<-t, --timeout> I<secs> diff -r 81a8546f3614 -r f804a818cf3c docs/checklink.html --- a/docs/checklink.html Sun Apr 11 16:19:36 2004 +0000 +++ b/docs/checklink.html Sun Apr 11 19:33:41 2004 +0000 @@ -6,7 +6,7 @@ <title>W3C Link Checker Documentation</title> <link rev="made" href="mailto:www-validator@w3.org" /> <style type="text/css" media="all">@import "linkchecker.css";</style> - <meta name="revision" content="$Id: checklink.html,v 1.13 2004-04-10 17:00:56 ville Exp $" /> + <meta name="revision" content="$Id: checklink.html,v 1.14 2004-04-11 19:33:41 ville Exp $" /> </head> <body> @@ -94,8 +94,10 @@ <p> In the online version (and in general, when run as a CGI script), - the number of documents that can be checked recursively is limited - and there is a delay between each document checked to avoid abuses. + the number of documents that can be checked recursively is limited. + Both the command line version and the online one sleep at least one + second between requests to each server to avoid abuses and target + server congestion. </p> <h2><a name="install" id="install">Install it locally</a></h2> @@ -206,7 +208,7 @@ alt="Valid XHTML 1.0!" /></a> <a title="Send Feedback for the W3C Link Checker" href="http://validator.w3.org/feedback.html">The W3C Validator Team</a><br /> - $Date: 2004-04-10 17:00:56 $ + $Date: 2004-04-11 19:33:41 $ </address> <p class="copyright"> <a rel="Copyright" href="http://www.w3.org/Consortium/Legal/ipr-notice#Copyright">Copyright</a> © 1994-2004
Received on Thursday, 5 August 2010 14:47:07 UTC