- From: Mercurial notifier <nobody@w3.org>
- Date: Thu, 05 Aug 2010 14:46:57 +0000
- To: link-checker updates <www-validator-cvs@w3.org>
changeset: 63:72e90e45ea38 user: ville date: Tue Apr 20 17:13:28 2004 +0000 files: bin/checklink docs/checklink.html docs/linkchecker.css description: Add instructions how to allow us in /robots.txt, and include a link to it in the results. diff -r 65e7f84fa6ff -r 72e90e45ea38 bin/checklink --- a/bin/checklink Mon Apr 19 20:10:38 2004 +0000 +++ b/bin/checklink Tue Apr 20 17:13:28 2004 +0000 @@ -5,7 +5,7 @@ # (c) 1999-2004 World Wide Web Consortium # based on Renaud Bruyeron's checklink.pl # -# $Id: checklink,v 3.36 2004-04-19 20:10:38 ville Exp $ +# $Id: checklink,v 3.37 2004-04-20 17:13:26 ville Exp $ # # This program is licensed under the W3C(r) Software License: # http://www.w3.org/Consortium/Legal/copyright-software @@ -112,7 +112,7 @@ $PACKAGE = 'W3C Link Checker'; $PROGRAM = 'W3C-checklink'; $VERSION = '3.9.3-dev'; - my ($cvsver) = q$Revision: 3.36 $ =~ /(\d+[\d\.]*\.\d+)/; + my ($cvsver) = q$Revision: 3.37 $ =~ /(\d+[\d\.]*\.\d+)/; $REVISION = sprintf('version %s [%s] (c) 1999-2004 W3C', $VERSION, $cvsver); $AGENT = sprintf('%s/%s [%s] %s', @@ -1794,7 +1794,7 @@ 501 => 'Could not check this link: method not implemented or scheme not supported.', 503 => 'The server cannot service the request, for some unknown reason.', # Non-HTTP codes: - RC_ROBOTS_TXT() => 'The link was not checked due to <a href="http://www.robotstxt.org/wc/exclusion.html#robotstxt">robots exclusion rules</a>. Check the link manually.', + RC_ROBOTS_TXT() => "The link was not checked due to <a href=\"http://www.robotstxt.org/wc/exclusion.html#robotstxt\">robots exclusion rules</a>. Check the link manually, and see also the link checker <a href=\"$Cfg{Doc_URI}#bot\">documentation on robots exclusion</a>.", RC_DNS_ERROR() => 'The hostname could not be resolved. This link needs to be fixed.', ); my %priority = ( 410 => 1, diff -r 65e7f84fa6ff -r 72e90e45ea38 docs/checklink.html --- a/docs/checklink.html Mon Apr 19 20:10:38 2004 +0000 +++ b/docs/checklink.html Tue Apr 20 17:13:28 2004 +0000 @@ -6,7 +6,7 @@ <title>W3C Link Checker Documentation</title> <link rev="made" href="mailto:www-validator@w3.org" /> <style type="text/css" media="all">@import "linkchecker.css";</style> - <meta name="revision" content="$Id: checklink.html,v 1.15 2004-04-11 20:27:27 ville Exp $" /> + <meta name="revision" content="$Id: checklink.html,v 1.16 2004-04-20 17:13:28 ville Exp $" /> </head> <body> @@ -24,6 +24,7 @@ <li><a href="#what">What it does</a></li> <li><a href="#online">Use it online</a></li> <li><a href="#install">Install it locally</a></li> + <li><a href="#bot">Robots exclusion</a></li> <li><a href="#csb">Comments, suggestions and bugs</a></li> </ul> @@ -187,6 +188,30 @@ for more information. </p> + <h2><a name="bot" id="bot">Robots exclusion</a></h2> + + <p> + As of version 3.9.3, the link checker honors + <a href="http://www.robotstxt.org/wc/exclusion.html#robotstxt">robots exclusion rules</a>. To place rules specific to the W3C Link Checker in + <code>/robots.txt</code> files, sites can use the + <code>W3C-checklink</code> user agent string. For example, to allow + the link checker to access all documents on a server and to disallow + all other robots, one could use the following: + </p> + + <pre> +User-Agent: * +Disallow: / + +User-Agent: W3C-checklink +Disallow: +</pre> + + <p> + Note that <code>/robots.txt</code> rules affect only user agents + that honor it; it is not a generic method for access control. + </p> + <h2><a name="csb" id="csb">Comments, suggestions and bugs</a></h2> <p> @@ -208,7 +233,7 @@ alt="Valid XHTML 1.0!" /></a> <a title="Send Feedback for the W3C Link Checker" href="http://validator.w3.org/feedback.html">The W3C Validator Team</a><br /> - $Date: 2004-04-11 20:27:27 $ + $Date: 2004-04-20 17:13:28 $ </address> <p class="copyright"> <a rel="Copyright" href="http://www.w3.org/Consortium/Legal/ipr-notice#Copyright">Copyright</a> © 1994-2004 diff -r 65e7f84fa6ff -r 72e90e45ea38 docs/linkchecker.css --- a/docs/linkchecker.css Mon Apr 19 20:10:38 2004 +0000 +++ b/docs/linkchecker.css Tue Apr 20 17:13:28 2004 +0000 @@ -4,7 +4,7 @@ Copyright 2000-2004 W3C (MIT, INRIA, Keio). All Rights Reserved. See http://www.w3.org/Consortium/Legal/ipr-notice.html#Copyright - $Id: linkchecker.css,v 1.1 2004-04-09 11:36:17 ville Exp $ + $Id: linkchecker.css,v 1.2 2004-04-20 17:13:28 ville Exp $ */ html, body { @@ -51,6 +51,9 @@ font-family: monospace; line-height: 100%; white-space: pre; +} +pre { + padding-left: 2em; } a:link img, a:visited img {
Received on Thursday, 5 August 2010 14:47:14 UTC