link-checker commit: Add instructions how to allow us in /robots.txt, and include a link to it in the results.

changeset:   63:72e90e45ea38
user:        ville
date:        Tue Apr 20 17:13:28 2004 +0000
files:       bin/checklink docs/checklink.html docs/linkchecker.css
description:
Add instructions how to allow us in /robots.txt, and include a link to it in the results.


diff -r 65e7f84fa6ff -r 72e90e45ea38 bin/checklink
--- a/bin/checklink	Mon Apr 19 20:10:38 2004 +0000
+++ b/bin/checklink	Tue Apr 20 17:13:28 2004 +0000
@@ -5,7 +5,7 @@
 # (c) 1999-2004 World Wide Web Consortium
 # based on Renaud Bruyeron's checklink.pl
 #
-# $Id: checklink,v 3.36 2004-04-19 20:10:38 ville Exp $
+# $Id: checklink,v 3.37 2004-04-20 17:13:26 ville Exp $
 #
 # This program is licensed under the W3C(r) Software License:
 #       http://www.w3.org/Consortium/Legal/copyright-software
@@ -112,7 +112,7 @@
   $PACKAGE       = 'W3C Link Checker';
   $PROGRAM       = 'W3C-checklink';
   $VERSION       = '3.9.3-dev';
-  my ($cvsver)   = q$Revision: 3.36 $ =~ /(\d+[\d\.]*\.\d+)/;
+  my ($cvsver)   = q$Revision: 3.37 $ =~ /(\d+[\d\.]*\.\d+)/;
   $REVISION      = sprintf('version %s [%s] (c) 1999-2004 W3C',
                            $VERSION, $cvsver);
   $AGENT         = sprintf('%s/%s [%s] %s',
@@ -1794,7 +1794,7 @@
                501 => 'Could not check this link: method not implemented or scheme not supported.',
                503 => 'The server cannot service the request, for some unknown reason.',
                # Non-HTTP codes:
-               RC_ROBOTS_TXT() => 'The link was not checked due to <a href="http://www.robotstxt.org/wc/exclusion.html#robotstxt">robots exclusion rules</a>. Check the link manually.',
+               RC_ROBOTS_TXT() => "The link was not checked due to <a href=\"http://www.robotstxt.org/wc/exclusion.html#robotstxt\">robots exclusion rules</a>. Check the link manually, and see also the link checker <a href=\"$Cfg{Doc_URI}#bot\">documentation on robots exclusion</a>.",
                RC_DNS_ERROR() => 'The hostname could not be resolved. This link needs to be fixed.',
              );
   my %priority = ( 410 => 1,
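
The checklink hunk above is the second half of this commit: the robots exclusion result message now links to the new #bot section of the documentation via $Cfg{Doc_URI}. Read in isolation, the interpolation amounts to roughly the following self-contained sketch; note that RC_ROBOTS_TXT's value, the %Cfg hash, and the Doc_URI value below are assumed stand-ins for illustration, not the actual checklink definitions.

  #!/usr/bin/perl
  use strict;
  use warnings;

  # Stand-in for the real RC_ROBOTS_TXT constant defined in checklink.
  use constant RC_ROBOTS_TXT => -1;

  # Stand-in for the checklink configuration hash; the Doc_URI value here is
  # an assumed example, not necessarily the deployed default.
  my %Cfg = (Doc_URI => 'http://validator.w3.org/docs/checklink.html');

  my %msg = (
      RC_ROBOTS_TXT() =>
          'The link was not checked due to robots exclusion rules. '
        . 'Check the link manually, and see also the link checker '
        . "<a href=\"$Cfg{Doc_URI}#bot\">documentation on robots exclusion</a>.",
  );

  # Prints the message the way it would appear in the results for a link
  # that was skipped because of /robots.txt rules.
  print $msg{RC_ROBOTS_TXT()}, "\n";
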
diff -r 65e7f84fa6ff -r 72e90e45ea38 docs/checklink.html
--- a/docs/checklink.html	Mon Apr 19 20:10:38 2004 +0000
+++ b/docs/checklink.html	Tue Apr 20 17:13:28 2004 +0000
@@ -6,7 +6,7 @@
     <title>W3C Link Checker Documentation</title>
     <link rev="made" href="mailto:www-validator@w3.org" />
     <style type="text/css" media="all">@import "linkchecker.css";</style>
-    <meta name="revision" content="$Id: checklink.html,v 1.15 2004-04-11 20:27:27 ville Exp $" />
+    <meta name="revision" content="$Id: checklink.html,v 1.16 2004-04-20 17:13:28 ville Exp $" />
   </head>
 
   <body>
@@ -24,6 +24,7 @@
       <li><a href="#what">What it does</a></li>
       <li><a href="#online">Use it online</a></li>
       <li><a href="#install">Install it locally</a></li>
+      <li><a href="#bot">Robots exclusion</a></li>
       <li><a href="#csb">Comments, suggestions and bugs</a></li>
     </ul>
 
@@ -187,6 +188,30 @@
       for more information.
     </p>
 
+    <h2><a name="bot" id="bot">Robots exclusion</a></h2>
+
+    <p>
+      As of version 3.9.3, the link checker honors
+      <a href="http://www.robotstxt.org/wc/exclusion.html#robotstxt">robots exclusion rules</a>.  To place rules specific to the W3C Link Checker in
+      <code>/robots.txt</code> files, sites can use the
+      <code>W3C-checklink</code> user agent string.  For example, to allow
+      the link checker to access all documents on a server and to disallow
+      all other robots, one could use the following:
+    </p>
+
+    <pre>
+User-Agent: *
+Disallow: /
+
+User-Agent: W3C-checklink
+Disallow:
+</pre>
+
+    <p>
+      Note that <code>/robots.txt</code> rules affect only user agents
+      that honor it; it is not a generic method for access control.
+    </p>
+
     <h2><a name="csb" id="csb">Comments, suggestions and bugs</a></h2>
 
     <p>
@@ -208,7 +233,7 @@
         alt="Valid XHTML 1.0!" /></a>
       <a title="Send Feedback for the W3C Link Checker"
         href="http://validator.w3.org/feedback.html">The W3C Validator Team</a><br />
-      $Date: 2004-04-11 20:27:27 $
+      $Date: 2004-04-20 17:13:28 $
     </address>
     <p class="copyright">
       <a rel="Copyright" href="http://www.w3.org/Consortium/Legal/ipr-notice#Copyright">Copyright</a> &copy; 1994-2004
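
The documentation hunk above already includes the recommended /robots.txt snippet. As an informal cross-check of how the W3C-checklink user agent string interacts with that snippet, the following hedged sketch feeds it through WWW::RobotRules, the standard CPAN robots.txt parser (whether checklink itself uses this exact module is not shown by this diff); the SomeOtherBot name and the example.org URLs are made up for illustration.

  #!/usr/bin/perl
  use strict;
  use warnings;
  use WWW::RobotRules;

  # The sample /robots.txt from the documentation section added above.
  my $robots_txt = join "\n",
      'User-Agent: *',
      'Disallow: /',
      '',
      'User-Agent: W3C-checklink',
      'Disallow:',
      '';

  # Compare the link checker's user agent string with a made-up robot name.
  for my $agent ('W3C-checklink', 'SomeOtherBot') {
      my $rules = WWW::RobotRules->new($agent);
      $rules->parse('http://www.example.org/robots.txt', $robots_txt);
      printf "%-14s %s\n", $agent,
          $rules->allowed('http://www.example.org/dir/page.html')
              ? 'allowed' : 'disallowed';
  }

Run standalone, this should print "allowed" for W3C-checklink and "disallowed" for the other agent, which matches the behaviour the new documentation section describes.
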
diff -r 65e7f84fa6ff -r 72e90e45ea38 docs/linkchecker.css
--- a/docs/linkchecker.css	Mon Apr 19 20:10:38 2004 +0000
+++ b/docs/linkchecker.css	Tue Apr 20 17:13:28 2004 +0000
@@ -4,7 +4,7 @@
    Copyright 2000-2004 W3C (MIT, INRIA, Keio). All Rights Reserved.
    See http://www.w3.org/Consortium/Legal/ipr-notice.html#Copyright
 
-   $Id: linkchecker.css,v 1.1 2004-04-09 11:36:17 ville Exp $
+   $Id: linkchecker.css,v 1.2 2004-04-20 17:13:28 ville Exp $
 */
 
 html, body {
@@ -51,6 +51,9 @@
   font-family:      monospace;
   line-height:      100%;
   white-space:      pre;
+}
+pre {
+  padding-left: 2em;
 }
 
 a:link img, a:visited img {
