- From: Mercurial notifier <nobody@w3.org>
- Date: Thu, 05 Aug 2010 14:46:57 +0000
- To: link-checker updates <www-validator-cvs@w3.org>
changeset: 63:72e90e45ea38
user: ville
date: Tue Apr 20 17:13:28 2004 +0000
files: bin/checklink docs/checklink.html docs/linkchecker.css
description:
Add instructions on how to allow the link checker in /robots.txt, and include a link to them in the results.
diff -r 65e7f84fa6ff -r 72e90e45ea38 bin/checklink
--- a/bin/checklink Mon Apr 19 20:10:38 2004 +0000
+++ b/bin/checklink Tue Apr 20 17:13:28 2004 +0000
@@ -5,7 +5,7 @@
# (c) 1999-2004 World Wide Web Consortium
# based on Renaud Bruyeron's checklink.pl
#
-# $Id: checklink,v 3.36 2004-04-19 20:10:38 ville Exp $
+# $Id: checklink,v 3.37 2004-04-20 17:13:26 ville Exp $
#
# This program is licensed under the W3C(r) Software License:
# http://www.w3.org/Consortium/Legal/copyright-software
@@ -112,7 +112,7 @@
$PACKAGE = 'W3C Link Checker';
$PROGRAM = 'W3C-checklink';
$VERSION = '3.9.3-dev';
- my ($cvsver) = q$Revision: 3.36 $ =~ /(\d+[\d\.]*\.\d+)/;
+ my ($cvsver) = q$Revision: 3.37 $ =~ /(\d+[\d\.]*\.\d+)/;
$REVISION = sprintf('version %s [%s] (c) 1999-2004 W3C',
$VERSION, $cvsver);
$AGENT = sprintf('%s/%s [%s] %s',
@@ -1794,7 +1794,7 @@
501 => 'Could not check this link: method not implemented or scheme not supported.',
503 => 'The server cannot service the request, for some unknown reason.',
# Non-HTTP codes:
- RC_ROBOTS_TXT() => 'The link was not checked due to <a href="http://www.robotstxt.org/wc/exclusion.html#robotstxt">robots exclusion rules</a>. Check the link manually.',
+ RC_ROBOTS_TXT() => "The link was not checked due to <a href=\"http://www.robotstxt.org/wc/exclusion.html#robotstxt\">robots exclusion rules</a>. Check the link manually, and see also the link checker <a href=\"$Cfg{Doc_URI}#bot\">documentation on robots exclusion</a>.",
RC_DNS_ERROR() => 'The hostname could not be resolved. This link needs to be fixed.',
);
my %priority = ( 410 => 1,
diff -r 65e7f84fa6ff -r 72e90e45ea38 docs/checklink.html
--- a/docs/checklink.html Mon Apr 19 20:10:38 2004 +0000
+++ b/docs/checklink.html Tue Apr 20 17:13:28 2004 +0000
@@ -6,7 +6,7 @@
<title>W3C Link Checker Documentation</title>
<link rev="made" href="mailto:www-validator@w3.org" />
<style type="text/css" media="all">@import "linkchecker.css";</style>
- <meta name="revision" content="$Id: checklink.html,v 1.15 2004-04-11 20:27:27 ville Exp $" />
+ <meta name="revision" content="$Id: checklink.html,v 1.16 2004-04-20 17:13:28 ville Exp $" />
</head>
<body>
@@ -24,6 +24,7 @@
<li><a href="#what">What it does</a></li>
<li><a href="#online">Use it online</a></li>
<li><a href="#install">Install it locally</a></li>
+ <li><a href="#bot">Robots exclusion</a></li>
<li><a href="#csb">Comments, suggestions and bugs</a></li>
</ul>
@@ -187,6 +188,30 @@
for more information.
</p>
+ <h2><a name="bot" id="bot">Robots exclusion</a></h2>
+
+ <p>
+ As of version 3.9.3, the link checker honors
+ <a href="http://www.robotstxt.org/wc/exclusion.html#robotstxt">robots exclusion rules</a>. To place rules specific to the W3C Link Checker in
+ <code>/robots.txt</code> files, sites can use the
+ <code>W3C-checklink</code> user agent string. For example, to allow
+ the link checker to access all documents on a server and to disallow
+ all other robots, one could use the following:
+ </p>
+
+ <pre>
+User-Agent: *
+Disallow: /
+
+User-Agent: W3C-checklink
+Disallow:
+</pre>
+
+ <p>
+ Note that <code>/robots.txt</code> rules affect only user agents
+ that honor them; they are not a generic method for access control.
+ </p>
+
<h2><a name="csb" id="csb">Comments, suggestions and bugs</a></h2>
<p>
@@ -208,7 +233,7 @@
alt="Valid XHTML 1.0!" /></a>
<a title="Send Feedback for the W3C Link Checker"
href="http://validator.w3.org/feedback.html">The W3C Validator Team</a><br />
- $Date: 2004-04-11 20:27:27 $
+ $Date: 2004-04-20 17:13:28 $
</address>
<p class="copyright">
<a rel="Copyright" href="http://www.w3.org/Consortium/Legal/ipr-notice#Copyright">Copyright</a> © 1994-2004
diff -r 65e7f84fa6ff -r 72e90e45ea38 docs/linkchecker.css
--- a/docs/linkchecker.css Mon Apr 19 20:10:38 2004 +0000
+++ b/docs/linkchecker.css Tue Apr 20 17:13:28 2004 +0000
@@ -4,7 +4,7 @@
Copyright 2000-2004 W3C (MIT, INRIA, Keio). All Rights Reserved.
See http://www.w3.org/Consortium/Legal/ipr-notice.html#Copyright
- $Id: linkchecker.css,v 1.1 2004-04-09 11:36:17 ville Exp $
+ $Id: linkchecker.css,v 1.2 2004-04-20 17:13:28 ville Exp $
*/
html, body {
@@ -51,6 +51,9 @@
font-family: monospace;
line-height: 100%;
white-space: pre;
+}
+pre {
+ padding-left: 2em;
}
a:link img, a:visited img {
Received on Thursday, 5 August 2010 14:47:14 UTC