W3C home > Mailing lists > Public > www-validator-cvs@w3.org > August 2010

link-checker commit: Fix bug in recursion logic when parsing a document in recursive mode

From: Mercurial notifier <nobody@w3.org>
Date: Thu, 05 Aug 2010 14:47:00 +0000
To: link-checker updates <www-validator-cvs@w3.org>
Message-Id: <E1Oh1im-0005fs-Re@blinky.w3.org>
changeset:   96:46fdbac9a93a
user:        ville
date:        Sun Jul 11 19:14:46 2004 +0000
files:       bin/checklink
description:
Fix bug in recursion logic: when parsing a document in recursive mode, we
are interested in links in it if it's in recursion scope, even if the document
retrieval is "only" caused by an anchor in a referring document.
http://lists.w3.org/Archives/Public/www-validator/2004Jun/0181.html


diff -r bd698e19ca62 -r 46fdbac9a93a bin/checklink
--- a/bin/checklink	Sun Jul 11 16:46:42 2004 +0000
+++ b/bin/checklink	Sun Jul 11 19:14:46 2004 +0000
@@ -5,7 +5,7 @@
 # (c) 1999-2004 World Wide Web Consortium
 # based on Renaud Bruyeron's checklink.pl
 #
-# $Id: checklink,v 4.1 2004-07-11 16:45:13 ville Exp $
+# $Id: checklink,v 4.2 2004-07-11 19:14:46 ville Exp $
 #
 # This program is licensed under the W3C(r) Software License:
 #       http://www.w3.org/Consortium/Legal/copyright-software
@@ -112,7 +112,7 @@
   $PACKAGE       = 'W3C Link Checker';
   $PROGRAM       = 'W3C-checklink';
   $VERSION       = '4.0-dev';
-  my ($cvsver)   = q$Revision: 4.1 $ =~ /(\d+[\d\.]*\.\d+)/;
+  my ($cvsver)   = q$Revision: 4.2 $ =~ /(\d+[\d\.]*\.\d+)/;
   $REVISION      = sprintf('version %s [%s] (c) 1999-2004 W3C',
                            $VERSION, $cvsver);
   $AGENT         = sprintf('%s/%s [%s] %s',
@@ -739,7 +739,9 @@
     &hprintf("Checking link %s\n", $u) unless $Opts{Summary_Only};
 
     # Check that a link is valid
-    &check_validity($uri, $u, \%links, \%redirects);
+    &check_validity($uri, $u,
+                    ($depth != 0 && &in_recursion_scope($u)),
+                    \%links, \%redirects);
     &hprintf("\tReturn code: %s\n", $results{$u}{location}{code})
       if ($Opts{Verbose});
     if ($results{$u}{location}{success}) {
@@ -834,7 +836,7 @@
   my ($method, $uri, $in_recursion, $redirects) = @_;
   # $method contains the HTTP method to use (GET or HEAD)
   # $uri contains the identifier of the resource
-  # $in_recursion equals 1 if we are in recursion mode (i.e. it is at least
+  # $in_recursion is > 0 if we are in recursion mode (i.e. it is at least
   #                        the second resource checked)
   # $redirects is a pointer to the hash containing the map of the redirects
 
@@ -1089,7 +1091,7 @@
 # Parse a document #
 ####################
 
-sub parse_document ($$$$;$)
+sub parse_document ($$$$$)
 {
   my ($uri, $location, $document, $links, $rec_needs_links) = @_;
 
@@ -1301,11 +1303,12 @@
 # Check the validity of a link #
 ################################
 
-sub check_validity ($$\%\%)
+sub check_validity ($$$\%\%)
 {
-  my ($testing, $uri, $links, $redirects) = @_;
+  my ($testing, $uri, $in_recursion, $links, $redirects) = @_;
   # $testing is the URI of the document checked
   # $uri is the URI of the target that we are verifying
+  # $in_recursion is > 0 if we're in recursive mode
   # $links is a hash of the links in the documents checked
   # $redirects is a map of the redirects encountered
 
@@ -1357,7 +1360,7 @@
     }
     # Do it then
     $p = &parse_document($uri, $response->base(),
-                         $response->as_string(), 0);
+                         $response->as_string(), 0, $in_recursion);
   } else {
     # We already had the information
     $p->{Anchors} = $results{$uri}{parsing}{Anchors};
Received on Thursday, 5 August 2010 14:47:11 UTC

This archive was generated by hypermail 2.3.1 : Wednesday, 7 January 2015 15:17:43 UTC