checklink --depth

From: rowe@excc.ex.ac.uk
Date: Sun, Mar 18 2001

  • Next message: Brian Gilkison: "Re: Bug in W3C HTML validator"

    Date: Sun, 18 Mar 2001 18:56:51 -0500 (EST)
    From: rowe@excc.ex.ac.uk
    Message-Id: <5478.200103182356@deimos.ex.ac.uk.ex.ac.uk>
    To: www-validator@w3.org
    Subject: checklink --depth
    
    First, thanks for writing checklink - needless to say I've already
    found a few broken links with it!
    
    I needed a --depth option, i.e. like --recursive but finite, and a
    quiet option for CGI operation. A minor patch is enclosed - you may
    need to change the name of the file from 'checklink' to
    'checklink.pl' or whatever.
    
    Thanks again.
    
    John
    
    
    *** checklink	Sun Mar 18 21:54:26 2001
    --- checklink.new	Sun Mar 18 23:46:34 2001
    ***************
    *** 63,68 ****
    --- 63,69 ----
      my $_masquerade = 0;
      my $_local_dir = my $_remote_masqueraded_uri = '';
      my $_hide_same_realm = 0;
    + my $_depth = 0;
      
      # Restrictions for the online version
      my $_sleep_time = 3;
    ***************
    *** 99,105 ****
              }
              # Transform the parameter into a URI
              $uri = urize($uri);
    !         &check_uri($uri);
          }
          if (($doc_count > 0) && !$_summary) {
              printf("\n%s\n", &global_stats());
    --- 100,106 ----
              }
              # Transform the parameter into a URI
              $uri = urize($uri);
    !         &check_uri($uri, 0, $_depth);
          }
          if (($doc_count > 0) && !$_summary) {
              printf("\n%s\n", &global_stats());
    ***************
    *** 128,133 ****
    --- 129,141 ----
          if ($query->param('recursive')) {
              $_recursive = 1;
          }
    +     if ($query->param('quiet')) {
    +         $_quiet = $query->param('quiet');
    + 	$_quiet and $_summary = 1;
    +     }
    +     if ($query->param('depth')) {
    +         $_depth = $query->param('depth');
    +     }
          $_html = 1;
          my $uri;
          if ($query->param('uri')) {
    ***************
    *** 148,154 ****
                  $uri = 'http://'.$uri;
              }
          }
    !     &check_uri($uri, 1);
          &html_footer();
      }
      
    --- 156,162 ----
                  $uri = 'http://'.$uri;
              }
          }
    !     &check_uri($uri, 1, $_depth);
          &html_footer();
      }
      
    ***************
    *** 167,173 ****
                  push(@uris, $_);
              } elsif (m/^--$/) {
                  $uris = 1;
    !         } elsif (m/^-[^-upytdlL]/) {
                  if (m/q/) {
                      $_quiet = 1;
                      $_summary = 1;
    --- 175,181 ----
                  push(@uris, $_);
              } elsif (m/^--$/) {
                  $uris = 1;
    !         } elsif (m/^-[^-DupytdlL]/) {
                  if (m/q/) {
                      $_quiet = 1;
                      $_summary = 1;
    ***************
    *** 226,234 ****
                  $_timeout = shift(@ARGV);
              } elsif (m/^-L|--languages$/) {
                  $_languages = shift(@ARGV);
    !         } elsif (m/^-d|--domain$/) {
                  $_trusted = shift(@ARGV);
    !         } elsif (m/^-y|--proxy$/) {
                  $_http_proxy = shift(@ARGV);
              } elsif (m/^--masquerade$/) {
                  $_masquerade = 1;
    --- 234,244 ----
                  $_timeout = shift(@ARGV);
              } elsif (m/^-L|--languages$/) {
                  $_languages = shift(@ARGV);
    !         } elsif (m/^-D|--depth$/) {
    !             $_depth = shift(@ARGV);
    !          } elsif (m/^-d|--domain$/) {
                  $_trusted = shift(@ARGV);
    !        } elsif (m/^-y|--proxy$/) {
                  $_http_proxy = shift(@ARGV);
              } elsif (m/^--masquerade$/) {
                  $_masquerade = 1;
    ***************
    *** 253,258 ****
    --- 263,270 ----
      	-e/--directory		Hide directory redirects - e.g.
      				http://www.w3.org/TR -> http://www.w3.org/TR/
      	-r/--recursive		Check the documents linked from the first one.
    + 	-D/--depth n	        Check the documents linked from the first one
    +                                 to depth n.
      	-l/--location uri	Scope of the documents checked.
      				By default, for
      				http://www.w3.org/TR/html4/Overview.html
    ***************
    *** 325,331 ****
      ########################################
      
      sub check_uri() {
    !     my ($uri, $html_header) = @_;
          # If $html_header equals 1, we need to generate a HTML header (first
          # instance called in HTML mode).
      
    --- 337,343 ----
      ########################################
      
      sub check_uri() {
    !     my ($uri, $html_header, $depth) = @_;
          # If $html_header equals 1, we need to generate a HTML header (first
          # instance called in HTML mode).
      
    ***************
    *** 359,365 ****
          printf("\nProcessing\t%s\n\n", $_html ? &show_url(&encode($absolute_uri))
                 : $absolute_uri);
      
    !     if ($_html) {
              printf("</h2>\n<p>Go to <a href='#%s'>the results</a>.</p>\n",
                     $result_anchor);
              printf("<p>Check also: <a href=\"http://validator.w3.org/check?uri=%s\">HTML Validity</a> &amp; <a href=\"http://jigsaw.w3.org/css-validator/validator?uri=%s\">CSS Validity</a></p>\n<p>Back to the <a href=\"checklink\">link checker</a>.</p>\n", map{&encode($absolute_uri)}(1..2));
    --- 371,377 ----
          printf("\nProcessing\t%s\n\n", $_html ? &show_url(&encode($absolute_uri))
                 : $absolute_uri);
      
    !     if ($_html && ! $_quiet) {
              printf("</h2>\n<p>Go to <a href='#%s'>the results</a>.</p>\n",
                     $result_anchor);
              printf("<p>Check also: <a href=\"http://validator.w3.org/check?uri=%s\">HTML Validity</a> &amp; <a href=\"http://jigsaw.w3.org/css-validator/validator?uri=%s\">CSS Validity</a></p>\n<p>Back to the <a href=\"checklink\">link checker</a>.</p>\n", map{&encode($absolute_uri)}(1..2));
    ***************
    *** 372,378 ****
          $processed{$absolute_uri} = 1;
          # Parse the document
          my $p = &parse_document($uri, $absolute_uri,
    !                             $response->content(), 1);
          my $base = URI->new($p->{base});
      
          # Check anchors
    --- 384,391 ----
          $processed{$absolute_uri} = 1;
          # Parse the document
          my $p = &parse_document($uri, $absolute_uri,
    !                             $response->content(), 1, 
    ! 			    $_recursive || $depth > 0);
          my $base = URI->new($p->{base});
      
          # Check anchors
    ***************
    *** 503,509 ****
          &links_summary(\%links, \%results, \%broken, \%redirects);
      
          # Do we want to process other documents?
    !     if ($_recursive) {
              if ($_base_location eq '.') {
                  # Get the name of the original directory
                  # e.g. http://www.w3.org/TR/html4/Overview.html
    --- 516,522 ----
          &links_summary(\%links, \%results, \%broken, \%redirects);
      
          # Do we want to process other documents?
    !     if ($_recursive || $depth > 0) {
              if ($_base_location eq '.') {
                  # Get the name of the original directory
                  # e.g. http://www.w3.org/TR/html4/Overview.html
    ***************
    *** 546,552 ****
                      sleep($_sleep_time);
                  }
                  print "\n";
    !             &check_uri($u, 0);
              }
          }
      }
    --- 559,565 ----
                      sleep($_sleep_time);
                  }
                  print "\n";
    !             &check_uri($u, 0, $depth - 1);
              }
          }
      }
    ***************
    *** 810,816 ****
      ####################
      
      sub parse_document() {
    !     my ($uri, $location, $document, $links) = @_;
      
          my $p;
      
    --- 823,829 ----
      ####################
      
      sub parse_document() {
    !     my ($uri, $location, $document, $links, $need_links) = @_;
      
          my $p;
      
    ***************
    *** 835,841 ****
          # We only look for anchors if we are not interested in the links
          # obviously, or if we are running a recursive checking because we
          # might need this information later
    !     $p->{only_anchors} = !($links || $_recursive);
      
          # Transform <?xml:stylesheet ...?> into <xml:stylesheet ...> for parsing
          # Processing instructions are not parsed by process, but in this case
    --- 848,854 ----
          # We only look for anchors if we are not interested in the links
          # obviously, or if we are running a recursive checking because we
          # might need this information later
    !     $p->{only_anchors} = !($links || $need_links);
      
          # Transform <?xml:stylesheet ...?> into <xml:stylesheet ...> for parsing
          # Processing instructions are not parsed by process, but in this case