checklink --depth

First, thanks for writing checklink - needless to say, I've already
found a few broken links with it!

I needed a --depth option, i.e. like --recursive but limited to a
finite depth, and a quiet option for CGI operation. A minor patch is
enclosed - you may need to change the name of the file from
'checklink' to 'checklink.pl' or whatever your copy is called.

Thanks again.

John


*** checklink	Sun Mar 18 21:54:26 2001
--- checklink.new	Sun Mar 18 23:46:34 2001
***************
*** 63,68 ****
--- 63,69 ----
  my $_masquerade = 0;
  my $_local_dir = my $_remote_masqueraded_uri = '';
  my $_hide_same_realm = 0;
+ my $_depth = 0;
  
  # Restrictions for the online version
  my $_sleep_time = 3;
***************
*** 99,105 ****
          }
          # Transform the parameter into a URI
          $uri = urize($uri);
!         &check_uri($uri);
      }
      if (($doc_count > 0) && !$_summary) {
          printf("\n%s\n", &global_stats());
--- 100,106 ----
          }
          # Transform the parameter into a URI
          $uri = urize($uri);
!         &check_uri($uri, 0, $_depth);
      }
      if (($doc_count > 0) && !$_summary) {
          printf("\n%s\n", &global_stats());
***************
*** 128,133 ****
--- 129,141 ----
      if ($query->param('recursive')) {
          $_recursive = 1;
      }
+     if ($query->param('quiet')) {
+         $_quiet = $query->param('quiet');
+ 	$_quiet and $_summary = 1;
+     }
+     if ($query->param('depth')) {
+         $_depth = $query->param('depth');
+     }
      $_html = 1;
      my $uri;
      if ($query->param('uri')) {
***************
*** 148,154 ****
              $uri = 'http://'.$uri;
          }
      }
!     &check_uri($uri, 1);
      &html_footer();
  }
  
--- 156,162 ----
              $uri = 'http://'.$uri;
          }
      }
!     &check_uri($uri, 1, $_depth);
      &html_footer();
  }
  
***************
*** 167,173 ****
              push(@uris, $_);
          } elsif (m/^--$/) {
              $uris = 1;
!         } elsif (m/^-[^-upytdlL]/) {
              if (m/q/) {
                  $_quiet = 1;
                  $_summary = 1;
--- 175,181 ----
              push(@uris, $_);
          } elsif (m/^--$/) {
              $uris = 1;
!         } elsif (m/^-[^-DupytdlL]/) {
              if (m/q/) {
                  $_quiet = 1;
                  $_summary = 1;
***************
*** 226,234 ****
              $_timeout = shift(@ARGV);
          } elsif (m/^-L|--languages$/) {
              $_languages = shift(@ARGV);
!         } elsif (m/^-d|--domain$/) {
              $_trusted = shift(@ARGV);
!         } elsif (m/^-y|--proxy$/) {
              $_http_proxy = shift(@ARGV);
          } elsif (m/^--masquerade$/) {
              $_masquerade = 1;
--- 234,244 ----
              $_timeout = shift(@ARGV);
          } elsif (m/^-L|--languages$/) {
              $_languages = shift(@ARGV);
!         } elsif (m/^-D|--depth$/) {
!             $_depth = shift(@ARGV);
!          } elsif (m/^-d|--domain$/) {
              $_trusted = shift(@ARGV);
!        } elsif (m/^-y|--proxy$/) {
              $_http_proxy = shift(@ARGV);
          } elsif (m/^--masquerade$/) {
              $_masquerade = 1;
***************
*** 253,258 ****
--- 263,270 ----
  	-e/--directory		Hide directory redirects - e.g.
  				http://www.w3.org/TR -> http://www.w3.org/TR/
  	-r/--recursive		Check the documents linked from the first one.
+ 	-D/--depth n	        Check the documents linked from the first one
+                                 to depth n.
  	-l/--location uri	Scope of the documents checked.
  				By default, for
  				http://www.w3.org/TR/html4/Overview.html
***************
*** 325,331 ****
  ########################################
  
  sub check_uri() {
!     my ($uri, $html_header) = @_;
      # If $html_header equals 1, we need to generate a HTML header (first
      # instance called in HTML mode).
  
--- 337,343 ----
  ########################################
  
  sub check_uri() {
!     my ($uri, $html_header, $depth) = @_;
      # If $html_header equals 1, we need to generate a HTML header (first
      # instance called in HTML mode).
  
***************
*** 359,365 ****
      printf("\nProcessing\t%s\n\n", $_html ? &show_url(&encode($absolute_uri))
             : $absolute_uri);
  
!     if ($_html) {
          printf("</h2>\n<p>Go to <a href='#%s'>the results</a>.</p>\n",
                 $result_anchor);
          printf("<p>Check also: <a href=\"http://validator.w3.org/check?uri=%s\">HTML Validity</a> &amp; <a href=\"http://jigsaw.w3.org/css-validator/validator?uri=%s\">CSS Validity</a></p>\n<p>Back to the <a href=\"checklink\">link checker</a>.</p>\n", map{&encode($absolute_uri)}(1..2));
--- 371,377 ----
      printf("\nProcessing\t%s\n\n", $_html ? &show_url(&encode($absolute_uri))
             : $absolute_uri);
  
!     if ($_html && ! $_quiet) {
          printf("</h2>\n<p>Go to <a href='#%s'>the results</a>.</p>\n",
                 $result_anchor);
          printf("<p>Check also: <a href=\"http://validator.w3.org/check?uri=%s\">HTML Validity</a> &amp; <a href=\"http://jigsaw.w3.org/css-validator/validator?uri=%s\">CSS Validity</a></p>\n<p>Back to the <a href=\"checklink\">link checker</a>.</p>\n", map{&encode($absolute_uri)}(1..2));
***************
*** 372,378 ****
      $processed{$absolute_uri} = 1;
      # Parse the document
      my $p = &parse_document($uri, $absolute_uri,
!                             $response->content(), 1);
      my $base = URI->new($p->{base});
  
      # Check anchors
--- 384,391 ----
      $processed{$absolute_uri} = 1;
      # Parse the document
      my $p = &parse_document($uri, $absolute_uri,
!                             $response->content(), 1, 
! 			    $_recursive || $depth > 0);
      my $base = URI->new($p->{base});
  
      # Check anchors
***************
*** 503,509 ****
      &links_summary(\%links, \%results, \%broken, \%redirects);
  
      # Do we want to process other documents?
!     if ($_recursive) {
          if ($_base_location eq '.') {
              # Get the name of the original directory
              # e.g. http://www.w3.org/TR/html4/Overview.html
--- 516,522 ----
      &links_summary(\%links, \%results, \%broken, \%redirects);
  
      # Do we want to process other documents?
!     if ($_recursive || $depth > 0) {
          if ($_base_location eq '.') {
              # Get the name of the original directory
              # e.g. http://www.w3.org/TR/html4/Overview.html
***************
*** 546,552 ****
                  sleep($_sleep_time);
              }
              print "\n";
!             &check_uri($u, 0);
          }
      }
  }
--- 559,565 ----
                  sleep($_sleep_time);
              }
              print "\n";
!             &check_uri($u, 0, $depth - 1);
          }
      }
  }
***************
*** 810,816 ****
  ####################
  
  sub parse_document() {
!     my ($uri, $location, $document, $links) = @_;
  
      my $p;
  
--- 823,829 ----
  ####################
  
  sub parse_document() {
!     my ($uri, $location, $document, $links, $need_links) = @_;
  
      my $p;
  
***************
*** 835,841 ****
      # We only look for anchors if we are not interested in the links
      # obviously, or if we are running a recursive checking because we
      # might need this information later
!     $p->{only_anchors} = !($links || $_recursive);
  
      # Transform <?xml:stylesheet ...?> into <xml:stylesheet ...> for parsing
      # Processing instructions are not parsed by process, but in this case
--- 848,854 ----
      # We only look for anchors if we are not interested in the links
      # obviously, or if we are running a recursive checking because we
      # might need this information later
!     $p->{only_anchors} = !($links || $need_links);
  
      # Transform <?xml:stylesheet ...?> into <xml:stylesheet ...> for parsing
      # Processing instructions are not parsed by process, but in this case

Received on Monday, 19 March 2001 01:48:06 UTC