- From: <rowe@excc.ex.ac.uk>
- Date: Sun, 18 Mar 2001 18:56:51 -0500 (EST)
- To: www-validator@w3.org
First, thanks for writing checklink - needless to say, I've already found a few broken links! I needed a --depth option, i.e. like --recursive but limited to a finite depth, and a quiet option for CGI operation. A minor patch is enclosed - you may need to change the name of the file from 'checklink' to 'checklink.pl' or whatever. Thanks again. John *** checklink Sun Mar 18 21:54:26 2001 --- checklink.new Sun Mar 18 23:46:34 2001 *************** *** 63,68 **** --- 63,69 ---- my $_masquerade = 0; my $_local_dir = my $_remote_masqueraded_uri = ''; my $_hide_same_realm = 0; + my $_depth = 0; # Restrictions for the online version my $_sleep_time = 3; *************** *** 99,105 **** } # Transform the parameter into a URI $uri = urize($uri); ! &check_uri($uri); } if (($doc_count > 0) && !$_summary) { printf("\n%s\n", &global_stats()); --- 100,106 ---- } # Transform the parameter into a URI $uri = urize($uri); ! &check_uri($uri, 0, $_depth); } if (($doc_count > 0) && !$_summary) { printf("\n%s\n", &global_stats()); *************** *** 128,133 **** --- 129,141 ---- if ($query->param('recursive')) { $_recursive = 1; } + if ($query->param('quiet')) { + $_quiet = $query->param('quiet'); + $_quiet and $_summary = 1; + } + if ($query->param('depth')) { + $_depth = $query->param('depth'); + } $_html = 1; my $uri; if ($query->param('uri')) { *************** *** 148,154 **** $uri = 'http://'.$uri; } } ! &check_uri($uri, 1); &html_footer(); } --- 156,162 ---- $uri = 'http://'.$uri; } } ! &check_uri($uri, 1, $_depth); &html_footer(); } *************** *** 167,173 **** push(@uris, $_); } elsif (m/^--$/) { $uris = 1; ! } elsif (m/^-[^-upytdlL]/) { if (m/q/) { $_quiet = 1; $_summary = 1; --- 175,181 ---- push(@uris, $_); } elsif (m/^--$/) { $uris = 1; ! } elsif (m/^-[^-DupytdlL]/) { if (m/q/) { $_quiet = 1; $_summary = 1; *************** *** 226,234 **** $_timeout = shift(@ARGV); } elsif (m/^-L|--languages$/) { $_languages = shift(@ARGV); ! } elsif (m/^-d|--domain$/) { $_trusted = shift(@ARGV); ! 
} elsif (m/^-y|--proxy$/) { $_http_proxy = shift(@ARGV); } elsif (m/^--masquerade$/) { $_masquerade = 1; --- 234,244 ---- $_timeout = shift(@ARGV); } elsif (m/^-L|--languages$/) { $_languages = shift(@ARGV); ! } elsif (m/^-D|--depth$/) { ! $_depth = shift(@ARGV); ! } elsif (m/^-d|--domain$/) { $_trusted = shift(@ARGV); ! } elsif (m/^-y|--proxy$/) { $_http_proxy = shift(@ARGV); } elsif (m/^--masquerade$/) { $_masquerade = 1; *************** *** 253,258 **** --- 263,270 ---- -e/--directory Hide directory redirects - e.g. http://www.w3.org/TR -> http://www.w3.org/TR/ -r/--recursive Check the documents linked from the first one. + -D/--depth n Check the documents linked from the first one + to depth n. -l/--location uri Scope of the documents checked. By default, for http://www.w3.org/TR/html4/Overview.html *************** *** 325,331 **** ######################################## sub check_uri() { ! my ($uri, $html_header) = @_; # If $html_header equals 1, we need to generate a HTML header (first # instance called in HTML mode). --- 337,343 ---- ######################################## sub check_uri() { ! my ($uri, $html_header, $depth) = @_; # If $html_header equals 1, we need to generate a HTML header (first # instance called in HTML mode). *************** *** 359,365 **** printf("\nProcessing\t%s\n\n", $_html ? &show_url(&encode($absolute_uri)) : $absolute_uri); ! if ($_html) { printf("</h2>\n<p>Go to <a href='#%s'>the results</a>.</p>\n", $result_anchor); printf("<p>Check also: <a href=\"http://validator.w3.org/check?uri=%s\">HTML Validity</a> & <a href=\"http://jigsaw.w3.org/css-validator/validator?uri=%s\">CSS Validity</a></p>\n<p>Back to the <a href=\"checklink\">link checker</a>.</p>\n", map{&encode($absolute_uri)}(1..2)); --- 371,377 ---- printf("\nProcessing\t%s\n\n", $_html ? &show_url(&encode($absolute_uri)) : $absolute_uri); ! if ($_html && ! 
$_quiet) { printf("</h2>\n<p>Go to <a href='#%s'>the results</a>.</p>\n", $result_anchor); printf("<p>Check also: <a href=\"http://validator.w3.org/check?uri=%s\">HTML Validity</a> & <a href=\"http://jigsaw.w3.org/css-validator/validator?uri=%s\">CSS Validity</a></p>\n<p>Back to the <a href=\"checklink\">link checker</a>.</p>\n", map{&encode($absolute_uri)}(1..2)); *************** *** 372,378 **** $processed{$absolute_uri} = 1; # Parse the document my $p = &parse_document($uri, $absolute_uri, ! $response->content(), 1); my $base = URI->new($p->{base}); # Check anchors --- 384,391 ---- $processed{$absolute_uri} = 1; # Parse the document my $p = &parse_document($uri, $absolute_uri, ! $response->content(), 1, ! $_recursive || $depth > 0); my $base = URI->new($p->{base}); # Check anchors *************** *** 503,509 **** &links_summary(\%links, \%results, \%broken, \%redirects); # Do we want to process other documents? ! if ($_recursive) { if ($_base_location eq '.') { # Get the name of the original directory # e.g. http://www.w3.org/TR/html4/Overview.html --- 516,522 ---- &links_summary(\%links, \%results, \%broken, \%redirects); # Do we want to process other documents? ! if ($_recursive || $depth > 0) { if ($_base_location eq '.') { # Get the name of the original directory # e.g. http://www.w3.org/TR/html4/Overview.html *************** *** 546,552 **** sleep($_sleep_time); } print "\n"; ! &check_uri($u, 0); } } } --- 559,565 ---- sleep($_sleep_time); } print "\n"; ! &check_uri($u, 0, $depth - 1); } } } *************** *** 810,816 **** #################### sub parse_document() { ! my ($uri, $location, $document, $links) = @_; my $p; --- 823,829 ---- #################### sub parse_document() { ! my ($uri, $location, $document, $links, $need_links) = @_; my $p; *************** *** 835,841 **** # We only look for anchors if we are not interested in the links # obviously, or if we are running a recursive checking because we # might need this information later ! 
$p->{only_anchors} = !($links || $_recursive); # Transform <?xml:stylesheet ...?> into <xml:stylesheet ...> for parsing # Processing instructions are not parsed by process, but in this case --- 848,854 ---- # We only look for anchors if we are not interested in the links # obviously, or if we are running a recursive checking because we # might need this information later ! $p->{only_anchors} = !($links || $need_links); # Transform <?xml:stylesheet ...?> into <xml:stylesheet ...> for parsing # Processing instructions are not parsed by process, but in this case
Received on Monday, 19 March 2001 01:48:06 UTC