- From: <rowe@excc.ex.ac.uk>
- Date: Sun, 18 Mar 2001 18:56:51 -0500 (EST)
- To: www-validator@w3.org
First, thanks for writing checklink - needless to say, I've already
found a few broken links!
I needed a --depth option, i.e. like --recursive but limited to a finite
depth, and a quiet option for CGI operation. A minor patch is enclosed -
you may need to change the name of the file from 'checklink' to
'checklink.pl' or whatever.
Thanks again.
John
*** checklink Sun Mar 18 21:54:26 2001
--- checklink.new Sun Mar 18 23:46:34 2001
***************
*** 63,68 ****
--- 63,69 ----
my $_masquerade = 0;
my $_local_dir = my $_remote_masqueraded_uri = '';
my $_hide_same_realm = 0;
+ my $_depth = 0;
# Restrictions for the online version
my $_sleep_time = 3;
***************
*** 99,105 ****
}
# Transform the parameter into a URI
$uri = urize($uri);
! &check_uri($uri);
}
if (($doc_count > 0) && !$_summary) {
printf("\n%s\n", &global_stats());
--- 100,106 ----
}
# Transform the parameter into a URI
$uri = urize($uri);
! &check_uri($uri, 0, $_depth);
}
if (($doc_count > 0) && !$_summary) {
printf("\n%s\n", &global_stats());
***************
*** 128,133 ****
--- 129,141 ----
if ($query->param('recursive')) {
$_recursive = 1;
}
+ if ($query->param('quiet')) {
+ $_quiet = $query->param('quiet');
+ $_quiet and $_summary = 1;
+ }
+ if ($query->param('depth')) {
+ $_depth = $query->param('depth');
+ }
$_html = 1;
my $uri;
if ($query->param('uri')) {
***************
*** 148,154 ****
$uri = 'http://'.$uri;
}
}
! &check_uri($uri, 1);
&html_footer();
}
--- 156,162 ----
$uri = 'http://'.$uri;
}
}
! &check_uri($uri, 1, $_depth);
&html_footer();
}
***************
*** 167,173 ****
push(@uris, $_);
} elsif (m/^--$/) {
$uris = 1;
! } elsif (m/^-[^-upytdlL]/) {
if (m/q/) {
$_quiet = 1;
$_summary = 1;
--- 175,181 ----
push(@uris, $_);
} elsif (m/^--$/) {
$uris = 1;
! } elsif (m/^-[^-DupytdlL]/) {
if (m/q/) {
$_quiet = 1;
$_summary = 1;
***************
*** 226,234 ****
$_timeout = shift(@ARGV);
} elsif (m/^-L|--languages$/) {
$_languages = shift(@ARGV);
! } elsif (m/^-d|--domain$/) {
$_trusted = shift(@ARGV);
! } elsif (m/^-y|--proxy$/) {
$_http_proxy = shift(@ARGV);
} elsif (m/^--masquerade$/) {
$_masquerade = 1;
--- 234,244 ----
$_timeout = shift(@ARGV);
} elsif (m/^-L|--languages$/) {
$_languages = shift(@ARGV);
! } elsif (m/^-D|--depth$/) {
! $_depth = shift(@ARGV);
! } elsif (m/^-d|--domain$/) {
$_trusted = shift(@ARGV);
! } elsif (m/^-y|--proxy$/) {
$_http_proxy = shift(@ARGV);
} elsif (m/^--masquerade$/) {
$_masquerade = 1;
***************
*** 253,258 ****
--- 263,270 ----
-e/--directory Hide directory redirects - e.g.
http://www.w3.org/TR -> http://www.w3.org/TR/
-r/--recursive Check the documents linked from the first one.
+ -D/--depth n Check the documents linked from the first one
+ to depth n.
-l/--location uri Scope of the documents checked.
By default, for
http://www.w3.org/TR/html4/Overview.html
***************
*** 325,331 ****
########################################
sub check_uri() {
! my ($uri, $html_header) = @_;
# If $html_header equals 1, we need to generate a HTML header (first
# instance called in HTML mode).
--- 337,343 ----
########################################
sub check_uri() {
! my ($uri, $html_header, $depth) = @_;
# If $html_header equals 1, we need to generate a HTML header (first
# instance called in HTML mode).
***************
*** 359,365 ****
printf("\nProcessing\t%s\n\n", $_html ? &show_url(&encode($absolute_uri))
: $absolute_uri);
! if ($_html) {
printf("</h2>\n<p>Go to <a href='#%s'>the results</a>.</p>\n",
$result_anchor);
printf("<p>Check also: <a href=\"http://validator.w3.org/check?uri=%s\">HTML Validity</a> & <a href=\"http://jigsaw.w3.org/css-validator/validator?uri=%s\">CSS Validity</a></p>\n<p>Back to the <a href=\"checklink\">link checker</a>.</p>\n", map{&encode($absolute_uri)}(1..2));
--- 371,377 ----
printf("\nProcessing\t%s\n\n", $_html ? &show_url(&encode($absolute_uri))
: $absolute_uri);
! if ($_html && ! $_quiet) {
printf("</h2>\n<p>Go to <a href='#%s'>the results</a>.</p>\n",
$result_anchor);
printf("<p>Check also: <a href=\"http://validator.w3.org/check?uri=%s\">HTML Validity</a> & <a href=\"http://jigsaw.w3.org/css-validator/validator?uri=%s\">CSS Validity</a></p>\n<p>Back to the <a href=\"checklink\">link checker</a>.</p>\n", map{&encode($absolute_uri)}(1..2));
***************
*** 372,378 ****
$processed{$absolute_uri} = 1;
# Parse the document
my $p = &parse_document($uri, $absolute_uri,
! $response->content(), 1);
my $base = URI->new($p->{base});
# Check anchors
--- 384,391 ----
$processed{$absolute_uri} = 1;
# Parse the document
my $p = &parse_document($uri, $absolute_uri,
! $response->content(), 1,
! $_recursive || $depth > 0);
my $base = URI->new($p->{base});
# Check anchors
***************
*** 503,509 ****
&links_summary(\%links, \%results, \%broken, \%redirects);
# Do we want to process other documents?
! if ($_recursive) {
if ($_base_location eq '.') {
# Get the name of the original directory
# e.g. http://www.w3.org/TR/html4/Overview.html
--- 516,522 ----
&links_summary(\%links, \%results, \%broken, \%redirects);
# Do we want to process other documents?
! if ($_recursive || $depth > 0) {
if ($_base_location eq '.') {
# Get the name of the original directory
# e.g. http://www.w3.org/TR/html4/Overview.html
***************
*** 546,552 ****
sleep($_sleep_time);
}
print "\n";
! &check_uri($u, 0);
}
}
}
--- 559,565 ----
sleep($_sleep_time);
}
print "\n";
! &check_uri($u, 0, $depth - 1);
}
}
}
***************
*** 810,816 ****
####################
sub parse_document() {
! my ($uri, $location, $document, $links) = @_;
my $p;
--- 823,829 ----
####################
sub parse_document() {
! my ($uri, $location, $document, $links, $need_links) = @_;
my $p;
***************
*** 835,841 ****
# We only look for anchors if we are not interested in the links
# obviously, or if we are running a recursive checking because we
# might need this information later
! $p->{only_anchors} = !($links || $_recursive);
# Transform <?xml:stylesheet ...?> into <xml:stylesheet ...> for parsing
# Processing instructions are not parsed by process, but in this case
--- 848,854 ----
# We only look for anchors if we are not interested in the links
# obviously, or if we are running a recursive checking because we
# might need this information later
! $p->{only_anchors} = !($links || $need_links);
# Transform <?xml:stylesheet ...?> into <xml:stylesheet ...> for parsing
# Processing instructions are not parsed by process, but in this case
Received on Monday, 19 March 2001 01:48:06 UTC