- From: Ville Skytta via cvs-syncmail <cvsmail@w3.org>
- Date: Thu, 01 May 2008 11:28:25 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/perl/modules/W3C/LinkChecker/bin
In directory hutz:/tmp/cvs-serv23789
Modified Files:
checklink
Log Message:
Micro-optimizations, cleanups.
Index: checklink
===================================================================
RCS file: /sources/public/perl/modules/W3C/LinkChecker/bin/checklink,v
retrieving revision 4.105
retrieving revision 4.106
diff -u -d -r4.105 -r4.106
--- checklink 1 May 2008 11:07:18 -0000 4.105
+++ checklink 1 May 2008 11:28:23 -0000 4.106
@@ -409,17 +409,18 @@
&ask_password() if ($Opts{User} && !$Opts{Password});
+ if (!$Opts{Summary_Only}) {
+ printf("%s %s\n", $PACKAGE, $REVISION) unless $Opts{HTML};
+ } else {
+ $Opts{Verbose} = 0;
+ $Opts{Progress} = 0;
+ }
+
my $is_first = 1;
my @bases = @{$Opts{Base_Locations}};
foreach my $uri (@ARGV) {
- if (!$Opts{Summary_Only}) {
- printf("%s %s\n", $PACKAGE, $REVISION) unless $Opts{HTML};
- } else {
- $Opts{Verbose} = 0;
- $Opts{Progress} = 0;
- }
# Reset base locations so that previous URI's given on the command line
- # won't affect the recursion scope for this URI
+ # won't affect the recursion scope for this URI (see check_uri())
@{$Opts{Base_Locations}} = @bases;
# Transform the parameter into a URI
$uri = &urize($uri);
@@ -879,15 +880,16 @@
print "Checking anchors...\n" unless $Opts{Summary_Only};
my %errors;
- foreach my $anchor (keys %{$p->{Anchors}}) {
- my $times = 0;
- foreach my $l (keys %{$p->{Anchors}{$anchor}}) {
- $times += $p->{Anchors}{$anchor}{$l};
+ while (my ($anchor, $lines) = each(%{$p->{Anchors}})) {
+ if (!length($anchor)) {
+ # Empty IDREF's are not allowed
+ $errors{$anchor} = 1;
+ } else {
+ my $times = 0;
+ $times += $_ for values(%$lines);
+ # They should appear only once
+ $errors{$anchor} = 1 if ($times > 1);
}
- # They should appear only once
- $errors{$anchor} = 1 if ($times > 1);
- # Empty IDREF's are not allowed
- $errors{$anchor} = 1 if ($anchor eq '');
}
print " done.\n" unless $Opts{Summary_Only};
@@ -896,7 +898,7 @@
my %links;
# Record all the links found
- foreach my $link (keys %{$p->{Links}}) {
+ while (my ($link, $lines) = each(%{$p->{Links}})) {
my $link_uri = URI->new($link);
my $abs_link_uri = URI->new_abs($link_uri, $base);
@@ -908,25 +910,27 @@
$nlink =~
s|^\Q$Opts{Masquerade_From}\E|$Opts{Masquerade_To}|;
$abs_link_uri = URI->new($nlink);
- };
+ }
}
- foreach my $lines (keys %{$p->{Links}{$link}}) {
- my $url = URI->new($abs_link_uri->canonical());
- my $fragment = $url->fragment(undef);
- next if (defined($Opts{Exclude}) && $url =~ $Opts{Exclude});
- if (!defined($fragment) || $fragment eq '') {
- # Document without fragment
- $links{$url}{location}{$lines} = 1;
- } else {
- # Resource with a fragment
- $links{$url}{fragments}{$fragment}{$lines} = 1;
+
+ my $canon_uri = URI->new($abs_link_uri->canonical());
+ my $fragment = $canon_uri->fragment(undef);
+ if (!defined($Opts{Exclude}) || $canon_uri !~ $Opts{Exclude}) {
+ foreach my $line_num (keys(%$lines)) {
+ if (!defined($fragment) || !length($fragment)) {
+ # Document without fragment
+ $links{$canon_uri}{location}{$line_num} = 1;
+ } else {
+ # Resource with a fragment
+ $links{$canon_uri}{fragments}{$fragment}{$line_num} = 1;
+ }
}
}
}
# Build the list of broken URI's
my %broken;
- foreach my $u (keys %links) {
+ while (my ($u, $ulinks) = each(%links)) {
# Don't check mailto: URI's
# TODO: bug 29
@@ -954,14 +958,14 @@
$broken{$u}{location} = 1 if ($results{$u}{location}{display} >= 400);
# List the broken fragments
- foreach my $fragment (keys %{$links{$u}{fragments}}) {
+ while (my ($fragment, $lines) = each(%{$ulinks->{fragments}})) {
if ($Opts{Verbose}) {
- my @frags = sort {$a<=>$b} keys %{$links{$u}{fragments}{$fragment}};
+ my @line_nums = sort { $a <=> $b } keys(%$lines);
&hprintf("\t\t%s %s - Line%s: %s\n",
$fragment,
($results{$u}{fragments}{$fragment}) ? 'OK' : 'Not found',
- (scalar(@frags) > 1) ? 's' : '',
- join(', ', @frags)
+ (scalar(@line_nums) > 1) ? 's' : '',
+ join(', ', @line_nums)
);
}
# A broken fragment?
@@ -973,7 +977,7 @@
# Couldn't find the document
$broken{$u}{location} = 1;
# All the fragments associated are hence broken
- foreach my $fragment (keys %{$links{$u}{fragments}}) {
+ foreach my $fragment (keys %{$ulinks->{fragments}}) {
$broken{$u}{fragments}{$fragment}++;
}
}
@@ -1023,6 +1027,7 @@
}
}
}
+
if ($depth < 0) {
&check_uri($query, $u, 0, -1, undef, $uri);
} else {
@@ -1505,7 +1510,7 @@
# --> it does in LWP >= 5.810
if ($tag eq 'base') {
# Treat <base> (without href) or <base href=""> as if it didn't exist.
- if (defined($attr->{href}) && $attr->{href} ne '') {
+ if (defined($attr->{href}) && length($attr->{href})) {
$self->{base} = $attr->{href};
}
# Note: base/@href intentionally not treated as a dereferenceable link:
@@ -1853,7 +1858,7 @@
$format = "\t%s\tLine$s: %s\n";
}
printf($format,
- &encode($anchor eq '' ? 'Empty anchor' : $anchor),
+ &encode(length($anchor) ? $anchor : 'Empty anchor'),
join(', ', @unique));
}
@@ -1877,14 +1882,10 @@
my $redirected = &is_redirected($u, %$redirects);
# List of lines
my @total_lines;
- foreach my $l (keys %{$links->{$u}{location}}) {
- push (@total_lines, $l);
- }
+ push(@total_lines, keys(%{$links->{$u}{location}}));
foreach my $f (@fragments) {
- next if ($f eq $u && defined($links->{$u}{$u}{LINE_UNKNOWN()}));
- foreach my $l (keys %{$links->{$u}{fragments}{$f}}) {
- push (@total_lines, $l);
- }
+ push(@total_lines, keys(%{$links->{$u}{fragments}{$f}}))
+ unless ($f eq $u && defined($links->{$u}{$u}{LINE_UNKNOWN()}));
}
my ($redirect_loop, @redirects_urls) = get_redirects($u, %$redirects);
Received on Thursday, 1 May 2008 11:28:59 UTC