- From: Olivier Thereaux <ot@dev.w3.org>
- Date: Wed, 14 Jul 2004 02:40:04 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/perl/modules/W3C/LogValidator/lib/W3C/LogValidator In directory hutz:/tmp/cvs-serv2518 Modified Files: HTMLValidator.pm CSSValidator.pm SurveyEngine.pm Basic.pm Log Message: adding support for IgnoreArea Index: SurveyEngine.pm =================================================================== RCS file: /sources/public/perl/modules/W3C/LogValidator/lib/W3C/LogValidator/SurveyEngine.pm,v retrieving revision 1.2 retrieving revision 1.3 diff -u -d -r1.2 -r1.3 --- SurveyEngine.pm 8 Jun 2004 05:00:09 -0000 1.2 +++ SurveyEngine.pm 14 Jul 2004 02:40:01 -0000 1.3 @@ -62,6 +62,43 @@ return $self->{AUTH_EXT}; } +sub trim_uris +{ + my $self = shift; + my @authorized_extensions = split(" ", $self->auth_ext); + my @trimmed_uris; + my $uri; + my $ignore_regexp = ""; + $ignore_regexp = $config{IgnoreArea}; + $ignore_regexp =~ s/\//\\\//g ; + my @ignored_areas = split(" ", $ignore_regexp); + while ($uri = shift) + { + my $uri_ext = ""; + my $match = 0; + if ($uri =~ /(\.[0-9a-zA-Z]+)$/) + { + $uri_ext = $1; + } + elsif ($uri =~ /\/$/) { $uri_ext = "/";} + elsif ( $self->HEAD_check($uri) ) { $match = 1; } + foreach my $ext (@authorized_extensions) + { + if ($ext eq $uri_ext) { $match = 1; } + } + foreach my $area (@ignored_areas) + { + if ($uri =~ /$area/) + { + print "ignoring $uri matching $area \n" if ($verbose > 2) ; + $match = 0; + } + } + + push @trimmed_uris,$uri if ($match); + } + return @trimmed_uris; +} ######################################### @@ -112,22 +149,7 @@ my $localDate = "$year-$mon-$mday" ; my $census = 0; - my @trimmed_uris; - foreach my $uri (@uris) - { - my @authorized_extensions = split(" ", $self->auth_ext); - foreach my $ext (@authorized_extensions) - { - if ($uri=~ /$ext$/ ) - { - push @trimmed_uris,$uri; - # print "$uri accepted" if ($verbose >2); #debug - } - #else { print "$uri left out" if ($verbose >2);} # debug - - } - } - @uris = @trimmed_uris; + @uris = $self->trim_uris(@uris); while ((@uris) and (($census < 
$max_documents) or (!$max_documents)) ) { Index: HTMLValidator.pm =================================================================== RCS file: /sources/public/perl/modules/W3C/LogValidator/lib/W3C/LogValidator/HTMLValidator.pm,v retrieving revision 1.10 retrieving revision 1.11 diff -u -d -r1.10 -r1.11 --- HTMLValidator.pm 8 Jun 2004 06:36:04 -0000 1.10 +++ HTMLValidator.pm 14 Jul 2004 02:40:01 -0000 1.11 @@ -136,7 +136,12 @@ my $self = shift; my @authorized_extensions = split(" ", $self->auth_ext); my @trimmed_uris; - my $uri; + my $ignore_regexp = ""; + $ignore_regexp = $config{IgnoreArea}; + $ignore_regexp =~ s/\//\\\//g ; + my @ignored_areas = split(" ", $ignore_regexp); + + my $uri; while ($uri = shift) { my $uri_ext = ""; @@ -151,12 +156,20 @@ { if ($ext eq $uri_ext) { $match = 1; } } + foreach my $area (@ignored_areas) + { + if ($uri =~ /$area/) + { + print "ignoring $uri matching $area \n" if ($verbose > 2) ; + $match = 0; + } + } + push @trimmed_uris,$uri if ($match); } return @trimmed_uris; } - ######################################### # Actual subroutine to check the list of uris # ######################################### Index: Basic.pm =================================================================== RCS file: /sources/public/perl/modules/W3C/LogValidator/lib/W3C/LogValidator/Basic.pm,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- Basic.pm 7 Jun 2004 14:25:54 -0000 1.4 +++ Basic.pm 14 Jul 2004 02:40:01 -0000 1.5 @@ -43,6 +43,34 @@ # unused } + +sub trim_uris +{ + my $self = shift; + my @trimmed_uris; + my $ignore_regexp = ""; + $ignore_regexp = $config{IgnoreArea}; + $ignore_regexp =~ s/\//\\\//g ; + my @ignored_areas = split(" ", $ignore_regexp); + + my $uri; + while ($uri = shift) + { + my $acceptable = 1; + foreach my $area (@ignored_areas) + { + if ($uri =~ /$area/) + { + print "ignoring $uri matching $area \n" if ($verbose > 2) ; + $acceptable = 0; + } + } + push @trimmed_uris,$uri if ($acceptable); + } + return 
@trimmed_uris; +} + + ######################################### # Actual subroutine to check the list of uris # ######################################### @@ -74,6 +102,8 @@ my @uris = sort { $hits{$b} <=> $hits{$a} } keys %hits; + @uris = $self->trim_uris(@uris); + my $intro="Here are the <census> most popular documents overall for $name."; my @result; my @result_head; Index: CSSValidator.pm =================================================================== RCS file: /sources/public/perl/modules/W3C/LogValidator/lib/W3C/LogValidator/CSSValidator.pm,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- CSSValidator.pm 29 Jun 2004 00:01:40 -0000 1.5 +++ CSSValidator.pm 14 Jul 2004 02:40:01 -0000 1.6 @@ -119,6 +119,10 @@ my @authorized_extensions = split(" ", $self->auth_ext); my @trimmed_uris; my $uri; + my $ignore_regexp = ""; + $ignore_regexp = $config{IgnoreArea}; + $ignore_regexp =~ s/\//\\\//g ; + my @ignored_areas = split(" ", $ignore_regexp); while ($uri = shift) { my $uri_ext = ""; @@ -133,9 +137,18 @@ { if ($ext eq $uri_ext) { $match = 1; } } + foreach my $area (@ignored_areas) + { + if ($uri =~ /$area/) + { + print "ignoring $uri matching $area \n" if ($verbose > 2) ; + $match = 0; + } + } + push @trimmed_uris,$uri if ($match); } - return @trimmed_uris; + return @trimmed_uris; } #########################################
Received on Wednesday, 14 July 2004 02:40:05 UTC