- From: Olivier Thereaux <ot@dev.w3.org>
- Date: Wed, 14 Jul 2004 02:40:04 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/perl/modules/W3C/LogValidator/lib/W3C/LogValidator
In directory hutz:/tmp/cvs-serv2518
Modified Files:
HTMLValidator.pm CSSValidator.pm SurveyEngine.pm Basic.pm
Log Message:
adding support for IgnoreArea
Index: SurveyEngine.pm
===================================================================
RCS file: /sources/public/perl/modules/W3C/LogValidator/lib/W3C/LogValidator/SurveyEngine.pm,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -d -r1.2 -r1.3
--- SurveyEngine.pm 8 Jun 2004 05:00:09 -0000 1.2
+++ SurveyEngine.pm 14 Jul 2004 02:40:01 -0000 1.3
@@ -62,6 +62,43 @@
return $self->{AUTH_EXT};
}
+sub trim_uris
+{
+ my $self = shift;
+ my @authorized_extensions = split(" ", $self->auth_ext);
+ my @trimmed_uris;
+ my $uri;
+ my $ignore_regexp = "";
+ $ignore_regexp = $config{IgnoreArea};
+ $ignore_regexp =~ s/\//\\\//g ;
+ my @ignored_areas = split(" ", $ignore_regexp);
+ while ($uri = shift)
+ {
+ my $uri_ext = "";
+ my $match = 0;
+ if ($uri =~ /(\.[0-9a-zA-Z]+)$/)
+ {
+ $uri_ext = $1;
+ }
+ elsif ($uri =~ /\/$/) { $uri_ext = "/";}
+ elsif ( $self->HEAD_check($uri) ) { $match = 1; }
+ foreach my $ext (@authorized_extensions)
+ {
+ if ($ext eq $uri_ext) { $match = 1; }
+ }
+ foreach my $area (@ignored_areas)
+ {
+ if ($uri =~ /$area/)
+ {
+ print "ignoring $uri matching $area \n" if ($verbose > 2) ;
+ $match = 0;
+ }
+ }
+
+ push @trimmed_uris,$uri if ($match);
+ }
+ return @trimmed_uris;
+}
#########################################
@@ -112,22 +149,7 @@
my $localDate = "$year-$mon-$mday" ;
my $census = 0;
- my @trimmed_uris;
- foreach my $uri (@uris)
- {
- my @authorized_extensions = split(" ", $self->auth_ext);
- foreach my $ext (@authorized_extensions)
- {
- if ($uri=~ /$ext$/ )
- {
- push @trimmed_uris,$uri;
- # print "$uri accepted" if ($verbose >2); #debug
- }
- #else { print "$uri left out" if ($verbose >2);} # debug
-
- }
- }
- @uris = @trimmed_uris;
+ @uris = $self->trim_uris(@uris);
while ((@uris) and (($census < $max_documents) or (!$max_documents)) )
{
Index: HTMLValidator.pm
===================================================================
RCS file: /sources/public/perl/modules/W3C/LogValidator/lib/W3C/LogValidator/HTMLValidator.pm,v
retrieving revision 1.10
retrieving revision 1.11
diff -u -d -r1.10 -r1.11
--- HTMLValidator.pm 8 Jun 2004 06:36:04 -0000 1.10
+++ HTMLValidator.pm 14 Jul 2004 02:40:01 -0000 1.11
@@ -136,7 +136,12 @@
my $self = shift;
my @authorized_extensions = split(" ", $self->auth_ext);
my @trimmed_uris;
- my $uri;
+ my $ignore_regexp = "";
+ $ignore_regexp = $config{IgnoreArea};
+ $ignore_regexp =~ s/\//\\\//g ;
+ my @ignored_areas = split(" ", $ignore_regexp);
+
+ my $uri;
while ($uri = shift)
{
my $uri_ext = "";
@@ -151,12 +156,20 @@
{
if ($ext eq $uri_ext) { $match = 1; }
}
+ foreach my $area (@ignored_areas)
+ {
+ if ($uri =~ /$area/)
+ {
+ print "ignoring $uri matching $area \n" if ($verbose > 2) ;
+ $match = 0;
+ }
+ }
+
push @trimmed_uris,$uri if ($match);
}
return @trimmed_uris;
}
-
#########################################
# Actual subroutine to check the list of uris #
#########################################
Index: Basic.pm
===================================================================
RCS file: /sources/public/perl/modules/W3C/LogValidator/lib/W3C/LogValidator/Basic.pm,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -d -r1.4 -r1.5
--- Basic.pm 7 Jun 2004 14:25:54 -0000 1.4
+++ Basic.pm 14 Jul 2004 02:40:01 -0000 1.5
@@ -43,6 +43,34 @@
# unused
}
+
+sub trim_uris
+{
+ my $self = shift;
+ my @trimmed_uris;
+ my $ignore_regexp = "";
+ $ignore_regexp = $config{IgnoreArea};
+ $ignore_regexp =~ s/\//\\\//g ;
+ my @ignored_areas = split(" ", $ignore_regexp);
+
+ my $uri;
+ while ($uri = shift)
+ {
+ my $acceptable = 1;
+ foreach my $area (@ignored_areas)
+ {
+ if ($uri =~ /$area/)
+ {
+ print "ignoring $uri matching $area \n" if ($verbose > 2) ;
+ $acceptable = 0;
+ }
+ }
+ push @trimmed_uris,$uri if ($acceptable);
+ }
+ return @trimmed_uris;
+}
+
+
#########################################
# Actual subroutine to check the list of uris #
#########################################
@@ -74,6 +102,8 @@
my @uris = sort { $hits{$b} <=> $hits{$a} }
keys %hits;
+ @uris = $self->trim_uris(@uris);
+
my $intro="Here are the <census> most popular documents overall for $name.";
my @result;
my @result_head;
Index: CSSValidator.pm
===================================================================
RCS file: /sources/public/perl/modules/W3C/LogValidator/lib/W3C/LogValidator/CSSValidator.pm,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -d -r1.5 -r1.6
--- CSSValidator.pm 29 Jun 2004 00:01:40 -0000 1.5
+++ CSSValidator.pm 14 Jul 2004 02:40:01 -0000 1.6
@@ -119,6 +119,10 @@
my @authorized_extensions = split(" ", $self->auth_ext);
my @trimmed_uris;
my $uri;
+ my $ignore_regexp = "";
+ $ignore_regexp = $config{IgnoreArea};
+ $ignore_regexp =~ s/\//\\\//g ;
+ my @ignored_areas = split(" ", $ignore_regexp);
while ($uri = shift)
{
my $uri_ext = "";
@@ -133,9 +137,18 @@
{
if ($ext eq $uri_ext) { $match = 1; }
}
+ foreach my $area (@ignored_areas)
+ {
+ if ($uri =~ /$area/)
+ {
+ print "ignoring $uri matching $area \n" if ($verbose > 2) ;
+ $match = 0;
+ }
+ }
+
push @trimmed_uris,$uri if ($match);
}
- return @trimmed_uris;
+ return @trimmed_uris;
}
#########################################
Received on Wednesday, 14 July 2004 02:40:05 UTC