- From: Olivier Thereaux via cvs-syncmail <cvsmail@w3.org>
- Date: Tue, 17 May 2005 07:53:36 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/perl/modules/W3C/LogValidator/lib/W3C
In directory hutz:/tmp/cvs-serv15769
Modified Files:
LogValidator.pm
Log Message:
Remove URI fragments from logged addresses, plus a bug fix: part of the log preprocessing (the remove_duplicates step) was not being done for plain lists of addresses.
Index: LogValidator.pm
===================================================================
RCS file: /sources/public/perl/modules/W3C/LogValidator/lib/W3C/LogValidator.pm,v
retrieving revision 1.14
retrieving revision 1.15
diff -u -d -r1.14 -r1.15
--- LogValidator.pm 10 Nov 2004 00:16:47 -0000 1.14
+++ LogValidator.pm 17 May 2005 07:53:34 -0000 1.15
@@ -228,6 +228,7 @@
if ($logtype eq "plain")
{
$tmprecord = $record_arry[0];
+ $tmprecord = $self->remove_duplicates($tmprecord);
}
elsif ($logtype eq "w3") # our W3C in-house log format
{
@@ -255,21 +256,25 @@
sub remove_duplicates
# removes "directory index" suffixes such as index.html, etc
# so that http://foobar/ and http://foobar/index.html be counted as one resource
+# also removes URI fragments
{
my $self = shift;
my $tmprecord;
if (@_) { $tmprecord = shift;}
+
+ # remove frags
+ $tmprecord =~ s/\#.*$// if ($tmprecord);
+
+ # remove indexes
my $index_file;
foreach $index_file (split (" ",$config{LogProcessor}{DirectoryIndex}))
{
$tmprecord =~ s/$index_file$// if ($tmprecord);
}
return $tmprecord;
-
}
-
sub hit
{
my $self = shift;
Received on Tuesday, 17 May 2005 08:01:02 UTC