- From: Olivier Thereaux via cvs-syncmail <cvsmail@w3.org>
- Date: Tue, 17 May 2005 07:53:36 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/perl/modules/W3C/LogValidator/lib/W3C
In directory hutz:/tmp/cvs-serv15769

Modified Files:
    LogValidator.pm
Log Message:
remove URI fragments, plus a bug fix where part of the log preprocessing was not done for plain lists of addresses

Index: LogValidator.pm
===================================================================
RCS file: /sources/public/perl/modules/W3C/LogValidator/lib/W3C/LogValidator.pm,v
retrieving revision 1.14
retrieving revision 1.15
diff -u -d -r1.14 -r1.15
--- LogValidator.pm 10 Nov 2004 00:16:47 -0000 1.14
+++ LogValidator.pm 17 May 2005 07:53:34 -0000 1.15
@@ -228,6 +228,7 @@
         if ($logtype eq "plain")
         {
             $tmprecord = $record_arry[0];
+            $tmprecord = $self->remove_duplicates($tmprecord);
         }
         elsif ($logtype eq "w3") # our W3C in-house log format
         {
@@ -255,21 +256,25 @@
 sub remove_duplicates
 # removes "directory index" suffixes such as index.html, etc
 # so that http://foobar/ and http://foobar/index.html be counted as one resource
+# also removes URI fragments
 {
     my $self = shift;
     my $tmprecord;
     if (@_) { $tmprecord = shift;}
+
+    # remove frags
+    $tmprecord =~ s/\#.*$// if ($tmprecord);
+
+    # remove indexes
     my $index_file;
     foreach $index_file (split (" ",$config{LogProcessor}{DirectoryIndex}))
     {
         $tmprecord =~ s/$index_file$// if ($tmprecord);
     }
     return $tmprecord;
-
 }
-
 sub hit
 {
     my $self = shift;
Received on Tuesday, 17 May 2005 08:01:02 UTC