perl/modules/W3C/LogValidator/lib/W3C LogValidator.pm,1.14,1.15

Update of /sources/public/perl/modules/W3C/LogValidator/lib/W3C
In directory hutz:/tmp/cvs-serv15769

Modified Files:
	LogValidator.pm 
Log Message:
Remove URI fragments during log preprocessing, and fix a bug where part of that preprocessing (the remove_duplicates step) was not applied to plain lists of addresses.

Index: LogValidator.pm
===================================================================
RCS file: /sources/public/perl/modules/W3C/LogValidator/lib/W3C/LogValidator.pm,v
retrieving revision 1.14
retrieving revision 1.15
diff -u -d -r1.14 -r1.15
--- LogValidator.pm	10 Nov 2004 00:16:47 -0000	1.14
+++ LogValidator.pm	17 May 2005 07:53:34 -0000	1.15
@@ -228,6 +228,7 @@
 		if ($logtype eq "plain")
 		{
 			$tmprecord = $record_arry[0];
+			$tmprecord = $self->remove_duplicates($tmprecord);
 		}
 		elsif ($logtype eq "w3") # our W3C in-house log format
 		{
@@ -255,21 +256,25 @@
 sub remove_duplicates
 # removes "directory index" suffixes such as index.html, etc
 # so that http://foobar/ and http://foobar/index.html be counted as one resource
+# also removes URI fragments
 {
 	my $self = shift;
 	my $tmprecord;
 	if (@_) { $tmprecord = shift;}
+	
+	# remove frags
+	$tmprecord =~ s/\#.*$// if ($tmprecord);
+
+	# remove indexes
 	my $index_file;
 	foreach $index_file (split (" ",$config{LogProcessor}{DirectoryIndex}))
 	{
 		$tmprecord =~ s/$index_file$// if ($tmprecord);
 	}
 	return $tmprecord;
-
 }
 
 
-
 sub hit
 {
 	my $self = shift;

Received on Tuesday, 17 May 2005 08:01:02 UTC