W3C home > Mailing lists > Public > www-validator-cvs@w3.org > November 2008

perl/modules/W3C/LogValidator/lib/W3C LogValidator.pm,1.22,1.23

From: Olivier Thereaux via cvs-syncmail <cvsmail@w3.org>
Date: Fri, 14 Nov 2008 23:16:30 +0000
To: www-validator-cvs@w3.org
Message-Id: <E1L17tu-0005MS-Pi@lionel-hutz.w3.org>

Update of /sources/public/perl/modules/W3C/LogValidator/lib/W3C
In directory hutz:/tmp/cvs-serv20588

Modified Files:
	LogValidator.pm 
Log Message:
if the log format has information about HTTP method, make sure to only keep the GETs

Index: LogValidator.pm
===================================================================
RCS file: /sources/public/perl/modules/W3C/LogValidator/lib/W3C/LogValidator.pm,v
retrieving revision 1.22
retrieving revision 1.23
diff -u -d -r1.22 -r1.23
--- LogValidator.pm	7 Sep 2007 05:46:02 -0000	1.22
+++ LogValidator.pm	14 Nov 2008 23:16:28 -0000	1.23
@@ -275,16 +275,20 @@
 				if ($tmp_record) # not a blank line
 				{
 					my $tmp_record_uri = $self->find_uri($tmp_record, $logtype);
+					my $tmp_record_HTTP_method = $self->find_HTTP_Method($tmp_record, $logtype);
 					my $tmp_record_mime_type = $self->find_mime_type($tmp_record, $logtype);
 					my $tmp_record_HTTP_code = $self->find_HTTP_code($tmp_record, $logtype);
 					my $tmp_record_referer = $self->find_referer($tmp_record, $logtype);
-					if ($self->no_cgi($tmp_record) or ($config{LogProcessor}{ExcludeCGI} eq 0)) {
+					if ( 
+					  ($tmp_record_HTTP_method eq "GET") 
+					and 
+					  ($self->no_cgi($tmp_record) or ($config{LogProcessor}{ExcludeCGI} eq 0))
+					) {
 						$self->add_uri($tmp_record_uri);
 						$self->add_mime_type($tmp_record_uri, $tmp_record_mime_type);
 						$self->add_HTTP_code($tmp_record_uri,$tmp_record_HTTP_code);
 						$self->add_referer($tmp_record_uri,$tmp_record_referer);
 					}
-
 				}
 				$entriescounter++;
 			}
@@ -340,6 +344,35 @@
 	}
 }
 
+sub find_HTTP_Method
+# finds the HTTP request method in a log record, if available
+{
+	my $self = shift;
+	if (@_)
+	{
+		my $tmprecord = shift;
+		my @record_arry;
+		@record_arry = split(" ", $tmprecord);
+		# hardcoded for most Apache log formats, including common and combined
+		# for the moment... TODO
+		my $logtype = shift;
+		# print "log type $logtype" if ($verbose > 2);
+		if ($logtype eq "plain") 
+		{
+		  # we consider each of those GETs
+			$tmprecord = "GET";
+		}
+		else #common combined full or w3c
+		{
+			$tmprecord = $record_arry[5];
+			$tmprecord =~ s/^"//;
+		}
+	#print "HTTP Method $tmprecord \n" if (($verbose > 2) and ($tmprecord ne ""));
+	return $tmprecord;
+	}
+}
+
+
 sub find_HTTP_code
 # finds the returned HTTP code from a log record, if available
 {
Received on Friday, 14 November 2008 23:16:39 UTC

This archive was generated by hypermail 2.3.1 : Wednesday, 7 January 2015 15:17:34 UTC