- From: Olivier Thereaux via cvs-syncmail <cvsmail@w3.org>
- Date: Tue, 18 Nov 2008 16:04:19 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/perl/modules/W3C/LogValidator/lib/W3C
In directory hutz:/tmp/cvs-serv21547/lib/W3C

Modified Files:
    LogValidator.pm
Log Message:
Patches by Martin B. Smith for the implementation of an ExcludeHost option
http://www.w3.org/Bugs/Public/show_bug.cgi?id=5221

Index: LogValidator.pm
===================================================================
RCS file: /sources/public/perl/modules/W3C/LogValidator/lib/W3C/LogValidator.pm,v
retrieving revision 1.23
retrieving revision 1.24
diff -u -d -r1.23 -r1.24
--- LogValidator.pm 14 Nov 2008 23:16:28 -0000 1.23
+++ LogValidator.pm 18 Nov 2008 16:04:17 -0000 1.24
@@ -259,6 +259,8 @@
     my $self = shift;
     my $tmp_record;
     my $entriesperlogfile = $config{LogProcessor}{EntriesPerLogfile};
+    my $allskiphosts = ($config{LogProcessor}{ExcludeHosts}) ? $config{LogProcessor}{ExcludeHosts} : ""; # default to none
+    my @skiphostsregex = split(" ", $allskiphosts);
     my $entriescounter=0;
     if (@_)
     {
@@ -274,6 +276,19 @@
     my $logtype = $config{LogProcessor}{LogType}{$logfile};
     if ($tmp_record) # not a blank line
     {
+        my $tmp_record_remote_addr = $self->find_remote_addr($tmp_record, $logtype);
+        if ($tmp_record_remote_addr) # not a blank remote host or address
+        {
+            foreach my $skipexpression (@skiphostsregex)
+            {
+                if( $tmp_record_remote_addr =~ /$skipexpression/ )
+                {
+                    print " Skipping " . $tmp_record_remote_addr . " because it matches the ExcludeHosts pattern " . $skipexpression. "\n" if ($verbose > 2);
+                    next;
+                }
+            }
+        }
+
         my $tmp_record_uri = $self->find_uri($tmp_record, $logtype);
         my $tmp_record_HTTP_method = $self->find_HTTP_Method($tmp_record, $logtype);
         my $tmp_record_mime_type = $self->find_mime_type($tmp_record, $logtype);
@@ -337,6 +352,32 @@
         $tmprecord = $self->remove_duplicates($tmprecord);
         if( !( $tmprecord =~ m/^https?\:/ ) ) {
             $tmprecord = join ("",'http://',$config{LogProcessor}{ServerName},$tmprecord);
+sub find_remote_addr
+# finds the returned HTTP code from a log record, if available
+{
+    my $self = shift;
+    if (@_)
+    {
+        my $tmprecord = shift;
+        my @record_arry;
+        @record_arry = split(" ", $tmprecord);
+        # hardcoded to most apache log formats, included common and combined
+        # for the moment... TODO
+        my $logtype = shift;
+        # print "log type $logtype" if ($verbose > 2);
+        if ($logtype eq "plain")
+        {
+            $tmprecord = "";
+        }
+        else #common combined full or w3c
+        {
+            $tmprecord = $record_arry[0];
+        }
+        #print "Remote Addr $tmprecord \n" if (($verbose > 2) and ($tmprecord ne ""));
+        return $tmprecord;
+    }
+}
+
         }
     }
     #print "$tmprecord \n" if ($verbose > 2);
@@ -661,6 +702,10 @@
 =over 4
 
 =item $processor->process
+=item $processor->find_remote_addr
+
+Given a log record and the type of the log (common log format, flat list of URIs, etc), extracts the remote host or ip
+
 
 Do-it-all method:
 Read configuration file (if any), parse log files, run them through processing modules, send result to output module.
Received on Tuesday, 18 November 2008 16:04:29 UTC