- From: Olivier Thereaux <ot@dev.w3.org>
- Date: Tue, 08 Jun 2004 04:36:33 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/perl/modules/W3C/LogValidator/lib/W3C/LogValidator In directory hutz:/tmp/cvs-serv24408 Added Files: SurveyEngine.pm Log Message: new generic survey module - courtesy Matthieu Faure, slighlty modified by yours truly --- NEW FILE: SurveyEngine.pm --- # Copyright (c) 2004 the World Wide Web Consortium : # Keio University, # European Research Consortium for Informatics and Mathematics # Massachusetts Institute of Technology. # written by Matthieu Faure <matthieu@faure.nom.fr> for W3C # maintained by olivier Thereaux <ot@w3.org> and Matthieu Faure <matthieu@faure.nom.fr> # SurveyEngine.pm v0.1 2004/05/17 package W3C::LogValidator::SurveyEngine; use strict; use warnings; require Exporter; our @ISA = qw(Exporter); our %EXPORT_TAGS = ( 'all' => [ qw() ] ); our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); our @EXPORT = qw(); our $VERSION = '0.1'; ########################### # usual package interface # ########################### our $verbose = 1; our %config; sub new { my $self = {}; my $proto = shift; my $class = ref($proto) || $proto; # mandatory vars for the API $self->{URIS} = undef; # internal stuff here # $self->{FOO} = undef; # don't change this if (@_) {%config = %{(shift)};} if (exists $config{verbose}) {$verbose = $config{verbose}} if (exists $config{AuthorizedExtensions}) { $self->{AUTH_EXT} = $config{AuthorizedExtensions}; } else # same as the formats supported by markup Validator # TODO add support for CSS too, at least { $self->{AUTH_EXT} = ".html .xhtml .phtml .htm .shtml .php .svg .xml /"; } $config{ValidatorHost} = "validator.w3.org" if (! exists $config{ValidatorHost}); $config{ValidatorPort} = "80" if (!exists $config{ValidatorPort}); $config{ValidatorString} = "/check\?uri=" if (!exists $config{ValidatorString}); $config{ValidatorVersion} = "0.6.5" if (!exists $config{ValidatorVersion}); bless($self, $class); return $self; } sub auth_ext { my $self=shift; if (@_) { $self->{AUTH_EXT} = shift} return $self->{AUTH_EXT}; } ######################################### # Actual subroutine to check the list of uris # ######################################### sub process_list { my $self = shift; my $max_invalid = undef; my $max_documents = undef; if ( exists $config{MaxInvalid} ) { $max_invalid = $config{MaxInvalid}; } else {$max_invalid = 0;} if (exists $config{MaxDocuments}) {$max_documents = $config{MaxDocuments}; } else {$max_documents = 0;} # print "$max_documents max documents" if ($verbose > 2); # debug my $name = ""; if (exists $config{ServerName}) {$name = $config{ServerName}} print "Now Using the SurveyEngine module...\n" if $verbose; use URI::Escape; use LWP::UserAgent; use DB_File; my $tmp_file = $config{tmpfile}; my %hits; tie (%hits, 'DB_File', "$tmp_file", O_RDONLY) || die ("Cannot create or open $tmp_file"); my @uris = sort { $hits{$b} <=> $hits{$a} } keys %hits; my @result_head; #push @result_head, "Hits"; push @result_head, "Rank"; push @result_head, "Hits"; push @result_head, "URI"; push @result_head, "Charset"; push @result_head, "Doctype"; push @result_head, "Valid (#err)"; my @result; my $uri = undef; my $ua = new LWP::UserAgent; my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time); $year += 1900; $mon = sprintf ( "%02d", $mon); $mday = sprintf ("%02d", $mday); my $localDate = "$year-$mon-$mday" ; my $census = 0; my @trimmed_uris; foreach my $uri (@uris) { my @authorized_extensions = split(" ", $self->auth_ext); foreach my $ext (@authorized_extensions) { if ($uri=~ /$ext$/ ) { push @trimmed_uris,$uri; # print "$uri accepted" if ($verbose >2); #debug } #else { print "$uri left out" if ($verbose >2);} # debug } } @uris = @trimmed_uris; while ((@uris) and (($census < $max_documents) or (!$max_documents)) ) { # a few initializations $uri = shift (@uris); my $uri_orig = $uri; $uri = uri_escape($uri); my @result_tmp = (); print " processing $uri_orig...\n" if ($verbose > 1); print "total $census under $max_documents" if ($verbose > 2); $census = $census+1; # filling result table with "fixed" content push @result_tmp, $census; push @result_tmp, $hits{$uri_orig}; push @result_tmp, $uri_orig; my $validatorUri = join ("", "http://",$config{ValidatorHost},":",$config{ValidatorPort}, $config{ValidatorString},$uri); my $testStringCharset = undef; my $testStringDoctype = undef; my $testStringInvalid = undef; my $testStringValid = undef; my $testStringErrorNum = undef; if ( $config{ValidatorVersion} eq "0.6.1" ) { $testStringCharset = 'I was not able to extract a character encoding labeling from any of'; $testStringDoctype = '<h2>Fatal Error: No DOCTYPE specified!</h2>'; $testStringInvalid = '<h2 id="result" class="invalid">This page is <strong>not</strong> Valid'; $testStringValid = '<h2 id="result" class="valid">This Page Is Valid'; $testStringErrorNum = '<th>Errors: </th>.*?<td>(\d+)</td>'; } else { # Default ValidatorVersion is 0.6.5 (current version as of may 2004) $testStringCharset = 'found are not valid values in the specified Character Encoding'; $testStringDoctype = '<h3>No DOCTYPE Found!'; $testStringInvalid = '<h2 class="invalid">This page is <strong>not</strong> Valid'; $testStringValid = '<h2 id="result" class="valid">This Page Is Valid'; $testStringErrorNum = '<th>Errors: </th>.*?<td>(\d+)</td>'; } my $request = new HTTP::Request("GET", $validatorUri ); my $validatorResponse = new HTTP::Response; $validatorResponse = $ua->simple_request($request); if ( ! $validatorResponse->is_success ) { push @result_tmp, "N/A"; push @result_tmp, "N/A"; push @result_tmp, "can't connect"; } else { # Actual tests if ( $validatorResponse->content =~ $testStringCharset ) { push @result_tmp, "No"; push @result_tmp, "N/A"; push @result_tmp, "N/A"; } elsif ( $validatorResponse->content =~ $testStringDoctype ) { push @result_tmp, "Yes"; push @result_tmp, "No"; push @result_tmp, "N/A"; } elsif ( $validatorResponse->content =~ $testStringInvalid ) { push @result_tmp, "Yes"; push @result_tmp, "Yes"; if ( $validatorResponse->content =~ m!$testStringErrorNum!ms ) { print "Invalid... $1 Errors \n" if $verbose; push @result_tmp, "No ($1)"; } else { push @result_tmp, "No (?)"; } } elsif ( $validatorResponse->content =~ $testStringValid ) { push @result_tmp, "Yes"; push @result_tmp, "Yes"; push @result_tmp, "Yes"; } else { push @result_tmp, "N/A"; push @result_tmp, "N/A"; push @result_tmp, "Could not validate"; } } # store results for this URI in table of results push @result, [@result_tmp]; } my $intro_str = "Here are the $census most popular documents surveyed for $name on ."; print "Done!\n" if $verbose; #print "Result: @result \n" if $verbose; untie %hits; # Here is what the module will return. The hash will be sent to # the output module my %returnhash; # the name of the module $returnhash{"name"}="SurveyEngine"; #intro $returnhash{"intro"}=$intro_str; #Headers for the result table @{$returnhash{"thead"}} = @result_head; # data for the results table @{$returnhash{"trows"}} = @result; #outro $returnhash{"outro"}=""; return %returnhash; } package W3C::LogValidator::SurveyEngine; 1; __END__ =head1 SurveyEngine W3C::LogValidator::SurveyEngine - Processing module for the Log Validator to run websites validity surveys =head1 SYNOPSIS Module to run websites validity surveys =head1 DESCRIPTION This module is part of the W3C::LogValidator suite, and .... =head1 AUTHOR Matthieu Faure <matthieu@faure.nom.fr> =head1 SEE ALSO W3C::LogValidator::LogProcessor, perl(1). Up-to-date complete info at http://www.w3.org/QA/Tools/LogValidator/ =cut
Received on Tuesday, 8 June 2004 01:26:12 UTC