- From: Olivier Thereaux <ot@dev.w3.org>
- Date: Fri, 13 Aug 2004 06:01:14 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/perl/modules/W3C/LogValidator/lib/W3C/LogValidator
In directory hutz:/tmp/cvs-serv26109
Modified Files:
Basic.pm CSSValidator.pm Config.pm HTMLValidator.pm
SurveyEngine.pm
Log Message:
(more) documenting API + re-adding uris method
Index: SurveyEngine.pm
===================================================================
RCS file: /sources/public/perl/modules/W3C/LogValidator/lib/W3C/LogValidator/SurveyEngine.pm,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -d -r1.6 -r1.7
--- SurveyEngine.pm 12 Aug 2004 02:28:51 -0000 1.6
+++ SurveyEngine.pm 13 Aug 2004 06:01:12 -0000 1.7
@@ -30,7 +30,7 @@
my $proto = shift;
my $class = ref($proto) || $proto;
# mandatory vars for the API
- $self->{URIS} = undef;
+ @{$self->{URIs}} = undef;
# internal stuff here
# $self->{FOO} = undef;
@@ -55,6 +55,14 @@
}
+sub uris
+{
+ my $self = shift;
+ if (@_) { @{$self->{URIs}} = @_ }
+ return @{$self->{URIs}};
+}
+
+
sub auth_ext
{
my $self=shift;
@@ -130,14 +138,24 @@
print "Now Using the SurveyEngine module...\n" if $verbose;
+ my %hits;
+ my @uris;
use URI::Escape;
use LWP::UserAgent;
- use DB_File;
- my $tmp_file = $config{tmpfile};
- my %hits;
- tie (%hits, 'DB_File', "$tmp_file", O_RDONLY) ||
- die ("Cannot create or open $tmp_file");
- my @uris = sort { $hits{$b} <=> $hits{$a} } keys %hits;
+ if (defined ($config{tmpfile}))
+ {
+ use DB_File;
+ my $tmp_file = $config{tmpfile};
+ tie (%hits, 'DB_File', "$tmp_file", O_RDONLY) ||
+ die ("Cannot create or open $tmp_file");
+ @uris = sort { $hits{$b} <=> $hits{$a} } keys %hits;
+ }
+ elsif ($self->uris())
+ {
+ @uris = $self->uris();
+ foreach my $uri (@uris) { $hits{$uri} = 0 }
+ }
+ @uris = sort { $hits{$b} <=> $hits{$a} } keys %hits;
my @result_head;
#push @result_head, "Hits";
@@ -279,16 +297,77 @@
=head1 SYNOPSIS
-Module to run websites validity surveys
+ use W3C::LogValidator::SurveyEngine;
+ my %config = ("verbose" => 2);
+ my $validator = W3C::LogValidator::SurveyEngine->new(\%config);
+ $validator->uris('http://www.w3.org/Overview.html', 'http://www.yahoo.com/index.html');
+ my %results = $validator->process_list;
+
=head1 DESCRIPTION
-This module is part of the W3C::LogValidator suite, and ....
+This module is part of the W3C::LogValidator suite, and processes a list of URIs in order
+to produce a validity/quality survey.
+
+This module is experimental.
+
+=head1 API
+
+=head2 Constructor
+
+=over 2
+
+=item $val = W3C::LogValidator::SurveyEngine->new
+
+Constructs a new C<W3C::LogValidator::SurveyEngine> processor.
+
+You might pass it a configuration hash reference (see L<W3C::LogValidator/config_module> and L<W3C::LogValidator::Config>)
+
+ $validator = W3C::LogValidator::SurveyEngine->new(\%config);
+
+=back
+
+=head2 General methods
+
+=over 4
+
+=item $val->process_list
+
+Processes a list of sorted URIs through different quality tools to produce a survey of their quality/validity
+
+The list can be set with C<uris>. If $val was given a config hash when constructed, and if the hash has a "tmpfile" key, C<process_list> will try to read this file as a hash of URIs and "hits" (popularity) with L<DB_File>.
+
+Returns a result hash. Keys for this hash are:
+
+
+ name (string): the name of the module, i.e "HTMLValidator"
+ intro (string): introduction to the processing results
+ thead (array): headers of the results table
+ trows (array of arrays): rows of the results table
+ outro (string): conclusion of the processing results
+
+
+=item $val->trim_uris
+
+Given a list of URIs of documents to process, returns a subset of this list containing the URIs of documents the module supposedly can handle.
+The decision is made based on file extensions (see C<auth_ext>) and the ExcludeAreas configuration setting.
+
+
+=item $val->auth_ext
+
+Returns the file extensions (space separated entries in a string) supported by the Module.
+Public method accessing $self->{AUTH_EXT}, itself coming from either the AuthorizedExtensions configuration setting, or a default value
+
+=back
+
+
=head1 AUTHOR
Matthieu Faure <matthieu@faure.nom.fr>
+Maintained by Olivier Thereaux <ot@w3.org> for W3C
+
=head1 SEE ALSO
W3C::LogValidator::LogProcessor, perl(1).
Index: HTMLValidator.pm
===================================================================
RCS file: /sources/public/perl/modules/W3C/LogValidator/lib/W3C/LogValidator/HTMLValidator.pm,v
retrieving revision 1.14
retrieving revision 1.15
diff -u -d -r1.14 -r1.15
--- HTMLValidator.pm 12 Aug 2004 09:12:06 -0000 1.14
+++ HTMLValidator.pm 13 Aug 2004 06:01:12 -0000 1.15
@@ -362,6 +362,9 @@
This module is part of the W3C::LogValidator suite, and checks HTML validity
of a given document via the W3C HTML validator service.
+=head1 API
+
+=head2 Constructor
=over 2
@@ -375,8 +378,11 @@
=back
+=head2 Main processing method
=over 4
+
+
=item $val->process_list
Processes a list of sorted URIs through the W3C Markup Validator.
@@ -392,11 +398,22 @@
trows (array of arrays): rows of the results table
outro (string): conclusion of the processing results
+=back
+
+=head2 General methods
+
+=over 4
+
+=item $val->uris
+
+Returns a list of URIs to be processed (unless the configuration gives the location for the hash of URI/hits Berkeley file, see C<process_list>).
+If an array is given as a parameter, also sets the list of URIs and returns it.
+
=item $val->trim_uris
Given a list of URIs of documents to process, returns a subset of this list containing the URIs of documents the module supposedly can handle.
-The decision is made based on file extensions (see C<auth_ext>) and content-type (see C<HEAD_check>)
+The decision is made based on file extensions (see C<auth_ext>), content-type (see C<HEAD_check>), and the setting for ExcludeAreas
=item $val->HEAD_check
Index: Config.pm
===================================================================
RCS file: /sources/public/perl/modules/W3C/LogValidator/lib/W3C/LogValidator/Config.pm,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -d -r1.5 -r1.6
--- Config.pm 12 Aug 2004 02:28:51 -0000 1.5
+++ Config.pm 13 Aug 2004 06:01:12 -0000 1.6
@@ -204,7 +204,12 @@
=head1 DESCRIPTION
-C<W3C::LogValidator::Config> is the
+C<W3C::LogValidator::Config> parses configuration files or directives for the Log Validator
+
+
+=head1 API
+
+=head2 Constructor
=over 2
@@ -217,6 +222,8 @@
=back
+=head2 General methods
+
=over 4
=item $c->configure
Index: Basic.pm
===================================================================
RCS file: /sources/public/perl/modules/W3C/LogValidator/lib/W3C/LogValidator/Basic.pm,v
retrieving revision 1.7
retrieving revision 1.8
diff -u -d -r1.7 -r1.8
--- Basic.pm 12 Aug 2004 02:28:51 -0000 1.7
+++ Basic.pm 13 Aug 2004 06:01:12 -0000 1.8
@@ -31,7 +31,7 @@
my $proto = shift;
my $class = ref($proto) || $proto;
# mandatory vars for the API
- $self->{URIS} = undef;
+ @{$self->{URIs}} = undef;
# don't change this
if (@_) {%config = %{(shift)};}
if (exists $config{verbose}) {$verbose = $config{verbose}}
@@ -40,7 +40,9 @@
}
sub uris {
-# unused
+ my $self = shift;
+ if (@_) { @{$self->{URIs}} = @_ }
+ return @{$self->{URIs}};
}
@@ -49,10 +51,13 @@
my $self = shift;
my @trimmed_uris;
my $exclude_regexp = "";
+ my @exclude_areas;
$exclude_regexp = $config{ExcludeAreas};
- $exclude_regexp =~ s/\//\\\//g ;
- my @exclude_areas = split(" ", $exclude_regexp);
-
+ if ($exclude_regexp){
+ $exclude_regexp =~ s/\//\\\//g ;
+ @exclude_areas = split(" ", $exclude_regexp);
+ }
+ else { print "nothing to exclude" if ($verbose >2);}
my $uri;
while ($uri = shift)
{
@@ -95,15 +100,21 @@
if (exists $config{ServerName}) {$name = $config{ServerName}}
print "Now Using the Basic module... \n" if $verbose;
- # Opening the file with the hits and URIs data
- use DB_File;
- my $tmp_file = $config{tmpfile};
my %hits;
-
- tie (%hits, 'DB_File', "$tmp_file", O_RDONLY) ||
- die ("Cannot create or open $tmp_file");
- my @uris = sort { $hits{$b} <=> $hits{$a} }
- keys %hits;
+ my @uris = undef;
+ if (defined ($config{tmpfile}))
+ {
+ use DB_File;
+ my $tmp_file = $config{tmpfile};
+ tie (%hits, 'DB_File', "$tmp_file", O_RDONLY) ||
+ die ("Cannot create or open $tmp_file");
+ @uris = sort { $hits{$b} <=> $hits{$a} } keys %hits;
+ }
+ elsif ($self->uris())
+ {
+ @uris = $self->uris();
+ foreach my $uri (@uris) { $hits{$uri} = 0 }
+ }
@uris = $self->trim_uris(@uris);
@@ -160,17 +171,69 @@
=head1 SYNOPSIS
use W3C::LogValidator::Basic;
- my $validator = new W3C::LogValidator::Basic;
- my $max_documents = 12;
- # how many log entries are parsed and returned before we stop
- # 0 -> processes everything
- my $result_string= $validator->process_list($max_documents);
+ my $b = new W3C::LogValidator::Basic;
+ $b->uris('http://www.w3.org/Overview.html', 'http://www.yahoo.com/index.html');
+ my $result_string= $b->process_list();
=head1 DESCRIPTION
+
This module is part of the W3C::LogValidator suite, and simply gives back pages
sorted by popularity. This is an example of simple module for LogValidator.
+=head1 API
+
+=head2 Constructor
+
+=over 2
+
+=item $b = W3C::LogValidator::Basic->new
+
+Constructs a new C<W3C::LogValidator::Basic> processor.
+
+You might pass it a configuration hash reference (see L<W3C::LogValidator/config_module> and L<W3C::LogValidator::Config>)
+Particularly relevant for this module are the "verbose", "MaxDocuments" and obviously "tmpfile" (see C<process_list>).
+Pass the configuration hash ref as follows:
+
+ $b = W3C::LogValidator::Basic->new(\%config);
+
+=back
+
+=head2 General Methods
+
+=over 4
+
+=item $b->uris
+
+Returns a list of URIs to be processed (unless the configuration gives the location for the hash of URI/hits Berkeley file, see C<process_list>).
+If an array is given as a parameter, also sets the list of URIs and returns it.
+Note: while this method is useful in other modules of L<W3C::LogValidator>, this basic module is here to sort URIs extracted from Log Files by popularity; this method is hence rather useless for L<W3C::LogValidator::Basic>.
+
+=item $b->trim_uris
+
+Given a list of URIs of documents to process, returns a subset of this list containing the URIs of documents the module supposedly can handle.
+For this module, the decision is made based on the setting for ExcludeAreas only.
+
+
+=item $b->process_list
+
+Formats the list of URIs sorted by popularity.
+
+Returns a result hash. Keys for this hash are:
+
+ name (string): the name of the module, i.e "Basic"
+ intro (string): introduction to the processing results
+ thead (array): headers of the results table
+ trows (array of arrays): rows of the results table
+ outro (string): conclusion of the processing results
+
+=back
+
+=head1 BUGS
+
+Public bug-tracking interface at L<http://www.w3.org/Bugs/Public/>
+
+
=head1 AUTHOR
Olivier Thereaux <ot@w3.org> for W3C
Index: CSSValidator.pm
===================================================================
RCS file: /sources/public/perl/modules/W3C/LogValidator/lib/W3C/LogValidator/CSSValidator.pm,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -d -r1.8 -r1.9
--- CSSValidator.pm 12 Aug 2004 02:28:51 -0000 1.8
+++ CSSValidator.pm 13 Aug 2004 06:01:12 -0000 1.9
@@ -32,7 +32,7 @@
my $proto = shift;
my $class = ref($proto) || $proto;
# mandatory vars for the API
- $self->{URIS} = undef;
+ @{$self->{URIs}} = undef;
# internal stuff here
# don't change this
if (@_) {%config = %{(shift)};}
@@ -162,6 +162,13 @@
return @trimmed_uris;
}
+sub uris
+{
+ my $self = shift;
+ if (@_) { @{$self->{URIs}} = @_ }
+ return @{$self->{URIs}};
+}
+
#########################################
# Actual subroutine to check the list of uris #
#########################################
@@ -172,16 +179,26 @@
my $self = shift;
my $max_invalid = undef;
if (exists $config{MaxInvalid}) {$max_invalid = $config{MaxInvalid}}
+ else {$max_invalid = 0}
my $max_documents = undef;
if (exists $config{MaxDocuments}) {$max_documents = $config{MaxDocuments}}
else {$max_documents = 0}
print "Now Using the CSS Validation module...\n" if $verbose;
- use DB_File;
- my $tmp_file = $config{tmpfile};
- my %hits;
- tie (%hits, 'DB_File', "$tmp_file", O_RDONLY) ||
- die ("Cannot create or open $tmp_file");
- my @uris = sort { $hits{$b} <=> $hits{$a} } keys %hits;
+ my @uris = undef;
+ my %hits;
+ if (defined ($config{tmpfile}))
+ {
+ use DB_File;
+ my $tmp_file = $config{tmpfile};
+ tie (%hits, 'DB_File', "$tmp_file", O_RDONLY) ||
+ die ("Cannot create or open $tmp_file");
+ @uris = sort { $hits{$b} <=> $hits{$a} } keys %hits;
+ }
+ elsif ($self->uris())
+ {
+ @uris = $self->uris();
+ foreach my $uri (@uris) { $hits{$uri} = 0 }
+ }
my $name = "";
if (exists $config{ServerName}) {$name = $config{ServerName}}
my @result;
@@ -320,11 +337,97 @@
W3C::LogValidator::CSSValidator - Validates CSS style sheets from Web Server logs
+=head1 SYNOPSIS
+
+ use W3C::LogValidator::CSSValidator;
+ my %config = ("verbose" => 2);
+ my $validator = W3C::LogValidator::CSSValidator->new(\%config);
+ $validator->uris('http://www.w3.org/StyleSheets/home.css', 'http://yoda.zoy.org/mt-static/styles.css');
+ my %result= $validator->process_list;
=head1 DESCRIPTION
This module is part of the W3C::LogValidator suite, and is used as an interface to the W3C CSS validation service.
+=over 2
+
+=item $val = W3C::LogValidator::CSSValidator->new
+
+Constructs a new C<W3C::LogValidator::CSSValidator> processor.
+
+You might pass it a configuration hash reference (see L<W3C::LogValidator/config_module> and L<W3C::LogValidator::Config>)
+
+ $validator = W3C::LogValidator::CSSValidator->new(\%config);
+
+=back
+
+=over 4
+
+=item $val->process_list
+
+Processes a list of sorted URIs through the W3C CSS Validator.
+
+The list can be set with C<uris>. If $val was given a config hash when constructed, and if the hash has a "tmpfile" key, C<process_list> will try to read this file as a hash of URIs and "hits" (popularity) with L<DB_File>.
+
+Returns a result hash. Keys for this hash are:
+
+
+ name (string): the name of the module, i.e "CSSValidator"
+ intro (string): introduction to the processing results
+ thead (array): headers of the results table
+ trows (array of arrays): rows of the results table
+ outro (string): conclusion of the processing results
+
+
+=item $val->trim_uris
+
+Given a list of URIs of documents to process, returns a subset of this list containing the URIs of documents the module supposedly can handle.
+The decision is made based on file extensions (see C<auth_ext>) and content-type (see C<HEAD_check>)
+
+=item $val->HEAD_check
+
+Checks whether a document with no extension is actually a CSS document through an HTTP HEAD request
+returns 1 if the URI is of an expected content-type, 0 otherwise
+
+=item $val->auth_ext
+
+Returns the file extensions (space separated entries in a string) supported by the Module.
+Public method accessing $self->{AUTH_EXT}, itself coming from either the AuthorizedExtensions configuration setting, or a default value
+
+=item $val->valid
+
+Sets / Returns whether the document being processed has been found to be valid or not.
+If an argument is given, sets the variable, otherwise returns the current variable.
+
+=item $val->valid_err_num
+
+Sets / Returns the number of validation errors for the document being processed.
+If an argument is given, sets the variable, otherwise returns the current variable.
+
+=item $val->valid_success
+
+Sets / Returns whether the module was able to process validation of the current document successfully (regardless of valid/invalid result)
+If an argument is given, sets the variable, otherwise returns the current variable.
+
+=item $val->valid_head
+
+Sets / Returns all HTTP headers returned by the CSS validator when attempting to validate the current document.
+If an argument is given, sets the variable, otherwise returns the current variable.
+
+=item $val->new_doc
+
+Resets all validation variables to 'undef'. In effect, prepares the processing module to the handling of a new document.
+
+=back
+
+=head1 BUGS
+
+Public bug-tracking interface at L<http://www.w3.org/Bugs/Public/>
+
+
+
+
+
=head1 AUTHOR
Olivier Thereaux <ot@w3.org>
Received on Friday, 13 August 2004 06:01:15 UTC