perl/modules/W3C/LogValidator/lib/W3C/LogValidator Basic.pm,1.7,1.8 CSSValidator.pm,1.8,1.9 Config.pm,1.5,1.6 HTMLValidator.pm,1.14,1.15 SurveyEngine.pm,1.6,1.7

Update of /sources/public/perl/modules/W3C/LogValidator/lib/W3C/LogValidator
In directory hutz:/tmp/cvs-serv26109

Modified Files:
	Basic.pm CSSValidator.pm Config.pm HTMLValidator.pm 
	SurveyEngine.pm 
Log Message:
(more) documenting API + re-adding uris method

Index: SurveyEngine.pm
===================================================================
RCS file: /sources/public/perl/modules/W3C/LogValidator/lib/W3C/LogValidator/SurveyEngine.pm,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -d -r1.6 -r1.7
--- SurveyEngine.pm	12 Aug 2004 02:28:51 -0000	1.6
+++ SurveyEngine.pm	13 Aug 2004 06:01:12 -0000	1.7
@@ -30,7 +30,7 @@
 	my $proto = shift;
 	my $class = ref($proto) || $proto;
 	# mandatory vars for the API
-	$self->{URIS}	= undef;
+	@{$self->{URIs}} = undef;
 	# internal stuff here
 	# $self->{FOO} = undef;
 	
@@ -55,6 +55,14 @@
 }
 
 
+sub uris
+{
+	my $self = shift;
+	if (@_) { @{$self->{URIs}} = @_ }
+	return @{$self->{URIs}};
+}
+
+
 sub auth_ext
 {
 	my $self=shift;
@@ -130,14 +138,24 @@
 
 
     print "Now Using the SurveyEngine module...\n" if $verbose;
+    my %hits;
+    my @uris;
     use URI::Escape;
     use LWP::UserAgent;
-    use DB_File;
-    my $tmp_file = $config{tmpfile};
-    my %hits;
-    tie (%hits, 'DB_File', "$tmp_file", O_RDONLY) ||
-      die ("Cannot create or open $tmp_file");
-    my @uris = sort { $hits{$b} <=> $hits{$a} } keys %hits;
+    if (defined ($config{tmpfile}))
+	{
+		use DB_File; 
+		my $tmp_file = $config{tmpfile};
+		tie (%hits, 'DB_File', "$tmp_file", O_RDONLY) || 
+		    die ("Cannot create or open $tmp_file");
+		@uris = sort { $hits{$b} <=> $hits{$a} } keys %hits;
+	}
+    elsif ($self->uris())
+	{
+		@uris = $self->uris();
+		foreach my $uri (@uris) { $hits{$uri} = 0 }
+	}
+    @uris = sort { $hits{$b} <=> $hits{$a} } keys %hits;
 					
     my @result_head;
     #push @result_head, "Hits";
@@ -279,16 +297,77 @@
 
 =head1 SYNOPSIS
 
-Module to run websites validity surveys
+  use  W3C::LogValidator::SurveyEngine;
+  my %config = ("verbose" => 2);
+  my $validator = W3C::LogValidator::SurveyEngine->new(\%config);
+  $validator->uris('http://www.w3.org/Overview.html', 'http://www.yahoo.com/index.html');
+  my %results = $validator->process_list;
+
 
 =head1 DESCRIPTION
 
-This module is part of the W3C::LogValidator suite, and ....
+This module is part of the W3C::LogValidator suite, and processes a list of URIs in order
+to produce a validity/quality survey.
+
+This module is experimental.
+
+=head1 API
+
+=head2 Constructor
+
+=over 2
+
+=item $val = W3C::LogValidator::SurveyEngine->new
+
+Constructs a new C<W3C::LogValidator::SurveyEngine> processor.  
+
+You might pass it a configuration hash reference (see L<W3C::LogValidator/config_module> and L<W3C::LogValidator::Config>)
+
+  $validator = W3C::LogValidator::SurveyEngine->new(\%config);  
+
+=back
+
+=head2 General methods
+
+=over 4
+
+=item $val->process_list
+
+Processes a list of sorted URIs through different quality tools to produce a survey of their quality/validity
+
+The list can be set with C<uris>. If $val was given a config hash when constructed, and if that hash has a "tmpfile" key, C<process_list> will try to read this file as a hash of URIs and "hits" (popularity) with L<DB_File>.
+
+Returns a result hash. Keys for this hash are: 
+
+
+  name (string): the name of the module, i.e "HTMLValidator"
+  intro (string): introduction to the processing results
+  thead (array): headers of the results table
+  trows (array of arrays): rows of the results table
+  outro (string): conclusion of the processing results
+
+
+=item $val->trim_uris 
+
+Given a list of URIs of documents to process, returns a subset of this list containing the URIs of documents the module supposedly can handle.
+The decision is made based on file extensions (see C<auth_ext>) and the ExcludeAreas configuration setting.
+
+
+=item $val->auth_ext
+
+Returns the file extensions (space separated entries in a string) supported by the Module.
+Public method accessing $self->{AUTH_EXT}, itself coming from either the AuthorizedExtensions configuration setting, or a default value
+
+=back
+
+
 
 =head1 AUTHOR
 
 Matthieu Faure  <matthieu@faure.nom.fr>
 
+Maintained by Olivier Thereaux <ot@w3.org> for W3C
+
 =head1 SEE ALSO
 
 W3C::LogValidator::LogProcessor, perl(1).

Index: HTMLValidator.pm
===================================================================
RCS file: /sources/public/perl/modules/W3C/LogValidator/lib/W3C/LogValidator/HTMLValidator.pm,v
retrieving revision 1.14
retrieving revision 1.15
diff -u -d -r1.14 -r1.15
--- HTMLValidator.pm	12 Aug 2004 09:12:06 -0000	1.14
+++ HTMLValidator.pm	13 Aug 2004 06:01:12 -0000	1.15
@@ -362,6 +362,9 @@
 This module is part of the W3C::LogValidator suite, and checks HTML validity
 of a given document via the W3C HTML validator service.
 
+=head1 API
+
+=head2 Constructor
 
 =over 2
 
@@ -375,8 +378,11 @@
 
 =back
 
+=head2 Main processing method
 =over 4
 
+
+
 =item $val->process_list
 
 Processes a list of sorted URIs through the W3C Markup Validator.
@@ -392,11 +398,22 @@
   trows (array of arrays): rows of the results table
   outro (string): conclusion of the processing results
 
+=back
+
+=head2 General methods
+
+=over 4
+
+=item $val->uris
+
+Returns a list of URIs to be processed (unless the configuration gives the location of the Berkeley DB file holding the hash of URIs/hits, see C<process_list>).
+If an array is given as a parameter, also sets the list of URIs and returns it.
+
 
 =item $val->trim_uris 
 
 Given a list of URIs of documents to process, returns a subset of this list containing the URIs of documents the module supposedly can handle.
-The decision is made based on file extensions (see C<auth_ext>) and content-type (see C<HEAD_check>) 
+The decision is made based on file extensions (see C<auth_ext>), content-type (see C<HEAD_check>), and the ExcludeAreas configuration setting.
 
 =item $val->HEAD_check
 

Index: Config.pm
===================================================================
RCS file: /sources/public/perl/modules/W3C/LogValidator/lib/W3C/LogValidator/Config.pm,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -d -r1.5 -r1.6
--- Config.pm	12 Aug 2004 02:28:51 -0000	1.5
+++ Config.pm	13 Aug 2004 06:01:12 -0000	1.6
@@ -204,7 +204,12 @@
 
 =head1 DESCRIPTION
 
-C<W3C::LogValidator::Config> is the 
+C<W3C::LogValidator::Config> parses configuration files or directives for the Log Validator
+
+
+=head1 API
+
+=head2 Constructor
 
 =over 2
 
@@ -217,6 +222,8 @@
 
 =back
 
+=head2 General methods
+
 =over 4
 
 =item $c->configure

Index: Basic.pm
===================================================================
RCS file: /sources/public/perl/modules/W3C/LogValidator/lib/W3C/LogValidator/Basic.pm,v
retrieving revision 1.7
retrieving revision 1.8
diff -u -d -r1.7 -r1.8
--- Basic.pm	12 Aug 2004 02:28:51 -0000	1.7
+++ Basic.pm	13 Aug 2004 06:01:12 -0000	1.8
@@ -31,7 +31,7 @@
         my $proto = shift;
         my $class = ref($proto) || $proto;
 	# mandatory vars for the API
-	$self->{URIS}	= undef;
+	@{$self->{URIs}} = undef;
 	# don't change this
 	if (@_) {%config =  %{(shift)};}
 	if (exists $config{verbose}) {$verbose = $config{verbose}}
@@ -40,7 +40,9 @@
 }
 
 sub uris { 
-# unused
+	my $self = shift;
+	if (@_) { @{$self->{URIs}} = @_ }
+	return @{$self->{URIs}};
 }
 
 
@@ -49,10 +51,13 @@
         my $self = shift;
         my @trimmed_uris;
 	my $exclude_regexp = "";
+	my @exclude_areas;
 	$exclude_regexp = $config{ExcludeAreas};
-	$exclude_regexp =~ s/\//\\\//g ;
-	my @exclude_areas = split(" ", $exclude_regexp);
-
+	if ($exclude_regexp){
+		$exclude_regexp =~ s/\//\\\//g ;
+		@exclude_areas = split(" ", $exclude_regexp);
+	}
+	else { print "nothing to exclude" if ($verbose >2);}
         my $uri;
         while ($uri = shift)
         {
@@ -95,15 +100,21 @@
 	if (exists $config{ServerName}) {$name = $config{ServerName}}
 
 	print "Now Using the Basic module... \n" if $verbose;
-	# Opening the file with the hits and URIs data
-	use DB_File;
-	my $tmp_file = $config{tmpfile};
 	my %hits;
-
-	tie (%hits, 'DB_File', "$tmp_file", O_RDONLY) ||
-        die ("Cannot create or open $tmp_file");
-	my @uris = sort { $hits{$b} <=> $hits{$a} }                                   
-                keys %hits;
+	my @uris = undef;
+	if (defined ($config{tmpfile}))
+	{
+		use DB_File; 
+		my $tmp_file = $config{tmpfile};
+		tie (%hits, 'DB_File', "$tmp_file", O_RDONLY) || 
+		    die ("Cannot create or open $tmp_file");
+		@uris = sort { $hits{$b} <=> $hits{$a} } keys %hits;
+	}
+	elsif ($self->uris())
+	{
+		@uris = $self->uris();
+		foreach my $uri (@uris) { $hits{$uri} = 0 }
+	}
 
         @uris = $self->trim_uris(@uris);
 
@@ -160,17 +171,69 @@
 =head1 SYNOPSIS
 
   use  W3C::LogValidator::Basic;
-  my $validator = new W3C::LogValidator::Basic;
-  my $max_documents = 12;
-	# how many log entries are parsed and returned before we stop
-	# 0 -> processes everything
-  my $result_string= $validator->process_list($max_documents);
+  my $b = new W3C::LogValidator::Basic;
+  $b->uris('http://www.w3.org/Overview.html', 'http://www.yahoo.com/index.html');
+  my $result_string= $b->process_list();
 
 =head1 DESCRIPTION
 
+
 This module is part of the W3C::LogValidator suite, and simply gives back pages
 sorted by popularity. This is an example of simple module for LogValidator.
 
+=head1 API 
+
+=head2 Constructor
+
+=over 2
+
+=item $b = W3C::LogValidator::Basic->new
+
+Constructs a new C<W3C::LogValidator::Basic> processor.
+
+You might pass it a configuration hash reference (see L<W3C::LogValidator/config_module> and L<W3C::LogValidator::Config>)
+Particularly relevant for this module are the "verbose", "MaxDocuments" and obviously "tmpfile" (see C<process_list>).
+Pass the configuration hash ref as follows:
+
+  $b = W3C::LogValidator::Basic->new(\%config);
+
+=back
+
+=head2 General Methods
+
+=over 4
+
+=item $b->uris
+
+Returns a list of URIs to be processed (unless the configuration gives the location of the Berkeley DB file holding the hash of URIs/hits, see C<process_list>).
+If an array is given as a parameter, also sets the list of URIs and returns it.
+Note: while this method is useful in other modules of L<W3C::LogValidator>, this basic module is here to sort URIs extracted from Log Files by popularity, this method is hence rather useless for L<W3C::LogValidator::Basic>.
+
+=item $b->trim_uris
+
+Given a list of URIs of documents to process, returns a subset of this list containing the URIs of documents the module supposedly can handle.
+For this module, the decision is made based on the ExcludeAreas configuration setting only.
+
+
+=item $b->process_list
+
+Formats the list of URIs sorted by popularity.
+
+Returns a result hash. Keys for this hash are: 
+
+  name (string): the name of the module, i.e "Basic"
+  intro (string): introduction to the processing results
+  thead (array): headers of the results table
+  trows (array of arrays): rows of the results table
+  outro (string): conclusion of the processing results
+
+=back
+
+=head1 BUGS
+
+Public bug-tracking interface at L<http://www.w3.org/Bugs/Public/>
+
+
 =head1 AUTHOR
 
 Olivier Thereaux <ot@w3.org> for W3C

Index: CSSValidator.pm
===================================================================
RCS file: /sources/public/perl/modules/W3C/LogValidator/lib/W3C/LogValidator/CSSValidator.pm,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -d -r1.8 -r1.9
--- CSSValidator.pm	12 Aug 2004 02:28:51 -0000	1.8
+++ CSSValidator.pm	13 Aug 2004 06:01:12 -0000	1.9
@@ -32,7 +32,7 @@
         my $proto = shift;
         my $class = ref($proto) || $proto;
 	# mandatory vars for the API
-	$self->{URIS}	= undef;
+	@{$self->{URIs}} = undef;
 	# internal stuff here
 	# don't change this
         if (@_) {%config =  %{(shift)};}
@@ -162,6 +162,13 @@
         return @trimmed_uris;
 }
 
+sub uris
+{
+	my $self = shift;
+	if (@_) { @{$self->{URIs}} = @_ }
+	return @{$self->{URIs}};
+}
+
 #########################################
 # Actual subroutine to check the list of uris #
 #########################################
@@ -172,16 +179,26 @@
 	my $self = shift;
 	my $max_invalid = undef;
 	if (exists $config{MaxInvalid}) {$max_invalid = $config{MaxInvalid}}
+	else {$max_invalid = 0}
         my $max_documents = undef;                                                                      
         if (exists $config{MaxDocuments}) {$max_documents = $config{MaxDocuments}}                      
         else {$max_documents = 0}
 	print "Now Using the CSS Validation module...\n" if $verbose;
-	use DB_File;                                                                  
-        my $tmp_file = $config{tmpfile};
-	my %hits;                                                                     
-	tie (%hits, 'DB_File', "$tmp_file", O_RDONLY) ||                              
-	die ("Cannot create or open $tmp_file");                                      
-	my @uris = sort { $hits{$b} <=> $hits{$a} } keys %hits;
+	my @uris = undef;
+	my %hits;
+	if (defined ($config{tmpfile}))
+	{
+		use DB_File; 
+		my $tmp_file = $config{tmpfile};
+		tie (%hits, 'DB_File', "$tmp_file", O_RDONLY) || 
+		    die ("Cannot create or open $tmp_file");
+		@uris = sort { $hits{$b} <=> $hits{$a} } keys %hits;
+	}
+	elsif ($self->uris())
+	{
+		@uris = $self->uris();
+		foreach my $uri (@uris) { $hits{$uri} = 0 }
+	}
 	my $name = "";
 	if (exists $config{ServerName}) {$name = $config{ServerName}}
        	my @result;
@@ -320,11 +337,97 @@
 
 W3C::LogValidator::CSSValidator - Validates CSS style sheets from Web Server logs
 
+=head1 SYNOPSIS
+
+  use  W3C::LogValidator::CSSValidator;
+  my %config = ("verbose" => 2);
+  my $validator = W3C::LogValidator::CSSValidator->new(\%config);
+  $validator->uris('http://www.w3.org/StyleSheets/home.css', 'http://yoda.zoy.org/mt-static/styles.css');
+  my %result= $validator->process_list;
 
 =head1 DESCRIPTION
 
 This module is part of the W3C::LogValidator suite, and is used as an interface to the W3C CSS validation service.
 
+=over 2
+
+=item $val = W3C::LogValidator::CSSValidator->new
+
+Constructs a new C<W3C::LogValidator::CSSValidator> processor.
+
+You might pass it a configuration hash reference (see L<W3C::LogValidator/config_module> and L<W3C::LogValidator::Config>)
+
+  $validator = W3C::LogValidator::CSSValidator->new(\%config);  
+
+=back
+
+=over 4
+
+=item $val->process_list
+
+Processes a list of sorted URIs through the W3C CSS Validator.
+
+The list can be set with C<uris>. If $val was given a config hash when constructed, and if that hash has a "tmpfile" key, C<process_list> will try to read this file as a hash of URIs and "hits" (popularity) with L<DB_File>.
+
+Returns a result hash. Keys for this hash are: 
+
+
+  name (string): the name of the module, i.e "CSSValidator"
+  intro (string): introduction to the processing results
+  thead (array): headers of the results table
+  trows (array of arrays): rows of the results table
+  outro (string): conclusion of the processing results
+
+
+=item $val->trim_uris 
+
+Given a list of URIs of documents to process, returns a subset of this list containing the URIs of documents the module supposedly can handle.
+The decision is made based on file extensions (see C<auth_ext>) and content-type (see C<HEAD_check>) 
+
+=item $val->HEAD_check
+
+Checks whether a document with no extension is actually a CSS document through an HTTP HEAD request
+returns 1 if the URI is of an expected content-type, 0 otherwise
+
+=item $val->auth_ext
+
+Returns the file extensions (space separated entries in a string) supported by the Module.
+Public method accessing $self->{AUTH_EXT}, itself coming from either the AuthorizedExtensions configuration setting, or a default value
+
+=item $val->valid
+
+Sets / Returns whether the document being processed has been found to be valid or not.
+If an argument is given, sets the variable, otherwise returns the current variable.
+
+=item $val->valid_err_num
+
+Sets / Returns the number of validation errors for the document being processed.
+If an argument is given, sets the variable, otherwise returns the current variable.
+
+=item $val->valid_success
+
+Sets / Returns whether the module was able to process validation of the current document successfully (regardless of valid/invalid result)
+If an argument is given, sets the variable, otherwise returns the current variable.
+
+=item $val->valid_head
+
+Sets / Returns all HTTP headers returned by the CSS validator when attempting to validate the current document.
+If an argument is given, sets the variable, otherwise returns the current variable.
+
+=item $val->new_doc
+
+Resets all validation variables to 'undef'. In effect, prepares the processing module for handling a new document.
+
+=back
+
+=head1 BUGS
+
+Public bug-tracking interface at L<http://www.w3.org/Bugs/Public/>
+
+
+
+
+
 =head1 AUTHOR
 
 Olivier Thereaux <ot@w3.org>

Received on Friday, 13 August 2004 06:01:15 UTC