- From: Olivier Thereaux <ot@dev.w3.org>
- Date: Fri, 13 Aug 2004 06:01:14 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/perl/modules/W3C/LogValidator/lib/W3C/LogValidator In directory hutz:/tmp/cvs-serv26109 Modified Files: Basic.pm CSSValidator.pm Config.pm HTMLValidator.pm SurveyEngine.pm Log Message: (more) documenting API + re-adding uris method Index: SurveyEngine.pm =================================================================== RCS file: /sources/public/perl/modules/W3C/LogValidator/lib/W3C/LogValidator/SurveyEngine.pm,v retrieving revision 1.6 retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- SurveyEngine.pm 12 Aug 2004 02:28:51 -0000 1.6 +++ SurveyEngine.pm 13 Aug 2004 06:01:12 -0000 1.7 @@ -30,7 +30,7 @@ my $proto = shift; my $class = ref($proto) || $proto; # mandatory vars for the API - $self->{URIS} = undef; + @{$self->{URIs}} = undef; # internal stuff here # $self->{FOO} = undef; @@ -55,6 +55,14 @@ } +sub uris +{ + my $self = shift; + if (@_) { @{$self->{URIs}} = @_ } + return @{$self->{URIs}}; +} + + sub auth_ext { my $self=shift; @@ -130,14 +138,24 @@ print "Now Using the SurveyEngine module...\n" if $verbose; + my %hits; + my @uris; use URI::Escape; use LWP::UserAgent; - use DB_File; - my $tmp_file = $config{tmpfile}; - my %hits; - tie (%hits, 'DB_File', "$tmp_file", O_RDONLY) || - die ("Cannot create or open $tmp_file"); - my @uris = sort { $hits{$b} <=> $hits{$a} } keys %hits; + if (defined ($config{tmpfile})) + { + use DB_File; + my $tmp_file = $config{tmpfile}; + tie (%hits, 'DB_File', "$tmp_file", O_RDONLY) || + die ("Cannot create or open $tmp_file"); + @uris = sort { $hits{$b} <=> $hits{$a} } keys %hits; + } + elsif ($self->uris()) + { + @uris = $self->uris(); + foreach my $uri (@uris) { $hits{$uri} = 0 } + } + @uris = sort { $hits{$b} <=> $hits{$a} } keys %hits; my @result_head; #push @result_head, "Hits"; @@ -279,16 +297,77 @@ =head1 SYNOPSIS -Module to run websites validity surveys + use W3C::LogValidator::SurveyEngine; + my %config = ("verbose" => 2); + my $validator = W3C::LogValidator::SurveyEngine->new(\%config); + 
$validator->uris('http://www.w3.org/Overview.html', 'http://www.yahoo.com/index.html'); + my %results = $validator->process_list; + =head1 DESCRIPTION -This module is part of the W3C::LogValidator suite, and .... +This module is part of the W3C::LogValidator suite, and processes a list of URIs in order +to produce a validity/quality survey. + +This module is experimental. + +=head1 API + +=head2 Constructor + +=over 2 + +=item $val = W3C::LogValidator::SurveyEngine->new + +Constructs a new C<W3C::LogValidator::SurveyEngine> processor. + +You might pass it a configuration hash reference (see L<W3C::LogValidator/config_module> and L<W3C::LogValidator::Config>) + + $validator = W3C::LogValidator::SurveyEngine->new(\%config); + +=back + +=head2 General methods + +=over 4 + +=item $val->process_list + +Processes a list of sorted URIs through different quality tools to produce a survey of their quality/validity + +The list can be set with C<uris>. If the $val was given a config hash when constructed, and if the hash has a "tmpfile" key, C<process_list> will try to read this file as a hash of URIs and "hits" (popularity) with L<DB_File>. + +Returns a result hash. Keys for this hash are: + + + name (string): the name of the module, i.e "HTMLValidator" + intro (string): introduction to the processing results + thead (array): headers of the results table + trows (array of arrays): rows of the results table + outro (string): conclusion of the processing results + + +=item $val->trim_uris + +Given a list of URIs of documents to process, returns a subset of this list containing the URIs of documents the module supposedly can handle. +The decision is made based on file extensions (see C<auth_ext>) and the ExcludeAreas configuration setting. + + +=item $val->auth_ext + +Returns the file extensions (space separated entries in a string) supported by the Module. 
+Public method accessing $self->{AUTH_EXT}, itself coming from either the AuthorizedExtensions configuration setting, or a default value + +=back + + =head1 AUTHOR Matthieu Faure <matthieu@faure.nom.fr> +Maintained by olivier Thereaux <ot@w3.org> for W3C + =head1 SEE ALSO W3C::LogValidator::LogProcessor, perl(1). Index: HTMLValidator.pm =================================================================== RCS file: /sources/public/perl/modules/W3C/LogValidator/lib/W3C/LogValidator/HTMLValidator.pm,v retrieving revision 1.14 retrieving revision 1.15 diff -u -d -r1.14 -r1.15 --- HTMLValidator.pm 12 Aug 2004 09:12:06 -0000 1.14 +++ HTMLValidator.pm 13 Aug 2004 06:01:12 -0000 1.15 @@ -362,6 +362,9 @@ This module is part of the W3C::LogValidator suite, and checks HTML validity of a given document via the W3C HTML validator service. +=head1 API + +=head2 Constructor =over 2 @@ -375,8 +378,11 @@ =back +=head2 Main processing method =over 4 + + =item $val->process_list Processes a list of sorted URIs through the W3C Markup Validator. @@ -392,11 +398,22 @@ trows (array of arrays): rows of the results table outro (string): conclusion of the processing results +=back + +=head2 General methods + +=over 4 + +=item $val->uris + +Returns a list of URIs to be processed (unless the configuration gives the location for the hash of URI/hits berkeley file, see C<process_list> +If an array is given as a parameter, also sets the list of URIs and returns it. + =item $val->trim_uris Given a list of URIs of documents to process, returns a subset of this list containing the URIs of documents the module supposedly can handle. 
-The decision is made based on file extensions (see C<auth_ext>) and content-type (see C<HEAD_check>) +The decision is made based on file extensions (see C<auth_ext>), content-type (see C<HEAD_check>) , and the setting for ExcludedAreas =item $val->HEAD_check Index: Config.pm =================================================================== RCS file: /sources/public/perl/modules/W3C/LogValidator/lib/W3C/LogValidator/Config.pm,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- Config.pm 12 Aug 2004 02:28:51 -0000 1.5 +++ Config.pm 13 Aug 2004 06:01:12 -0000 1.6 @@ -204,7 +204,12 @@ =head1 DESCRIPTION -C<W3C::LogValidator::Config> is the +C<W3C::LogValidator::Config> parses configuration files or directives for the Log Validator + + +=head1 API + +=head2 Constructor =over 2 @@ -217,6 +222,8 @@ =back +=head2 General methods + =over 4 =item $c->configure Index: Basic.pm =================================================================== RCS file: /sources/public/perl/modules/W3C/LogValidator/lib/W3C/LogValidator/Basic.pm,v retrieving revision 1.7 retrieving revision 1.8 diff -u -d -r1.7 -r1.8 --- Basic.pm 12 Aug 2004 02:28:51 -0000 1.7 +++ Basic.pm 13 Aug 2004 06:01:12 -0000 1.8 @@ -31,7 +31,7 @@ my $proto = shift; my $class = ref($proto) || $proto; # mandatory vars for the API - $self->{URIS} = undef; + @{$self->{URIs}} = undef; # don't change this if (@_) {%config = %{(shift)};} if (exists $config{verbose}) {$verbose = $config{verbose}} @@ -40,7 +40,9 @@ } sub uris { -# unused + my $self = shift; + if (@_) { @{$self->{URIs}} = @_ } + return @{$self->{URIs}}; } @@ -49,10 +51,13 @@ my $self = shift; my @trimmed_uris; my $exclude_regexp = ""; + my @exclude_areas; $exclude_regexp = $config{ExcludeAreas}; - $exclude_regexp =~ s/\//\\\//g ; - my @exclude_areas = split(" ", $exclude_regexp); - + if ($exclude_regexp){ + $exclude_regexp =~ s/\//\\\//g ; + @exclude_areas = split(" ", $exclude_regexp); + } + else { print "nothing to exclude" if 
($verbose >2);} my $uri; while ($uri = shift) { @@ -95,15 +100,21 @@ if (exists $config{ServerName}) {$name = $config{ServerName}} print "Now Using the Basic module... \n" if $verbose; - # Opening the file with the hits and URIs data - use DB_File; - my $tmp_file = $config{tmpfile}; my %hits; - - tie (%hits, 'DB_File', "$tmp_file", O_RDONLY) || - die ("Cannot create or open $tmp_file"); - my @uris = sort { $hits{$b} <=> $hits{$a} } - keys %hits; + my @uris = undef; + if (defined ($config{tmpfile})) + { + use DB_File; + my $tmp_file = $config{tmpfile}; + tie (%hits, 'DB_File', "$tmp_file", O_RDONLY) || + die ("Cannot create or open $tmp_file"); + @uris = sort { $hits{$b} <=> $hits{$a} } keys %hits; + } + elsif ($self->uris()) + { + @uris = $self->uris(); + foreach my $uri (@uris) { $hits{$uri} = 0 } + } @uris = $self->trim_uris(@uris); @@ -160,17 +171,69 @@ =head1 SYNOPSIS use W3C::LogValidator::Basic; - my $validator = new W3C::LogValidator::Basic; - my $max_documents = 12; - # how many log entries are parsed and returned before we stop - # 0 -> processes everything - my $result_string= $validator->process_list($max_documents); + my $b = new W3C::LogValidator::Basic; + $b->uris('http://www.w3.org/Overview.html', 'http://www.yahoo.com/index.html'); + my $result_string= $b->process_list(); =head1 DESCRIPTION + This module is part of the W3C::LogValidator suite, and simply gives back pages sorted by popularity. This is an example of simple module for LogValidator. +=head1 API + +=head2 Constructor + +=over 2 + +=item $b = W3C::LogValidator::Basic->new + +Constructs a new C<W3C::LogValidator:HTMLBasic> processor. + +You might pass it a configuration hash reference (see L<W3C::LogValidator/config_module> and L<W3C::LogValidator::Config>) +Particularly relevant for this module are the "verbose", "MaxDocuments" and obviously "tmpfile" (see C<process_list>). 
+Pass the configuration hash ref as follows: + + $b = W3C::LogValidator::HTMLValidator->new(\%config); + +=back + +=head2 General Methods + +=over 4 + +=item b->uris + +Returns a list of URIs to be processed (unless the configuration gives the location for the hash of URI/hits berkeley file, see C<process_list> +If an array is given as a parameter, also sets the list of URIs and returns it. +Note: while this method is useful in other modules of L<W3C::LogValidator>, this basic module is here to sort URIs extracted from Log Files by popularity, this method is hence rather useless for L<W3C::LogValidator::Basic>. + +=item b->trim_uris + +Given a list of URIs of documents to process, returns a subset of this list containing the URIs of documents the module supposedly can handle. +For this module, the decision is made based on the setting for ExcludedAreas only + + +=item b->process_list + +Formats the list of URIs sorted by popularity. + +Returns a result hash. Keys for this hash are: + + name (string): the name of the module, i.e "Basic" + intro (string): introduction to the processing results + thead (array): headers of the results table + trows (array of arrays): rows of the results table + outro (string): conclusion of the processing results + +=back + +=head1 BUGS + +Public bug-tracking interface at L<http://www.w3.org/Bugs/Public/> + + =head1 AUTHOR Olivier Thereaux <ot@w3.org> for W3C Index: CSSValidator.pm =================================================================== RCS file: /sources/public/perl/modules/W3C/LogValidator/lib/W3C/LogValidator/CSSValidator.pm,v retrieving revision 1.8 retrieving revision 1.9 diff -u -d -r1.8 -r1.9 --- CSSValidator.pm 12 Aug 2004 02:28:51 -0000 1.8 +++ CSSValidator.pm 13 Aug 2004 06:01:12 -0000 1.9 @@ -32,7 +32,7 @@ my $proto = shift; my $class = ref($proto) || $proto; # mandatory vars for the API - $self->{URIS} = undef; + @{$self->{URIs}} = undef; # internal stuff here # don't change this if (@_) {%config = %{(shift)};} 
@@ -162,6 +162,13 @@ return @trimmed_uris; } +sub uris +{ + my $self = shift; + if (@_) { @{$self->{URIs}} = @_ } + return @{$self->{URIs}}; +} + ######################################### # Actual subroutine to check the list of uris # ######################################### @@ -172,16 +179,26 @@ my $self = shift; my $max_invalid = undef; if (exists $config{MaxInvalid}) {$max_invalid = $config{MaxInvalid}} + else {$max_invalid = 0} my $max_documents = undef; if (exists $config{MaxDocuments}) {$max_documents = $config{MaxDocuments}} else {$max_documents = 0} print "Now Using the CSS Validation module...\n" if $verbose; - use DB_File; - my $tmp_file = $config{tmpfile}; - my %hits; - tie (%hits, 'DB_File', "$tmp_file", O_RDONLY) || - die ("Cannot create or open $tmp_file"); - my @uris = sort { $hits{$b} <=> $hits{$a} } keys %hits; + my @uris = undef; + my %hits; + if (defined ($config{tmpfile})) + { + use DB_File; + my $tmp_file = $config{tmpfile}; + tie (%hits, 'DB_File', "$tmp_file", O_RDONLY) || + die ("Cannot create or open $tmp_file"); + @uris = sort { $hits{$b} <=> $hits{$a} } keys %hits; + } + elsif ($self->uris()) + { + @uris = $self->uris(); + foreach my $uri (@uris) { $hits{$uri} = 0 } + } my $name = ""; if (exists $config{ServerName}) {$name = $config{ServerName}} my @result; @@ -320,11 +337,97 @@ W3C::LogValidator::CSSValidator - Validates CSS style sheets from Web Server logs +=head1 SYNOPSIS + + use W3C::LogValidator::CSSValidator; + my %config = ("verbose" => 2); + my $validator = W3C::LogValidator::CSSValidator->new(\%config); + $validator->uris('http://www.w3.org/StyleSheets/home.css', 'http://yoda.zoy.org/mt-static/styles.css'); + my %result= $validator->process_list; =head1 DESCRIPTION This module is part of the W3C::LogValidator suite, and is used as an interface to the W3C CSS validation service. +=over 2 + +=item $val = W3C::LogValidator::CSSValidator->new + +Constructs a new C<W3C::LogValidator:CSSValidator> processor. 
+ +You might pass it a configuration hash reference (see L<W3C::LogValidator/config_module> and L<W3C::LogValidator::Config>) + + $validator = W3C::LogValidator::CSSValidator->new(\%config); + +=back + +=over 4 + +=item $val->process_list + +Processes a list of sorted URIs through the W3C CSS Validator. + +The list can be set with C<uris>. If the $val was given a config hash when constructed, and if the hash has a "tmpfile" key, C<process_list> will try to read this file as a hash of URIs and "hits" (popularity) with L<DB_File>. + +Returns a result hash. Keys for this hash are: + + + name (string): the name of the module, i.e "CSSValidator" + intro (string): introduction to the processing results + thead (array): headers of the results table + trows (array of arrays): rows of the results table + outro (string): conclusion of the processing results + + +=item $val->trim_uris + +Given a list of URIs of documents to process, returns a subset of this list containing the URIs of documents the module supposedly can handle. +The decision is made based on file extensions (see C<auth_ext>) and content-type (see C<HEAD_check>) + +=item $val->HEAD_check + +Checks whether a document with no extension is actually a CSS document through an HTTP HEAD request +returns 1 if the URI is of an expected content-type, 0 otherwise + +=item $val->auth_ext + +Returns the file extensions (space separated entries in a string) supported by the Module. +Public method accessing $self->{AUTH_EXT}, itself coming from either the AuthorizedExtensions configuration setting, or a default value + +=item $val->valid + +Sets / Returns whether the document being processed has been found to be valid or not. +If an argument is given, sets the variable, otherwise returns the current variable. + +=item $val->valid_err_num + +Sets / Returns the number of validation errors for the document being processed. +If an argument is given, sets the variable, otherwise returns the current variable. 
+ +=item $val->valid_success + +Sets / Returns whether the module was able to process validation of the current document successfully (regardless of valid/invalid result) +If an argument is given, sets the variable, otherwise returns the current variable. + +=item $val->valid_head + +Sets / Returns all HTTP headers returned by the markup validator when attempting to validate the current document. +If an argument is given, sets the variable, otherwise returns the current variable. + +=item $val->new_doc + +Resets all validation variables to 'undef'. In effect, prepares the processing module to the handling of a new document. + +=back + +=head1 BUGS + +Public bug-tracking interface at L<http://www.w3.org/Bugs/Public/> + + + + + =head1 AUTHOR Olivier Thereaux <ot@w3.org>
Received on Friday, 13 August 2004 06:01:15 UTC