- From: Olivier Thereaux <ot@dev.w3.org>
- Date: Thu, 12 Aug 2004 09:12:08 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/perl/modules/W3C/LogValidator/lib/W3C/LogValidator In directory hutz:/tmp/cvs-serv1847 Modified Files: HTMLValidator.pm Log Message: finishing documenting API + re-adding uris method Index: HTMLValidator.pm =================================================================== RCS file: /sources/public/perl/modules/W3C/LogValidator/lib/W3C/LogValidator/HTMLValidator.pm,v retrieving revision 1.13 retrieving revision 1.14 diff -u -d -r1.13 -r1.14 --- HTMLValidator.pm 12 Aug 2004 02:19:43 -0000 1.13 +++ HTMLValidator.pm 12 Aug 2004 09:12:06 -0000 1.14 @@ -190,22 +190,29 @@ sub process_list { + my $self = shift; print "Now using the HTML Validator module... " if $verbose; print "\n" if ($verbose > 1); - - # Opening the file with the hits and URIs data - use DB_File; - my $tmp_file = $config{tmpfile}; + my @uris = undef; my %hits; - tie (%hits, 'DB_File', "$tmp_file", O_RDONLY) || - die ("Cannot create or open $tmp_file"); - my @uris = sort { $hits{$b} <=> $hits{$a} } keys %hits; - + # Opening the file with the hits and URIs data + if (defined ($config{tmpfile})) + { + use DB_File; + my $tmp_file = $config{tmpfile}; + tie (%hits, 'DB_File', "$tmp_file", O_RDONLY) || + die ("Cannot create or open $tmp_file"); + @uris = sort { $hits{$b} <=> $hits{$a} } keys %hits; + } + elsif ($self->uris()) + { + @uris = $self->uris(); + foreach my $uri (@uris) { $hits{$uri} = 0 } + } print "\n (This may take a long time if you have many files to validate)\n" if ($verbose eq 1); print "\n" if ($verbose > 2); # trying to breathe in the debug volume... 
use LWP::UserAgent; use URI::Escape; - my $self = shift; my $max_invalid = undef; if (exists $config{MaxInvalid}) {$max_invalid = $config{MaxInvalid}} else {$max_invalid = 0} @@ -345,9 +352,10 @@ =head1 SYNOPSIS use W3C::LogValidator::HTMLValidator; - @@ todo @@ - my $validator = W3C::LogValidator::HTMLValidator->new(\%config);; - my $result_string= $validator->process_list; + my %config = ("verbose" => 2); + my $validator = W3C::LogValidator::HTMLValidator->new(\%config); + $validator->uris('http://www.w3.org/Overview.html', 'http://www.yahoo.com/index.html'); + my %results = $validator->process_list; =head1 DESCRIPTION @@ -359,12 +367,31 @@ =item $val = W3C::LogValidator::HTMLValidator->new +Constructs a new C<W3C::LogValidator::HTMLValidator> processor. + +You might pass it a configuration hash reference (see L<W3C::LogValidator/config_module> and L<W3C::LogValidator::Config>) + + $validator = W3C::LogValidator::HTMLValidator->new(\%config); + =back =over 4 =item $val->process_list +Processes a list of sorted URIs through the W3C Markup Validator. + +The list can be set with C<uris>. If the $val was given a config hash when constructed, and if the hash has a "tmpfile" key, C<process_list> will try to read this file as a hash of URIs and "hits" (popularity) with L<DB_File>. + +Returns a result hash. Keys for this hash are: + + + name (string): the name of the module, i.e. "HTMLValidator" + intro (string): introduction to the processing results + thead (array): headers of the results table + trows (array of arrays): rows of the results table + outro (string): conclusion of the processing results + =item $val->trim_uris @@ -372,6 +399,7 @@ The decision is made based on file extensions (see C<auth_ext>) and content-type (see C<HEAD_check>) =item $val->HEAD_check + Checks whether a document with no extension is actually an HTML/XML document through an HTTP HEAD request returns 1 if the URI is of an expected content-type, 0 otherwise
Received on Thursday, 12 August 2004 09:12:09 UTC