markup-validator commit: Run incomplete input URLs through URI::Heuristic.

changeset:   3123:3e0d641baff4
tag:         tip
user:        Ville Skyttä <ville.skytta@iki.fi>
date:        Tue Jul 06 19:31:39 2010 +0300
files:       htdocs/whatsnew.html httpd/cgi-bin/check misc/bundle/Makefile.PL misc/bundle/lib/Bundle/W3C/Validator.pm
description:
Run incomplete input URLs through URI::Heuristic.


diff -r d62be4a5d48d -r 3e0d641baff4 htdocs/whatsnew.html
--- a/htdocs/whatsnew.html	Mon Jul 05 00:08:22 2010 +0300
+++ b/htdocs/whatsnew.html	Tue Jul 06 19:31:39 2010 +0300
@@ -48,6 +48,11 @@
             <a href="http://www.w3.org/Bugs/Public/show_bug.cgi?id=9933">makes
               it clearer</a> whether the error occurred in the validated
             document or an external resource related to it.
+          </li>
+          <li>
+            Enhancement: improved
+            <a href="http://search.cpan.org/dist/URI/URI/Heuristic.pm">heuristics</a>
+            for incomplete input URLs.
           </li>
         </ul>
       </dd>
diff -r d62be4a5d48d -r 3e0d641baff4 httpd/cgi-bin/check
--- a/httpd/cgi-bin/check	Mon Jul 05 00:08:22 2010 +0300
+++ b/httpd/cgi-bin/check	Tue Jul 06 19:31:39 2010 +0300
@@ -62,6 +62,7 @@
 use URI qw();
 use URI::Escape qw(uri_escape);
 use URI::file;
+use URI::Heuristic qw();
 
 ###############################################################################
 #### Constant definitions. ####################################################
@@ -1742,10 +1743,21 @@
     my $q    = shift;    # The CGI object.
     my $File = shift;    # The master datastructure.
 
+    my $ua = W3C::Validator::UserAgent->new($CFG, $File);
+
     my $uri = URI->new(ref $q ? $q->param('uri') : $q)->canonical();
     $uri->fragment(undef);
 
-    my $ua = W3C::Validator::UserAgent->new($CFG, $File);
+    if (!$uri->scheme()) {
+        local $ENV{URL_GUESS_PATTERN} = '';
+        my $guess = URI::Heuristic::uf_uri($uri);
+        if ($guess->scheme() && $ua->is_protocol_supported($guess)) {
+            $uri = $guess;
+        }
+        else {
+            $uri = URI->new("http://$uri");
+        }
+    }
 
     unless ($ua->is_protocol_supported($uri)) {
         $File->{'Error Flagged'} = TRUE;
@@ -2812,10 +2824,6 @@
         $q->param('uri', $q->param('url'));
     }
 
-    # Munge the URL to include commonly omitted prefix.
-    my $u = $q->param('uri');
-    $q->param('uri', "http://$u") if $u && $u =~ m(^www)i;
-
     # Set output mode; needed in get_error_template if we end up there.
     $File->{Opt}->{Output} = $q->param('output') || 'html';
 
diff -r d62be4a5d48d -r 3e0d641baff4 misc/bundle/Makefile.PL
--- a/misc/bundle/Makefile.PL	Mon Jul 05 00:08:22 2010 +0300
+++ b/misc/bundle/Makefile.PL	Tue Jul 06 19:31:39 2010 +0300
@@ -39,6 +39,7 @@
         "URI"                   => 0,
         "URI::Escape"           => 0,
         "URI::file"             => 0,
+        "URI::Heuristic"        => 0,
         "XML::LibXML"           => "1.70",
 
         # Optional:
diff -r d62be4a5d48d -r 3e0d641baff4 misc/bundle/lib/Bundle/W3C/Validator.pm
--- a/misc/bundle/lib/Bundle/W3C/Validator.pm	Mon Jul 05 00:08:22 2010 +0300
+++ b/misc/bundle/lib/Bundle/W3C/Validator.pm	Tue Jul 06 19:31:39 2010 +0300
@@ -49,6 +49,7 @@
  URI
  URI::Escape
  URI::file
+ URI::Heuristic
  XML::LibXML 1.70
 
 =head1 DESCRIPTION

Received on Tuesday, 6 July 2010 16:35:56 UTC