link-checker commit: block ports and protocols

changeset:   419:e3679807fdb8
tag:         tip
user:        Ted Guild <ted@w3.org>
date:        Mon Jun 22 12:36:43 2015 -0400
files:       bin/checklink
description:
block ports and protocols


diff -r a0321dce6900 -r e3679807fdb8 bin/checklink
--- a/bin/checklink	Wed May 27 09:12:53 2015 +0400
+++ b/bin/checklink	Mon Jun 22 12:36:43 2015 -0400
@@ -194,6 +194,40 @@
         );
         $resp->header('Client-Warning', 'Internal response');
     }
+
+    # port() falls back to the scheme's default (80 for http) when the URI gives none
+    my $port = undef;
+    $port = $uri->port();
+    #whitelist regex if short enough
+#    if (! $port =~ m/^(80|443|8000|8080)$/ ) {
+    if ( $port =~ '22' ) {
+        $resp = HTTP::Response->new(403,
+            'Checking certain ports disallowed by link checker configuration'
+        );
+        $resp->header('Client-Warning', 'Internal response');
+    }
+
+    # port() falls back to the scheme's default (80 for http) when the URI gives none
+    my $port = undef;
+    $port = $uri->port();
+    # Whitelist of allowed ports; a single regex is fine while the list stays short
+    if ($port !~ m/^(80|443|8000|8080|8081|115|21)$/ ) {
+        $resp = HTTP::Response->new(403,
+            'Checking certain ports disallowed by link checker configuration'
+        );
+        $resp->header('Client-Warning', 'Internal response');
+    }
+
+    my $scheme = undef;
+    $scheme = $uri->scheme();
+    # Whitelist of allowed URI schemes; a single regex is fine while the list stays short
+    if ($scheme !~ m/^(http|https|ftp|urn)$/ ) {
+        $resp = HTTP::Response->new(403,
+            'Checking certain protocols disallowed by link checker configuration'
+        );
+        $resp->header('Client-Warning', 'Internal response');
+    }
+
     return $resp;
 }
 
@@ -354,7 +388,7 @@
     $Cfg{Markup_Validator_URI} ||= 'http://validator.w3.org/check?uri=%s';
     $Cfg{CSS_Validator_URI} ||=
         'http://jigsaw.w3.org/css-validator/validator?uri=%s';
-    $Cfg{Doc_URI} ||= 'http://validator.w3.org/checklink/docs/checklink.html';
+    $Cfg{Doc_URI} ||= 'http://validator.w3.org/docs/checklink.html';
     $Cfg{Doc_Base_URI} ||= '/checklink/docs/';
     $Cfg{Doc_Images_URI} ||= '/checklink/images/';
 
@@ -651,7 +685,7 @@
     } if (MP2() && !$ENV{HTTP_AUTHORIZATION});
 
     $uri =~ s/^\s+//g;
-    if (index($uri, ":") != -1) {
+    if ($uri =~ /:/) {
         $uri = URI->new($uri);
     }
     else {
@@ -816,7 +850,7 @@
     for my $i (0 .. $#{$Opts{Suppress_Broken}}) {
         ${$Opts{Suppress_Broken}}[$i] =~ s/ /:/;
         my $sb_arg = ${$Opts{Suppress_Broken}}[$i];
-        if ($sb_arg !~ /^(?:-1|[0-9]+):./) {
+        if ($sb_arg !~ /^(-1|[0-9]+):./) {
             &usage(1,
                 "Bad suppress-broken argument, should be prefixed by a numeric response code: $sb_arg"
             );
@@ -1582,7 +1616,7 @@
     for my $base (@{$Opts{Base_Locations}}) {
         my $rel = $candidate->rel($base);
         next if ($candidate eq $rel);    # Relative path not possible?
-        next if ($rel =~ m|^(?:\.\.)?/|);    # Relative path upwards?
+        next if ($rel =~ m|^(\.\.)?/|);  # Relative path upwards?
         return 1;
     }
 
@@ -1749,10 +1783,10 @@
     $results{$uri}{location}{code} = $response->code();
     $results{$uri}{location}{code} = RC_ROBOTS_TXT()
         if ($results{$uri}{location}{code} == 403 &&
-        index($response->message(), "Forbidden by robots.txt") != -1);
+        $response->message() =~ /Forbidden by robots\.txt/);
     $results{$uri}{location}{code} = RC_IP_DISALLOWED()
         if ($results{$uri}{location}{code} == 403 &&
-        index($response->message(), "non-public IP") != -1);
+        $response->message() =~ /non-public IP/);
     $results{$uri}{location}{code} = RC_DNS_ERROR()
         if ($results{$uri}{location}{code} == 500 &&
         $response->message() =~ /Bad hostname '[^\']*'/);
@@ -3024,7 +3058,7 @@
     my $tagline = "Check links and anchors in Web pages or full Web sites";
 
     printf(
-        <<'EOF', $Cfg{Doc_Images_URI} . 'no_w3c.png', $tagline);
+        <<'EOF', $Cfg{Doc_Images_URI} . 'w3c.png', $tagline);
 <div id="banner"><h1 id="title"><a href="http://www.w3.org/" title="W3C"><img alt="W3C" id="logo" src="%s" width="110" height="61" /></a>
 <a href="checklink"><span>Link Checker</span></a></h1>
 <p id="tagline">%s</p></div>
@@ -3061,7 +3095,7 @@
         $icon_type = 'error';
     }
     return sprintf('<span class="err_type"><img src="%s" alt="%s" /></span>',
-        $Cfg{Doc_Images_URI} . 'info_icons/'. $icon_type . '.png',
+        $Cfg{Doc_Images_URI} . 'info_icons/' . $icon_type . '.png',
         $icon_type);
 }
 
@@ -3229,9 +3263,7 @@
 </fieldset>
 <p class=\"submit_button\"><input type=\"submit\" name=\"check\" value=\"Check\" /></p>
 </form>
-<div id=\"w3c-include\">
-    <script type=\"text/javascript\" src=\"//www.w3.org/QA/Tools/w3c-include.js\"></script>
-</div>
+<div id=\"w3c-include\"><script type=\"text/javascript\" src=\"//www.w3.org/QA/Tools/w3c-include.js\"></script></div>
 ";
     return;
 }

Received on Monday, 22 June 2015 16:38:22 UTC