- From: Hugo Haas <hugo@dev.w3.org>
- Date: Wed, 04 Aug 2004 09:41:09 +0000
- To: public-ws-desc-eds@w3.org
Update of /sources/public/2002/ws/desc/tools
In directory hutz:/tmp/cvs-serv14919
Added Files:
.cvsignore http_auth.py http_head.py nschecker
Log Message:
Local version of nschecker
--- NEW FILE: http_head.py ---
""" $Id: http_head.py,v 1.1 2004/08/04 09:41:07 hugo Exp $
Module to make an HTTP HEAD request
"""
from urllib import *
class HEADURLopener(FancyURLopener):
    """URL opener whose HTTP requests use the HEAD method.

    Only ``open_http`` is overridden; every other behaviour comes from
    ``FancyURLopener``.
    """

    def open_http(self, url):
        """Send a HEAD request for *url* and return the response object.

        url is either a string that ``splithost`` can break into host and
        selector, or a ``(host, selector)`` pair in which *selector* is a
        full URL (the form used when going through a proxy).

        Returns an ``addinfourl`` object when the status is 200; otherwise
        returns whatever ``self.http_error`` returns.
        Raises IOError when no host can be determined.
        """
        import httplib
        user_passwd = None
        if isinstance(url, str):
            host, selector = splithost(url)
            if host:
                user_passwd, host = splituser(host)
                host = unquote(host)
            realhost = host
        else:
            host, selector = url
            urltype, rest = splittype(selector)
            url = rest
            user_passwd = None
            # splittype() yields None for the scheme when there is none;
            # str methods are used because the ``string`` module is not
            # brought into scope by ``from urllib import *``.
            if (urltype or '').lower() != 'http':
                realhost = None
            else:
                realhost, rest = splithost(rest)
                if realhost:
                    user_passwd, realhost = splituser(realhost)
                if user_passwd:
                    # Rebuild the selector without the user:password part.
                    selector = "%s://%s%s" % (urltype, realhost, rest)
        if not host:
            raise IOError('http error', 'no host given')
        if user_passwd:
            import base64
            auth = base64.encodestring(user_passwd).strip()
        else:
            auth = None
        h = httplib.HTTP(host)
        h.putrequest('HEAD', selector)
        if auth:
            h.putheader('Authorization', 'Basic %s' % auth)
        if realhost:
            h.putheader('Host', realhost)
        for args in self.addheaders:
            h.putheader(*args)
        h.endheaders()
        errcode, errmsg, headers = h.getreply()
        fp = h.getfile()
        if errcode == 200:
            # addinfourl is not exported by ``from urllib import *``.
            import urllib
            return urllib.addinfourl(fp, headers, "http:" + url)
        return self.http_error(url, fp, errcode, errmsg, headers)
--- NEW FILE: .cvsignore ---
*.pyc
--- NEW FILE: nschecker ---
#!/usr/local/bin/python
""" $Id: nschecker,v 1.1 2004/08/04 09:41:07 hugo Exp $
"""
import cgi
import sys
import os
import urlparse
import urllib
import http_head
import http_auth
import re
import popen2
# Opening part of the HTML page.  It has two %s placeholders: the first is
# appended to the <title> text, the second to the <h1> heading.
Page1 = """
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en-US">
<head>
<link href="http://www.w3.org/StyleSheets/base" rel="stylesheet"/>
<link href="http://www.w3.org/2001/11/results" rel="stylesheet" />
<title>Namespaces checker service%s</title></head>
<body>
<p><a href="http://www.w3.org/"><img src="http://www.w3.org/Icons/w3c_home" alt="W3C"/></a></p>
<h1>Namespaces checker%s</h1>
<h2>Description</h2>
<p>This tool takes the URI of an (X)HTML document as input and outputs the "visible" URIs in it and make a HTTP HEAD on them to check their validity. The intent is to help the <a href="/Guide/pubrules">pubrules</a> checking. It <strong>does not</strong> check the validity of the anchors.</p>
<p>You may want to use <a href="http://validator.w3.org/checklink">the linkchecker tool</a> to check all your links in your document.</p>
"""
# Closing part of the HTML page: the query form and footer.  Its single %s
# placeholder is the value pre-filled in the "uri" text input.
Page2 = """
<form method="GET">
<label>URI(s) of the document(s) you want to check URIs: <input type="text" name="uri" value="%s"/></label>
<input type="submit" value="Get results"/>
</form>
<hr />
<address>
script $Revision: 1.1 $ of $Date: 2004/08/04 09:41:07 $<br />
by <a href="http://www.w3.org/People/Dom/">Dominique Hazael-Massieux</a><br />
</address>
</body>
</html>
"""
class myHEADURLopener(http_head.HEADURLopener):
    """HEAD opener that keeps an HTML trace of the error replies it sees.

    Each reply routed through ``http_error`` is formatted with
    ``formatHeaders`` and appended to ``res``; the caller is expected to
    read and reset ``res`` between requests.
    """

    # HTML fragments accumulated by http_error for the current request.
    res = ""

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Give up quietly on statuses that have no dedicated handler."""
        return None

    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
        """Record the reply in ``res`` and then run the standard dispatch."""
        self.res += formatHeaders(errcode, errmsg, headers)
        return urllib.URLopener.http_error(
            self, url, fp, errcode, errmsg, headers, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        """Never retry with credentials; report no result instead."""
        return None
def formatHeaders(errcode, errmsg, headers):
    """Return an HTML fragment describing one HTTP reply.

    errcode -- numeric HTTP status; it selects the CSS class of the span
               ('yes' for 200, none for 301/302, 'tocheck' for 401 and
               'no' for anything else).
    errmsg  -- reason phrase sent by the remote server.  It is HTML-escaped
               because it is untrusted text placed inside markup.
    headers -- accepted for signature compatibility; not used.
    """
    if errcode == 200:
        classe = " class='yes'"
    elif errcode in (301, 302):
        classe = ""
    elif errcode == 401:
        classe = " class='tocheck'"
    else:
        classe = " class='no'"
    # "&" must be replaced first so the entities added next stay intact.
    safe_msg = errmsg.replace("&", "&amp;")
    safe_msg = safe_msg.replace("<", "&lt;").replace(">", "&gt;")
    return ("-> <span" + classe + ">" + repr(errcode)
            + "</span> (<span class='errmsg'>" + safe_msg + "</span>) ")
def serveRequest():
fields = cgi.FieldStorage()
if not fields.has_key('uri'):
print "Content-Type: text/html; charset=utf-8"
print
print Page1 % ("","")
print Page2 % ("")
else:
addr = fields['uri'].value
if len(urlparse.urlparse(addr)[0])<2:
print "Status: 403"
print "Content-Type: text/plain"
print
print "sorry, I decline to handle file: addresses"
else:
title = " for %s" % (addr)
link = " for <a href='%s'>%s</a>" % (addr,addr)
print Page1 % (title,link)
import http_auth
url_opener = http_auth.ProxyAuthURLopener()
error = ""
try:
doc = url_opener.open(addr)
except IOError, (errno, strerror):
doc = None
command = "/usr/bin/lynx -nolist -dump -stdin"
(piperfd,pipewfd,pipeErr) = popen2.popen3(command)
if (doc):
pipewfd.write(doc.read())
doc.close()
pipewfd.close()
if (piperfd):
head_request = myHEADURLopener()
line = piperfd.readline()
pattern = "(http://[^><\s\"'\&\)]*)[>|<|\s|\"|'|\&\)]"
uriMatcher = re.compile(pattern)
foundUris = {}
while line:
results = uriMatcher.findall(line)
for uri in results:
if uri[-1]==".":
uri=uri[:-1]
if foundUris.has_key(uri):
foundUris[uri] = foundUris[uri] + 1
else:
foundUris[uri] = 1
line = piperfd.readline()
piperfd.close()
print '<p>URIs found :</p>\n<dl>\n'
uris = foundUris.keys()
uris.sort()
for uri in uris:
plural= ""
if foundUris[uri]>1:
plural = "s"
print "<dt><a href='"+uri+"'>"+uri+"</a> (%s occurence%s)</dt>\n" % (foundUris[uri],plural)
example_org = "(http://([^.]*\.)*example\.(net|org|com)($|/))"
example_org_matcher = re.compile(example_org)
if example_org_matcher.search(uri):
print "<dd>Example URI</dd>"
else:
print "<dd>"
head = head_request.open(uri)
print head_request.res
if head:
print formatHeaders(200,"OK","")
head.close()
head_request.res = ""
if uri[:18]=="http://www.w3.org/":
print """ www.w3.org URI, if a namespace, make sure it is <a href="http://www.w3.org/1999/10/nsuri">compliant with the NS rules</a>"""
else:
print """; this URI is not on www.w3.org, <span class="tocheck">make sure it's not a namespace</span>"""
print "</dd>\n"
print "</dl>\n"
else:
print "<p><span class='no'>An error</span> (%s) occured trying to get <a href='%s'>%s</a>.</p>" % (url_opener.error,addr,addr)
print Page2 % (addr)
# Serve a request only when run as a script with SCRIPT_NAME present in the
# environment (presumably set by the CGI server -- confirm for this host).
if __name__ == '__main__' and os.environ.has_key('SCRIPT_NAME'):
    serveRequest()
--- NEW FILE: http_auth.py ---
import urllib
import os
class ProxyAuthURLopener(urllib.FancyURLopener):
error = ""
def http_error_default(self, url, fp, errcode, errmsg, headers):
self.error = `errcode` + " " + errmsg
return None
def http_error_304(self,uri,fp,errocode,errmsg,headers):
print 'HTTP/1.1 304 Not Modified'
return None
def retry_http_basic_auth(self, url, realm, data=None):
if os.environ.has_key('HTTP_AUTHORIZATION') and os.environ['HTTP_AUTHORIZATION']:
self.addheader('Authorization',os.environ['HTTP_AUTHORIZATION'])
del os.environ['HTTP_AUTHORIZATION']
if data is None:
return self.open('http:' + url)
else:
return self.open('http:' + url,data)
else:
global Page
print 'Status: 401 Authorization Required'
print 'WWW-Authenticate: Basic realm="%s"' % realm
print 'Connection: close'
Page = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html>
<head>
<title>401 Authorization Required</title>
</head>
<body>
<h1>Authorization Required</h1>
<p>You need %s access to http:%s to use this service.</p>
""" % (realm,url)
return None
Received on Wednesday, 4 August 2004 05:41:29 UTC