- From: Hugo Haas <hugo@dev.w3.org>
- Date: Wed, 04 Aug 2004 09:41:09 +0000
- To: public-ws-desc-eds@w3.org
Update of /sources/public/2002/ws/desc/tools In directory hutz:/tmp/cvs-serv14919 Added Files: .cvsignore http_auth.py http_head.py nschecker Log Message: Local version of nschecker --- NEW FILE: http_head.py --- """ $Id: http_head.py,v 1.1 2004/08/04 09:41:07 hugo Exp $ Module to make a HTTP Head """ from urllib import * class HEADURLopener(FancyURLopener): def open_http(self, url): import httplib user_passwd = None if type(url) is type(""): host, selector = splithost(url) if host: user_passwd, host = splituser(host) host = unquote(host) realhost = host else: host, selector = url urltype, rest = splittype(selector) url = rest user_passwd = None if string.lower(urltype) != 'http': realhost = None else: realhost, rest = splithost(rest) if realhost: user_passwd, realhost = splituser(realhost) if user_passwd: selector = "%s://%s%s" % (urltype, realhost, rest) #print "proxy via http:", host, selector if not host: raise IOError, ('http error', 'no host given') if user_passwd: import base64 auth = string.strip(base64.encodestring(user_passwd)) else: auth = None h = httplib.HTTP(host) h.putrequest('HEAD', selector) if auth: h.putheader('Authorization', 'Basic %s' % auth) if realhost: h.putheader('Host', realhost) for args in self.addheaders: apply(h.putheader, args) h.endheaders() errcode, errmsg, headers = h.getreply() fp = h.getfile() if errcode == 200: import urllib return urllib.addinfourl(fp, headers, "http:" + url) else: return self.http_error(url, fp, errcode, errmsg, headers) --- NEW FILE: .cvsignore --- *.pyc --- NEW FILE: nschecker --- #!/usr/local/bin/python """ $Id: nschecker,v 1.1 2004/08/04 09:41:07 hugo Exp $ """ import cgi import sys import os import urlparse import urllib import http_head import http_auth import re import popen2 Page1 = """ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en-US"> <head> <link href="http://www.w3.org/StyleSheets/base" rel="stylesheet"/> <link href="http://www.w3.org/2001/11/results" rel="stylesheet" /> <title>Namespaces checker service%s</title></head> <body> <p><a href="http://www.w3.org/"><img src="http://www.w3.org/Icons/w3c_home" alt="W3C"/></a></p> <h1>Namespaces checker%s</h1> <h2>Description</h2> <p>This tool takes the URI of an (X)HTML document as input and outputs the "visible" URIs in it and make a HTTP HEAD on them to check their validity. The intent is to help the <a href="/Guide/pubrules">pubrules</a> checking. It <strong>does not</strong> check the validity of the anchors.</p> <p>You may want to use <a href="http://validator.w3.org/checklink">the linkchecker tool</a> to check all your links in your document.</p> """ Page2 = """ <form method="GET"> <label>URI(s) of the document(s) you want to check URIs: <input type="text" name="uri" value="%s"/></label> <input type="submit" value="Get results"/> </form> <hr /> <address> script $Revision: 1.1 $ of $Date: 2004/08/04 09:41:07 $<br /> by <a href="http://www.w3.org/People/Dom/">Dominique Hazael-Massieux</a><br /> </address> </body> </html> """ class myHEADURLopener(http_head.HEADURLopener): res ="" def http_error_default(self, url, fp, errcode, errmsg, headers): return None def http_error(self, url, fp, errcode, errmsg, headers, data=None): self.res = self.res + formatHeaders(errcode,errmsg,headers) return urllib.URLopener.http_error(self, url, fp, errcode, errmsg, headers, data) def retry_http_basic_auth(self, url, realm, data=None): return None def formatHeaders(errcode,errmsg,headers): classe="" if errcode==200: classe=" class='yes'" elif errcode==301 or errcode==302: classe="" elif errcode==401: classe=" class='tocheck'" else: classe=" class='no'" return( "-> <span" + classe + ">"+ `errcode` + "</span> (<span class='errmsg'>" + errmsg + '</span>) ') def serveRequest(): fields = cgi.FieldStorage() if not fields.has_key('uri'): print "Content-Type: text/html; charset=utf-8" print print Page1 % ("","") print Page2 % ("") else: addr = fields['uri'].value if len(urlparse.urlparse(addr)[0])<2: print "Status: 403" print "Content-Type: text/plain" print print "sorry, I decline to handle file: addresses" else: title = " for %s" % (addr) link = " for <a href='%s'>%s</a>" % (addr,addr) print Page1 % (title,link) import http_auth url_opener = http_auth.ProxyAuthURLopener() error = "" try: doc = url_opener.open(addr) except IOError, (errno, strerror): doc = None command = "/usr/bin/lynx -nolist -dump -stdin" (piperfd,pipewfd,pipeErr) = popen2.popen3(command) if (doc): pipewfd.write(doc.read()) doc.close() pipewfd.close() if (piperfd): head_request = myHEADURLopener() line = piperfd.readline() pattern = "(http://[^><\s\"'\&\)]*)[>|<|\s|\"|'|\&\)]" uriMatcher = re.compile(pattern) foundUris = {} while line: results = uriMatcher.findall(line) for uri in results: if uri[-1]==".": uri=uri[:-1] if foundUris.has_key(uri): foundUris[uri] = foundUris[uri] + 1 else: foundUris[uri] = 1 line = piperfd.readline() piperfd.close() print '<p>URIs found :</p>\n<dl>\n' uris = foundUris.keys() uris.sort() for uri in uris: plural= "" if foundUris[uri]>1: plural = "s" print "<dt><a href='"+uri+"'>"+uri+"</a> (%s occurence%s)</dt>\n" % (foundUris[uri],plural) example_org = "(http://([^.]*\.)*example\.(net|org|com)($|/))" example_org_matcher = re.compile(example_org) if example_org_matcher.search(uri): print "<dd>Example URI</dd>" else: print "<dd>" head = head_request.open(uri) print head_request.res if head: print formatHeaders(200,"OK","") head.close() head_request.res = "" if uri[:18]=="http://www.w3.org/": print """ www.w3.org URI, if a namespace, make sure it is <a href="http://www.w3.org/1999/10/nsuri">compliant with the NS rules</a>""" else: print """; this URI is not on www.w3.org, <span class="tocheck">make sure it's not a namespace</span>""" print "</dd>\n" print "</dl>\n" else: print "<p><span class='no'>An error</span> (%s) occured trying to get <a href='%s'>%s</a>.</p>" % (url_opener.error,addr,addr) print Page2 % (addr) if __name__ == '__main__': if os.environ.has_key('SCRIPT_NAME'): serveRequest() --- NEW FILE: http_auth.py --- import urllib import os class ProxyAuthURLopener(urllib.FancyURLopener): error = "" def http_error_default(self, url, fp, errcode, errmsg, headers): self.error = `errcode` + " " + errmsg return None def http_error_304(self,uri,fp,errocode,errmsg,headers): print 'HTTP/1.1 304 Not Modified' return None def retry_http_basic_auth(self, url, realm, data=None): if os.environ.has_key('HTTP_AUTHORIZATION') and os.environ['HTTP_AUTHORIZATION']: self.addheader('Authorization',os.environ['HTTP_AUTHORIZATION']) del os.environ['HTTP_AUTHORIZATION'] if data is None: return self.open('http:' + url) else: return self.open('http:' + url,data) else: global Page print 'Status: 401 Authorization Required' print 'WWW-Authenticate: Basic realm="%s"' % realm print 'Connection: close' Page = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> <html> <head> <title>401 Authorization Required</title> </head> <body> <h1>Authorization Required</h1> <p>You need %s access to http:%s to use this service.</p> """ % (realm,url) return None
Received on Wednesday, 4 August 2004 05:41:29 UTC