- From: Yves Lafon via cvs-syncmail <cvsmail@w3.org>
- Date: Sun, 30 Oct 2011 21:02:53 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/2002/css-validator/org/w3c/css/css In directory hutz:/tmp/cvs-serv12204 Added Files: HTMLParserStyleSheetHandler.java Log Message: Added support for the HTML5 parser http://about.validator.nu/htmlparser/ --- NEW FILE: HTMLParserStyleSheetHandler.java --- /* * Copyright (c) 2001 World Wide Web Consortium, * (Massachusetts Institute of Technology, Institut National de * Recherche en Informatique et en Automatique, Keio University). All * Rights Reserved. This program is distributed under the W3C's Software * Intellectual Property License. This program is distributed in the * hope that it will be useful, but WITHOUT ANY WARRANTY; without even * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR * PURPOSE. * See W3C License http://www.w3.org/Consortium/Legal/ for more details. * * $Id: HTMLParserStyleSheetHandler.java,v 1.1 2011/10/30 21:02:51 ylafon Exp $ */ package org.w3c.css.css; import org.w3c.css.parser.CssError; import org.w3c.css.parser.Errors; import org.w3c.css.parser.analyzer.TokenMgrError; import org.w3c.css.util.ApplContext; import org.w3c.css.util.CssVersion; import org.w3c.css.util.HTTPURL; import org.w3c.css.util.InvalidParamException; import org.w3c.css.util.Util; import org.w3c.css.util.Warning; import org.w3c.css.util.Warnings; import org.w3c.css.util.xml.XMLCatalog; import org.w3c.www.mime.MimeType; import org.w3c.www.mime.MimeTypeFormatException; import org.xml.sax.Attributes; import org.xml.sax.ContentHandler; import org.xml.sax.EntityResolver; import org.xml.sax.ErrorHandler; import org.xml.sax.InputSource; import org.xml.sax.Locator; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; import org.xml.sax.ext.LexicalHandler; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import java.io.StringBufferInputStream; import java.net.MalformedURLException; import java.net.URL; import java.net.URLConnection; import java.util.HashMap; import static nu.validator.htmlparser.common.XmlViolationPolicy.ALLOW; /** * @author Philippe Le Hegaret * @version $Revision: 1.1 $ */ public class HTMLParserStyleSheetHandler implements ContentHandler, LexicalHandler, ErrorHandler, EntityResolver { static String XHTML_NS = "http://www.w3.org/1999/xhtml"; private static long autoIdCount; String namespaceURI; boolean isRoot = true; ApplContext ac; URL documentURI = null; URL baseURI = null; boolean isHTML5; // StyleSheet styleSheet = new StyleSheet(); StyleSheetParser styleSheetParser = new StyleSheetParser(); boolean inStyle = false; String media = null; String type = null; String title = null; String charset = null; StringBuilder text = new StringBuilder(); Locator locator; static XMLCatalog catalog = new XMLCatalog(); /** * Creates a new HTMLParserStyleSheetHandler */ public HTMLParserStyleSheetHandler(URL baseURI, ApplContext ac) { this.documentURI = baseURI; this.baseURI = baseURI; this.ac = ac; } public void setDocumentLocator(Locator locator) { this.locator = locator; } public void startDocument() throws SAXException { } public void endDocument() throws SAXException { ac.setInput("text/xml"); } public void startPrefixMapping(String prefix, String uri) throws SAXException { } public void endPrefixMapping(String prefix) throws SAXException { } public void characters(char ch[], int start, int length) throws SAXException { if (inStyle) { text.append(ch, start, length); } } public void comment(char ch[], int start, int length) throws SAXException { if (inStyle) { int line = (locator != null ? locator.getLineNumber() : -1); Warning w = new Warning(baseURI.toString(), line, "style-inside-comment", 0, ac); Warnings warnings = new Warnings(ac.getWarningLevel()); warnings.addWarning(w); styleSheetParser.notifyWarnings(warnings); //text.append(ch, start, length); // ignoring, per http://www.w3.org/Bugs/Public/show_bug.cgi?id=761 } } public void ignorableWhitespace(char ch[], int start, int length) throws SAXException { } public void processingInstruction(String target, String data) throws SAXException { HashMap<String, String> atts = getValues(data); if ("xml-stylesheet".equals(target)) { String rel = atts.get("alternate"); String type = atts.get("type"); String href = atts.get("href"); if (Util.onDebug) { System.err.println("<?xml-stylesheet alternate=\"" + rel + "\" type=\"" + type + "\"" + " href=\"" + href + "\"?>"); } if ("yes".equalsIgnoreCase(rel)) { rel = "alternate stylesheet"; } else { rel = "stylesheet"; } if (href == null) { int line = -1; if (locator != null) { line = locator.getLineNumber(); } CssError er = new CssError(baseURI.toString(), line, new InvalidParamException("unrecognized.link", ac)); Errors ers = new Errors(); ers.addError(er); styleSheetParser.notifyErrors(ers); } if (href.charAt(0) == '#') { // internal style sheet, will be processed by the parser return; } if (type != null) { MimeType mt = null; try { new MimeType(type); } catch (Exception ex) { /* at worst, null */ } ; if (mt != null && (MimeType.TEXT_CSS.match(mt) == MimeType.MATCH_SPECIFIC_SUBTYPE)) { // we're dealing with a stylesheet... URL url; try { if (baseURI != null) { url = new URL(baseURI, href); } else { url = new URL(href); } } catch (MalformedURLException e) { return; // Ignore errors } if (Util.onDebug) { System.err.println("[HTMLParserStyleSheetHandler::" + "initialize(): " + "should parse CSS url: " + url.toString() + "]"); } String media = atts.get("media"); if (media == null && ac.getCssVersion() != CssVersion.CSS1) { media = "all"; } styleSheetParser.parseURL(ac, url, atts.get("title"), rel, media, StyleSheetOrigin.AUTHOR); if (Util.onDebug) { System.err.println("[parsed!]"); } } } } } public void skippedEntity(String name) throws SAXException { } public void startElement(String namespaceURI, String localName, String qName, Attributes atts) throws SAXException { if (isRoot) { this.namespaceURI = namespaceURI; isRoot = false; } if (XHTML_NS.equals(namespaceURI)) { if ("base".equals(localName)) { String href = atts.getValue("href"); if (Util.onDebug) { System.err.println("BASE href=\"" + href + "\""); } if (href != null) { //URL url; try { baseURI = new URL(documentURI, href); documentURI = baseURI; } catch (MalformedURLException e) { return; // Ignore errors } } } else if ("link".equals(localName)) { String rel = atts.getValue("rel"); String type = atts.getValue("type"); String href = atts.getValue("href"); if (Util.onDebug) { System.err.println("HTMLParser: link rel=\"" + rel + "\" type=\"" + type + "\"" + " href=\"" + href + "\""); } if ((rel != null) && rel.toLowerCase().indexOf("stylesheet") != -1) { // we're dealing with a stylesheet... // @@TODO alternate stylesheet URL url; // first we check if there is an href if (href == null) { int line = -1; if (locator != null) { line = locator.getLineNumber(); } CssError er = new CssError(baseURI.toString(), line, new InvalidParamException("unrecognized.link", ac)); Errors ers = new Errors(); ers.addError(er); styleSheetParser.notifyErrors(ers); return; } // If so, check the type if (type == null) { if (!isHTML5) { int line = (locator != null ? locator.getLineNumber() : -1); Warning w = new Warning(baseURI.toString(), line, "link-type", 0, ac); Warnings warnings = new Warnings(ac.getWarningLevel()); warnings.addWarning(w); styleSheetParser.notifyWarnings(warnings); } } else { MimeType mt = null; try { mt = new MimeType(type); } catch (MimeTypeFormatException mtfe) { return; } if (MimeType.TEXT_CSS.match(mt) != MimeType.MATCH_SPECIFIC_SUBTYPE) { return; } } // then prepare for parsing try { if (baseURI != null) { url = new URL(baseURI, href); } else { url = new URL(href); } } catch (MalformedURLException e) { return; // Ignore errors } if (Util.onDebug) { System.err.println("[HTMLParserStyleSheetHandler::initialize(): " + "should parse CSS url: " + url.toString() + "]"); } String media = atts.getValue("media"); if (media == null && ac.getCssVersion() != CssVersion.CSS1) { media = "all"; } styleSheetParser.parseURL(ac, url, atts.getValue("title"), rel, media, StyleSheetOrigin.AUTHOR); if (Util.onDebug) { System.err.println("[parsed!]"); } } } else if ("style".equals(localName)) { media = atts.getValue("media"); type = atts.getValue("type"); title = atts.getValue("title"); if (media == null && ac.getCssVersion() != CssVersion.CSS1) { media = "all"; } if (Util.onDebug) { System.err.println("style media=\"" + media + "\" type=\"" + type + "\"" + " title=\"" + title + "\""); } if (type == null) { // By default we consider that it is CSS for HTML content // and raise a warning about the missing type attribute. // (per HTML5 spec) if (!isHTML5) { int line = (locator != null ? locator.getLineNumber() : -1); Warning w = new Warning(baseURI.toString(), line, "style-type", 0, ac); Warnings warnings = new Warnings(ac.getWarningLevel()); warnings.addWarning(w); styleSheetParser.notifyWarnings(warnings); } text.setLength(0); inStyle = true; } else { try { MimeType mt = new MimeType(type); if (MimeType.TEXT_CSS.match(mt) == MimeType.MATCH_SPECIFIC_SUBTYPE) { text.setLength(0); inStyle = true; } } catch (MimeTypeFormatException ex) { // do nothing } } } else if (atts.getValue("style") != null) { String value = atts.getValue("style"); if (value != null) { // here we have a style attribute String id = atts.getValue("id"); handleStyleAttribute(value, id); } } } else { // the style attribute, recommended by UI Tech TF String value = atts.getValue(XHTML_NS, "style"); if (value != null) { // here we have a style attribute String id = atts.getValue(XHTML_NS, "id"); handleStyleAttribute(value, id); } } } public void endElement(String namespaceURI, String localName, String qName) throws SAXException { int line = 0; if (locator != null) { line = locator.getLineNumber(); } if (XHTML_NS.equals(namespaceURI)) { if ("style".equals(localName)) { if (inStyle) { inStyle = false; if (text.length() != 0) { if (Util.onDebug) { System.err.println("PARSE [" + text.toString() + "]"); } styleSheetParser .parseStyleElement(ac, new StringBufferInputStream(text.toString()), title, media, documentURI, line); } } } } } public void handleStyleAttribute(String value, String id) { if (id == null) { // but we have no id: create one. // a normal id should NOT contain a "#" character. StringBuilder sb = new StringBuilder("#autoXML"); sb.append(autoIdCount); // FIXME why two times? sb.append(autoIdCount++); id = sb.toString(); } int line = 0; if (locator != null) { line = locator.getLineNumber(); } // parse the style attribute. try { styleSheetParser .parseStyleAttribute(ac, new ByteArrayInputStream(value.getBytes()), id, documentURI, line); } catch (TokenMgrError ex) { CssError err = new CssError(baseURI.toString(), line, ex); Errors errs = new Errors(); errs.addError(err); styleSheetParser.notifyErrors(errs); } } public StyleSheet getStyleSheet() { return styleSheetParser.getStyleSheet(); } public void startDTD(String name, String publicId, String systemId) throws SAXException { // check that we received <!DOCTYPE html> isHTML5 = ("html".equalsIgnoreCase(name)); } public void endDTD() throws SAXException { } public void startEntity(String name) throws SAXException { } public void endEntity(String name) throws SAXException { } public void startCDATA() throws SAXException { } public void endCDATA() throws SAXException { } public void error(SAXParseException exception) throws SAXException { } public void fatalError(SAXParseException exception) throws SAXException { throw exception; } public void warning(SAXParseException exception) throws SAXException { } public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException { String uri = null; if (publicId != null) { if ("-//W3C//DTD XHTML 1.0 Transitional//EN".equals(publicId)) { if (!"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd".equals(systemId)) { if (ac != null && ac.getFrame() != null) { ac.getFrame().addWarning("xhtml.system_identifier.invalid"); } } } else if ("-//W3C//DTD XHTML 1.0 Strict//EN".equals(publicId)) { if (!"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd".equals(systemId)) { if (ac != null && ac.getFrame() != null) { ac.getFrame().addWarning("xhtml.system_identifier.invalid"); } } } else if ("-//W3C//DTD XHTML 1.0 Frameset//EN".equals(publicId)) { if (!"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd".equals(systemId)) { if (ac != null && ac.getFrame() != null) { ac.getFrame().addWarning("xhtml.system_identifier.invalid"); } } } uri = catalog.getProperty(publicId); } if (uri == null && systemId != null) { uri = catalog.getProperty(systemId); } if (uri != null) { return new InputSource(uri); } else { return new InputSource(new URL(baseURI, systemId).toString()); } } public void parse(InputStream in, String fileName) throws IOException, SAXException { InputSource source = new InputSource(in); org.xml.sax.XMLReader xmlParser = new nu.validator.htmlparser.sax.HtmlParser(ALLOW); try { xmlParser.setProperty("http://xml.org/sax/properties/lexical-handler", this); // xmlParser.setFeature("http://xml.org/sax/features/namespace-prefixes", true); xmlParser.setFeature("http://xml.org/sax/features/validation", false); } catch (Exception ex) { ex.printStackTrace(); } xmlParser.setContentHandler(this); baseURI = new URL(fileName); documentURI = new URL(fileName); source.setSystemId(fileName); try { xmlParser.parse(source); } finally { in.close(); } } void parse(URL url) throws Exception { InputSource source = new InputSource(); URLConnection connection; InputStream in; org.xml.sax.XMLReader xmlParser = new nu.validator.htmlparser.sax.HtmlParser(ALLOW); try { xmlParser.setProperty("http://xml.org/sax/properties/lexical-handler", this); // xmlParser.setFeature("http://xml.org/sax/features/namespace-prefixes", true); xmlParser.setFeature("http://xml.org/sax/features/validation", false); /* xmlParser.setFeature("http://xml.org/sax/features/external-parameter-entities", false); xmlParser.setFeature("http://xml.org/sax/features/external-general-entities", false); */ } catch (Exception ex) { ex.printStackTrace(); } xmlParser.setContentHandler(this); connection = HTTPURL.getConnection(url, ac); in = HTTPURL.getInputStream(ac, connection); String streamEncoding = HTTPURL.getCharacterEncoding(ac, connection); String httpCL = connection.getHeaderField("Content-Location"); if (httpCL != null) { baseURI = HTTPURL.getURL(baseURI, httpCL); documentURI = baseURI; if (streamEncoding != null) { ac.setCharsetForURL(baseURI, streamEncoding); } } if (streamEncoding != null) { source.setEncoding(streamEncoding); } //else { // String ctype = connection.getContentType(); // if (ctype != null) { // try { // MimeType repmime = new MimeType(ctype); // if (repmime.hasParameter("charset")) // source.setEncoding(repmime.getParameterValue("charset")); // } catch (Exception ex) {} // } //} source.setByteStream(in); try { xmlParser.parse(url.toString()); } finally { in.close(); } } void parse(String urlString, URLConnection connection) throws Exception { org.xml.sax.XMLReader xmlParser = new nu.validator.htmlparser.sax.HtmlParser(ALLOW); try { xmlParser.setProperty("http://xml.org/sax/properties/lexical-handler", this); // xmlParser.setFeature("http://xml.org/sax/features/namespace-prefixes", true); xmlParser.setFeature("http://xml.org/sax/features/validation", false); xmlParser.setErrorHandler(this); xmlParser.setEntityResolver(this); } catch (Exception ex) { ex.printStackTrace(); } xmlParser.setContentHandler(this); InputStream cis = HTTPURL.getInputStream(ac, connection); InputSource source = new InputSource(cis); String streamEncoding = HTTPURL.getCharacterEncoding(ac, connection); String httpCL = connection.getHeaderField("Content-Location"); if (httpCL != null) { baseURI = HTTPURL.getURL(baseURI, httpCL); documentURI = baseURI; if (streamEncoding != null) { ac.setCharsetForURL(baseURI, streamEncoding); } } if (streamEncoding != null) { source.setEncoding(streamEncoding); } //else { // String ctype = connection.getContentType(); // if (ctype != null) { // try { // MimeType repmime = new MimeType(ctype); // if (repmime.hasParameter("charset")) { // source.setEncoding(repmime.getParameterValue("charset")); // } else { // // if text/html and no given charset, let's assume // // iso-8859-1. Ideally, the parser would change the // // encoding if it find a mismatch, not sure, but well... // if (repmime.match(MimeType.TEXT_HTML) == // MimeType.MATCH_SPECIFIC_SUBTYPE) { // source.setEncoding("iso-8859-1"); // } // } // } catch (Exception ex) {} // } //} source.setSystemId(urlString); try { xmlParser.parse(source); } finally { cis.close(); } } HashMap<String, String> getValues(String data) { int length = data.length(); int current = 0; char c; StringBuilder name = new StringBuilder(10); StringBuilder value = new StringBuilder(128); StringBuilder entity_name = new StringBuilder(16); int state = 0; HashMap<String, String> table = new HashMap<String, String>(); while (current < length) { c = data.charAt(current); switch (state) { case 0: switch (c) { case ' ': case '\t': case '\n': // \r are normalized per XML spec // nothing break; case '"': case '\'': return table; case 'h': case 't': case 'm': case 'c': case 'a': case 'r': name.setLength(0); // reset the name value.setLength(0); // reset the value name.append(c); // start to build the name state = 1; break; default: // anything else is invalid return table; } break; case 1: // in the "attribute" name inside the PI if ((c >= 'a') && (c <= 'z')) { name.append(c); } else if ((c == ' ') || (c == '\t') || (c == '\n')) { state = 2; } else if (c == '=') { state = 3; } else { // anything else is invalid state = 0; } break; case 2: // waiting for = switch (c) { case ' ': case '\t': case '\n': // nothing break; case '=': state = 3; break; default: // anything else is invalid return table; } break; case 3: // waiting for ' or " switch (c) { case ' ': case '\t': case '\n': // nothing break; case '"': state = 4; break; case '\'': state = 5; break; default: // anything else is invalid return table; } break; case 4: case 5: // in the "attribute" value inside the PI switch (c) { case '&': // predefined entities amp, lt, gt, quot, apos entity_name.setLength(0); state += 10; break; case '<': return table; case '"': if (state == 4) { state = 6; } else { value.append(c); } break; case '\'': if (state == 5) { state = 6; } else { value.append(c); } break; default: value.append(c); } break; case 6: // waiting a white space table.put(name.toString(), value.toString()); name.setLength(0); // reset the name value.setLength(0); // reset the value switch (c) { case ' ': case '\n': case '\t': state = 0; break; default: return table; } break; case 14: case 15: // in the entity switch (c) { case 'a': case 'm': case 'p': case 'l': case 't': case 'g': case 'q': case 'u': case 'o': case 's': entity_name.append(c); break; case ';': String entity = entity_name.toString(); if ("amp".equals(entity)) { value.append('&'); } else if ("lt".equals(entity)) { value.append('<'); } else if ("gt".equals(entity)) { value.append('>'); } else if ("quote".equals(entity)) { value.append('"'); } else if ("apos".equals(entity)) { value.append('\''); } else { return table; } state -= 10; break; default: return table; } } current++; } if (name.length() != 0 && value.length() != 0) { table.put(name.toString(), value.toString()); } return table; } }
Received on Sunday, 30 October 2011 21:02:57 UTC