- From: Yves Lafon via cvs-syncmail <cvsmail@w3.org>
- Date: Sun, 30 Oct 2011 21:02:53 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/2002/css-validator/org/w3c/css/css
In directory hutz:/tmp/cvs-serv12204
Added Files:
HTMLParserStyleSheetHandler.java
Log Message:
Added support for the HTML5 parser http://about.validator.nu/htmlparser/
--- NEW FILE: HTMLParserStyleSheetHandler.java ---
/*
* Copyright (c) 2001 World Wide Web Consortium,
* (Massachusetts Institute of Technology, Institut National de
* Recherche en Informatique et en Automatique, Keio University). All
* Rights Reserved. This program is distributed under the W3C's Software
* Intellectual Property License. This program is distributed in the
* hope that it will be useful, but WITHOUT ANY WARRANTY; without even
* the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE.
* See W3C License http://www.w3.org/Consortium/Legal/ for more details.
*
* $Id: HTMLParserStyleSheetHandler.java,v 1.1 2011/10/30 21:02:51 ylafon Exp $
*/
package org.w3c.css.css;
import org.w3c.css.parser.CssError;
import org.w3c.css.parser.Errors;
import org.w3c.css.parser.analyzer.TokenMgrError;
import org.w3c.css.util.ApplContext;
import org.w3c.css.util.CssVersion;
import org.w3c.css.util.HTTPURL;
import org.w3c.css.util.InvalidParamException;
import org.w3c.css.util.Util;
import org.w3c.css.util.Warning;
import org.w3c.css.util.Warnings;
import org.w3c.css.util.xml.XMLCatalog;
import org.w3c.www.mime.MimeType;
import org.w3c.www.mime.MimeTypeFormatException;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.EntityResolver;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.ext.LexicalHandler;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringBufferInputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.HashMap;
import static nu.validator.htmlparser.common.XmlViolationPolicy.ALLOW;
/**
* @author Philippe Le Hegaret
* @version $Revision: 1.1 $
*/
public class HTMLParserStyleSheetHandler implements ContentHandler, LexicalHandler,
ErrorHandler, EntityResolver {
// Namespace URI that startElement/endElement compare against to recognise XHTML elements.
static String XHTML_NS = "http://www.w3.org/1999/xhtml";
// Counter used by handleStyleAttribute to build an id for elements that carry a style attribute but no id.
private static long autoIdCount;
// Namespace URI of the first element reported to startElement.
String namespaceURI;
// True until the first startElement call has been processed.
boolean isRoot = true;
// Validation context passed to the StyleSheetParser calls and to the Warning/InvalidParamException constructors.
ApplContext ac;
// URI used to resolve <base href> and passed to parseStyleElement/parseStyleAttribute.
URL documentURI = null;
// URI used to resolve link hrefs and system identifiers, and reported as the source of warnings and errors.
URL baseURI = null;
// Set by startDTD: true when the DOCTYPE name is "html" (case-insensitive).
boolean isHTML5;
// StyleSheet styleSheet = new StyleSheet();
// Parser that receives every style sheet, style element and style attribute found by this handler.
StyleSheetParser styleSheetParser = new StyleSheetParser();
// True while the content of a style element is being accumulated in 'text'.
boolean inStyle = false;
// media, type and title attributes of the most recent XHTML style element.
String media = null;
String type = null;
String title = null;
// Not read or written by any method visible in this class.
String charset = null;
// Accumulates the character data of the current style element.
StringBuilder text = new StringBuilder();
// Locator supplied through setDocumentLocator; used for line numbers, may be null.
Locator locator;
// Catalog consulted by resolveEntity to map public/system identifiers to URIs.
static XMLCatalog catalog = new XMLCatalog();
/**
 * Builds a handler for the document located at {@code baseURI}.
 *
 * @param baseURI the URL stored both as the document URI and as the
 *                initial base URI
 * @param ac      the validation context stored for later use by the
 *                callbacks of this handler
 */
public HTMLParserStyleSheetHandler(URL baseURI, ApplContext ac) {
    this.ac = ac;
    this.baseURI = baseURI;
    this.documentURI = baseURI;
}
/**
 * Stores the locator handed over by the parser; the other callbacks read
 * line numbers from it when it is non-null.
 *
 * @param locator the locator to remember
 */
public void setDocumentLocator(Locator locator) {
this.locator = locator;
}
/**
 * Start-of-document callback; intentionally does nothing.
 */
public void startDocument()
throws SAXException {
}
/**
 * End-of-document callback; calls {@code ac.setInput("text/xml")}.
 * NOTE(review): the value is "text/xml" although this handler is driven by
 * an HTML parser -- confirm this is intended.
 */
public void endDocument()
throws SAXException {
ac.setInput("text/xml");
}
/**
 * Prefix-mapping start callback; namespace prefix mappings are ignored.
 */
public void startPrefixMapping(String prefix, String uri)
throws SAXException {
}
/**
 * Prefix-mapping end callback; namespace prefix mappings are ignored.
 */
public void endPrefixMapping(String prefix)
throws SAXException {
}
/**
 * Collects character data while the content of a style element is being
 * read; character data anywhere else is ignored.
 *
 * @param ch     buffer holding the characters
 * @param start  index of the first character to take from {@code ch}
 * @param length number of characters to take
 */
public void characters(char ch[], int start, int length)
        throws SAXException {
    if (!inStyle) {
        return;
    }
    text.append(ch, start, length);
}
/**
 * Reports a "style-inside-comment" warning when a comment is found while
 * the content of a style element is being collected.
 *
 * The comment text itself is deliberately not appended to the collected
 * style content, per http://www.w3.org/Bugs/Public/show_bug.cgi?id=761
 *
 * @param ch     buffer holding the comment characters (unused)
 * @param start  start offset in {@code ch} (unused)
 * @param length number of comment characters (unused)
 */
public void comment(char ch[], int start, int length)
        throws SAXException {
    if (!inStyle) {
        return;
    }
    int line = -1;
    if (locator != null) {
        line = locator.getLineNumber();
    }
    Warning insideComment = new Warning(baseURI.toString(), line,
            "style-inside-comment", 0, ac);
    Warnings collected = new Warnings(ac.getWarningLevel());
    collected.addWarning(insideComment);
    styleSheetParser.notifyWarnings(collected);
}
/**
 * Ignorable-whitespace callback; intentionally does nothing.
 */
public void ignorableWhitespace(char ch[], int start, int length)
throws SAXException {
}
/**
 * Handles an {@code xml-stylesheet} processing instruction by parsing the
 * external CSS style sheet it references. Any other processing
 * instruction is ignored.
 *
 * A missing or empty {@code href} is reported as an "unrecognized.link"
 * error. An {@code href} starting with '#' (internal style sheet) and a
 * missing, malformed or non text/css {@code type} are skipped silently.
 *
 * @param target the processing instruction target
 * @param data   the processing instruction data, may be null
 */
public void processingInstruction(String target, String data)
        throws SAXException {
    if (!"xml-stylesheet".equals(target)) {
        return;
    }
    // SAX may report null data; treat it as "no pseudo-attributes".
    HashMap<String, String> atts = (data == null)
            ? new HashMap<String, String>()
            : getValues(data);
    String alternate = atts.get("alternate");
    String type = atts.get("type");
    String href = atts.get("href");
    if (Util.onDebug) {
        System.err.println("<?xml-stylesheet alternate=\"" + alternate
                + "\" type=\"" + type
                + "\"" + " href=\"" + href + "\"?>");
    }
    String rel = "yes".equalsIgnoreCase(alternate)
            ? "alternate stylesheet"
            : "stylesheet";

    if (href == null || href.length() == 0) {
        int line = (locator != null) ? locator.getLineNumber() : -1;
        CssError er = new CssError(baseURI.toString(), line,
                new InvalidParamException("unrecognized.link", ac));
        Errors ers = new Errors();
        ers.addError(er);
        styleSheetParser.notifyErrors(ers);
        // nothing to fetch; previously execution fell through and
        // dereferenced the null href
        return;
    }
    if (href.charAt(0) == '#') {
        // internal style sheet, will be processed by the parser
        return;
    }
    if (type == null) {
        return;
    }
    MimeType mt;
    try {
        // the parsed type must be kept, otherwise the match below can
        // never succeed
        mt = new MimeType(type);
    } catch (MimeTypeFormatException ex) {
        return; // unparsable type: not a style sheet we can handle
    }
    if (MimeType.TEXT_CSS.match(mt) != MimeType.MATCH_SPECIFIC_SUBTYPE) {
        return;
    }
    // we're dealing with a stylesheet...
    URL url;
    try {
        if (baseURI != null) {
            url = new URL(baseURI, href);
        } else {
            url = new URL(href);
        }
    } catch (MalformedURLException e) {
        return; // Ignore errors
    }
    if (Util.onDebug) {
        System.err.println("[HTMLParserStyleSheetHandler::" +
                "initialize(): "
                + "should parse CSS url: "
                + url.toString() + "]");
    }
    String media = atts.get("media");
    if (media == null && ac.getCssVersion() != CssVersion.CSS1) {
        media = "all";
    }
    styleSheetParser.parseURL(ac,
            url,
            atts.get("title"),
            rel,
            media,
            StyleSheetOrigin.AUTHOR);
    if (Util.onDebug) {
        System.err.println("[parsed!]");
    }
}
/**
 * Skipped-entity callback; intentionally does nothing.
 */
public void skippedEntity(String name)
throws SAXException {
}
/**
 * Element-start callback. Dispatches on the element:
 * <ul>
 * <li>XHTML {@code base}: updates the base and document URIs;</li>
 * <li>XHTML {@code link}: parses the referenced style sheet;</li>
 * <li>XHTML {@code style}: starts collecting the element content;</li>
 * <li>any other XHTML element: parses its {@code style} attribute;</li>
 * <li>non-XHTML element: parses a {@code style} attribute that lives in
 * the XHTML namespace (recommended by UI Tech TF).</li>
 * </ul>
 * The namespace of the very first element is remembered in
 * {@code this.namespaceURI}.
 *
 * @param namespaceURI namespace of the element
 * @param localName    local name of the element
 * @param qName        qualified name of the element (unused)
 * @param atts         attributes of the element
 */
public void startElement(String namespaceURI,
                         String localName,
                         String qName,
                         Attributes atts) throws SAXException {
    if (isRoot) {
        isRoot = false;
        this.namespaceURI = namespaceURI;
    }

    if (!XHTML_NS.equals(namespaceURI)) {
        // the style attribute, recommended by UI Tech TF
        String foreignStyle = atts.getValue(XHTML_NS, "style");
        if (foreignStyle != null) {
            String foreignId = atts.getValue(XHTML_NS, "id");
            handleStyleAttribute(foreignStyle, foreignId);
        }
        return;
    }

    if ("base".equals(localName)) {
        startBaseElement(atts);
    } else if ("link".equals(localName)) {
        startLinkElement(atts);
    } else if ("style".equals(localName)) {
        startStyleElement(atts);
    } else {
        String inlineStyle = atts.getValue("style");
        if (inlineStyle != null) { // here we have a style attribute
            String elementId = atts.getValue("id");
            handleStyleAttribute(inlineStyle, elementId);
        }
    }
}

/** Applies a {@code base href} to both the base URI and the document URI. */
private void startBaseElement(Attributes atts) throws SAXException {
    String href = atts.getValue("href");
    if (Util.onDebug) {
        System.err.println("BASE href=\"" + href + "\"");
    }
    if (href == null) {
        return;
    }
    try {
        baseURI = new URL(documentURI, href);
        documentURI = baseURI;
    } catch (MalformedURLException e) {
        // Ignore errors
    }
}

/** Parses the style sheet referenced by a {@code link rel="...stylesheet..."}. */
private void startLinkElement(Attributes atts) throws SAXException {
    String rel = atts.getValue("rel");
    String linkType = atts.getValue("type");
    String href = atts.getValue("href");
    if (Util.onDebug) {
        System.err.println("HTMLParser: link rel=\"" + rel
                + "\" type=\"" + linkType
                + "\"" + " href=\"" + href + "\"");
    }
    // @@TODO alternate stylesheet
    if (rel == null || rel.toLowerCase().indexOf("stylesheet") == -1) {
        return; // not a style sheet link
    }

    // first we check if there is an href
    if (href == null) {
        int line = (locator != null) ? locator.getLineNumber() : -1;
        CssError missingHref = new CssError(baseURI.toString(), line,
                new InvalidParamException("unrecognized.link", ac));
        Errors errors = new Errors();
        errors.addError(missingHref);
        styleSheetParser.notifyErrors(errors);
        return;
    }

    // If so, check the type
    if (linkType != null) {
        MimeType mt;
        try {
            mt = new MimeType(linkType);
        } catch (MimeTypeFormatException mtfe) {
            return;
        }
        if (MimeType.TEXT_CSS.match(mt) != MimeType.MATCH_SPECIFIC_SUBTYPE) {
            return;
        }
    } else if (!isHTML5) {
        warnMissingType("link-type");
    }

    // then prepare for parsing
    URL url;
    try {
        url = (baseURI != null) ? new URL(baseURI, href) : new URL(href);
    } catch (MalformedURLException e) {
        return; // Ignore errors
    }
    if (Util.onDebug) {
        System.err.println("[HTMLParserStyleSheetHandler::initialize(): "
                + "should parse CSS url: "
                + url.toString() + "]");
    }
    String linkMedia = atts.getValue("media");
    if (linkMedia == null && ac.getCssVersion() != CssVersion.CSS1) {
        linkMedia = "all";
    }
    styleSheetParser.parseURL(ac, url, atts.getValue("title"), rel,
            linkMedia, StyleSheetOrigin.AUTHOR);
    if (Util.onDebug) {
        System.err.println("[parsed!]");
    }
}

/** Records the attributes of a {@code style} element and starts collecting its text when it is CSS. */
private void startStyleElement(Attributes atts) throws SAXException {
    media = atts.getValue("media");
    type = atts.getValue("type");
    title = atts.getValue("title");
    if (media == null && ac.getCssVersion() != CssVersion.CSS1) {
        media = "all";
    }
    if (Util.onDebug) {
        System.err.println("style media=\"" + media
                + "\" type=\"" + type
                + "\"" + " title=\"" + title + "\"");
    }

    boolean collect;
    if (type == null) {
        // By default we consider that it is CSS for HTML content
        // and raise a warning about the missing type attribute.
        // (per HTML5 spec)
        if (!isHTML5) {
            warnMissingType("style-type");
        }
        collect = true;
    } else {
        try {
            MimeType mt = new MimeType(type);
            collect = MimeType.TEXT_CSS.match(mt)
                    == MimeType.MATCH_SPECIFIC_SUBTYPE;
        } catch (MimeTypeFormatException ex) {
            collect = false; // unparsable type: content is not collected
        }
    }
    if (collect) {
        text.setLength(0);
        inStyle = true;
    }
}

/** Notifies the style sheet parser of a single warning identified by {@code warningKey}. */
private void warnMissingType(String warningKey) throws SAXException {
    int line = (locator != null) ? locator.getLineNumber() : -1;
    Warning w = new Warning(baseURI.toString(), line, warningKey, 0, ac);
    Warnings warnings = new Warnings(ac.getWarningLevel());
    warnings.addWarning(w);
    styleSheetParser.notifyWarnings(warnings);
}
/**
 * Element-end callback. When an XHTML {@code style} element whose content
 * was being collected ends, collection stops and any non-empty collected
 * text is handed to the style sheet parser together with the stored title
 * and media, the document URI and the current line number (0 when no
 * locator is available).
 *
 * NOTE(review): the text is wrapped in a StringBufferInputStream, which
 * keeps only the low byte of each character -- confirm whether non
 * Latin-1 content matters here.
 *
 * @param namespaceURI namespace of the element
 * @param localName    local name of the element
 * @param qName        qualified name of the element (unused)
 */
public void endElement(String namespaceURI, String localName,
                       String qName)
        throws SAXException {
    int line = (locator != null) ? locator.getLineNumber() : 0;

    boolean closesCollectedStyle = XHTML_NS.equals(namespaceURI)
            && "style".equals(localName)
            && inStyle;
    if (!closesCollectedStyle) {
        return;
    }
    inStyle = false;
    if (text.length() == 0) {
        return; // empty style element, nothing to parse
    }
    if (Util.onDebug) {
        System.err.println("PARSE [" + text.toString() + "]");
    }
    styleSheetParser.parseStyleElement(ac,
            new StringBufferInputStream(text.toString()),
            title, media,
            documentURI, line);
}
/**
 * Parses the content of a {@code style} attribute.
 *
 * When the element has no id, one is generated as {@code "#autoXML"}
 * followed by the current value of the shared counter written twice (the
 * original code carries a "FIXME why two times?" on this); the counter is
 * then incremented. A real id should never contain '#', so generated ids
 * cannot clash with author ids.
 *
 * A {@code TokenMgrError} raised by the parser is converted into a
 * {@code CssError} and reported through the style sheet parser.
 *
 * @param value the text of the style attribute
 * @param id    the id of the owning element, or null to generate one
 */
public void handleStyleAttribute(String value, String id) {
    if (id == null) {
        long serial = autoIdCount++;
        id = "#autoXML" + serial + serial;
    }

    int line = (locator != null) ? locator.getLineNumber() : 0;

    // parse the style attribute.
    try {
        ByteArrayInputStream styleBytes =
                new ByteArrayInputStream(value.getBytes());
        styleSheetParser.parseStyleAttribute(ac, styleBytes, id,
                documentURI, line);
    } catch (TokenMgrError ex) {
        CssError lexicalError = new CssError(baseURI.toString(), line, ex);
        Errors reported = new Errors();
        reported.addError(lexicalError);
        styleSheetParser.notifyErrors(reported);
    }
}
/**
 * Returns the style sheet held by the underlying style sheet parser.
 *
 * @return the result of {@code styleSheetParser.getStyleSheet()}
 */
public StyleSheet getStyleSheet() {
return styleSheetParser.getStyleSheet();
}
/**
 * DOCTYPE callback; flags the document as HTML5 when the doctype name is
 * "html", compared case-insensitively. The public and system identifiers
 * are not inspected.
 *
 * @param name     the doctype name
 * @param publicId the public identifier (unused)
 * @param systemId the system identifier (unused)
 */
public void startDTD(String name, String publicId,
String systemId)
throws SAXException {
// check that we received <!DOCTYPE html>
isHTML5 = ("html".equalsIgnoreCase(name));
}
/**
 * End-of-DOCTYPE callback; intentionally does nothing.
 */
public void endDTD()
throws SAXException {
}
/**
 * Entity-start callback; intentionally does nothing.
 */
public void startEntity(String name)
throws SAXException {
}
/**
 * Entity-end callback; intentionally does nothing.
 */
public void endEntity(String name)
throws SAXException {
}
/**
 * CDATA-start callback; intentionally does nothing.
 */
public void startCDATA()
throws SAXException {
}
/**
 * CDATA-end callback; intentionally does nothing.
 */
public void endCDATA()
throws SAXException {
}
/**
 * Recoverable parse error callback; the error is ignored.
 */
public void error(SAXParseException exception) throws SAXException {
}
/**
 * Fatal parse error callback; rethrows the exception it receives.
 *
 * @throws SAXException always, the given {@code exception}
 */
public void fatalError(SAXParseException exception) throws SAXException {
throw exception;
}
/**
 * Parse warning callback; the warning is ignored.
 */
public void warning(SAXParseException exception) throws SAXException {
}
/**
 * Resolves an external entity through the XML catalog.
 *
 * For the three XHTML 1.0 public identifiers (Transitional, Strict,
 * Frameset) a "xhtml.system_identifier.invalid" warning is added to the
 * context's frame when the system identifier is not the matching W3C DTD
 * URL. The catalog is then looked up by public identifier first and by
 * system identifier second. Without a catalog hit the system identifier
 * is resolved against the base URI.
 *
 * @param publicId the public identifier of the entity, may be null
 * @param systemId the system identifier of the entity, may be null
 * @return an input source for the resolved URI, or null when nothing can
 *         be resolved (no catalog entry and no system identifier), which
 *         lets the parser apply its default behaviour
 * @throws IOException when the system identifier cannot be turned into a
 *                     URL
 */
public InputSource resolveEntity(String publicId, String systemId)
        throws SAXException, IOException {
    String uri = null;
    if (publicId != null) {
        String expected = expectedXhtml10SystemId(publicId);
        if (expected != null && !expected.equals(systemId)
                && ac != null && ac.getFrame() != null) {
            ac.getFrame().addWarning("xhtml.system_identifier.invalid");
        }
        uri = catalog.getProperty(publicId);
    }
    if (uri == null && systemId != null) {
        uri = catalog.getProperty(systemId);
    }
    if (uri != null) {
        return new InputSource(uri);
    }
    if (systemId == null) {
        // new URL(baseURI, null) would only fail; defer to the parser
        return null;
    }
    return new InputSource(new URL(baseURI, systemId).toString());
}

/** Returns the DTD URL expected for an XHTML 1.0 public identifier, or null for any other identifier. */
private static String expectedXhtml10SystemId(String publicId) {
    if ("-//W3C//DTD XHTML 1.0 Transitional//EN".equals(publicId)) {
        return "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd";
    }
    if ("-//W3C//DTD XHTML 1.0 Strict//EN".equals(publicId)) {
        return "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";
    }
    if ("-//W3C//DTD XHTML 1.0 Frameset//EN".equals(publicId)) {
        return "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd";
    }
    return null;
}
/**
 * Parses an HTML document read from {@code in} with this handler as
 * content and lexical handler.
 *
 * {@code fileName} must be an absolute URL: it becomes the base URI, the
 * document URI and the system identifier of the input. The stream is
 * always closed before this method returns, including when
 * {@code fileName} is not a valid URL.
 *
 * @param in       the stream holding the document; closed by this method
 * @param fileName the URL of the document
 * @throws IOException  on read failure or when {@code fileName} is not a
 *                      valid URL
 * @throws SAXException when the parser reports a fatal error
 */
public void parse(InputStream in, String fileName) throws IOException, SAXException {
    try {
        InputSource source = new InputSource(in);
        org.xml.sax.XMLReader xmlParser = new nu.validator.htmlparser.sax.HtmlParser(ALLOW);
        try {
            xmlParser.setProperty("http://xml.org/sax/properties/lexical-handler",
                    this);
            xmlParser.setFeature("http://xml.org/sax/features/validation", false);
        } catch (Exception ex) {
            // best effort: parsing goes on without the optional settings
            ex.printStackTrace();
        }
        xmlParser.setContentHandler(this);
        baseURI = new URL(fileName);
        documentURI = new URL(fileName);
        source.setSystemId(fileName);
        xmlParser.parse(source);
    } finally {
        in.close();
    }
}
/**
 * Fetches {@code url} through {@code HTTPURL} and parses the response as
 * HTML with this handler as content and lexical handler.
 *
 * A {@code Content-Location} response header replaces the base and
 * document URIs. The character encoding reported for the connection, when
 * there is one, is set on the input source. The stream that was opened
 * here is the one handed to the parser (the document is not fetched a
 * second time) and it is always closed.
 *
 * @param url the URL of the document to parse
 * @throws Exception on connection, read or parse failure
 */
void parse(URL url) throws Exception {
    org.xml.sax.XMLReader xmlParser = new nu.validator.htmlparser.sax.HtmlParser(ALLOW);
    try {
        xmlParser.setProperty("http://xml.org/sax/properties/lexical-handler",
                this);
        xmlParser.setFeature("http://xml.org/sax/features/validation", false);
    } catch (Exception ex) {
        // best effort: parsing goes on without the optional settings
        ex.printStackTrace();
    }
    xmlParser.setContentHandler(this);

    URLConnection connection = HTTPURL.getConnection(url, ac);
    InputStream in = HTTPURL.getInputStream(ac, connection);
    try {
        InputSource source = new InputSource(in);
        String streamEncoding = HTTPURL.getCharacterEncoding(ac, connection);
        String httpCL = connection.getHeaderField("Content-Location");
        if (httpCL != null) {
            baseURI = HTTPURL.getURL(baseURI, httpCL);
            documentURI = baseURI;
            if (streamEncoding != null) {
                ac.setCharsetForURL(baseURI, streamEncoding);
            }
        }
        if (streamEncoding != null) {
            source.setEncoding(streamEncoding);
        }
        source.setSystemId(url.toString());
        // parse the configured source; parsing url.toString() would ignore
        // the stream and the encoding prepared above
        xmlParser.parse(source);
    } finally {
        in.close();
    }
}
/**
 * Parses the HTML document delivered by an already opened connection,
 * with this handler as content handler, lexical handler, error handler
 * and entity resolver.
 *
 * A {@code Content-Location} response header replaces the base and
 * document URIs. The character encoding reported for the connection, when
 * there is one, is set on the input source. The connection stream is
 * always closed once it has been opened, even when the preparation steps
 * fail.
 *
 * @param urlString  the URL of the document, used as system identifier
 * @param connection the connection the document is read from
 * @throws Exception on read or parse failure
 */
void parse(String urlString, URLConnection connection) throws Exception {
    org.xml.sax.XMLReader xmlParser = new nu.validator.htmlparser.sax.HtmlParser(ALLOW);
    try {
        xmlParser.setProperty("http://xml.org/sax/properties/lexical-handler",
                this);
        xmlParser.setFeature("http://xml.org/sax/features/validation", false);
        xmlParser.setErrorHandler(this);
        xmlParser.setEntityResolver(this);
    } catch (Exception ex) {
        // best effort: parsing goes on without the optional settings
        ex.printStackTrace();
    }
    xmlParser.setContentHandler(this);

    InputStream cis = HTTPURL.getInputStream(ac, connection);
    try {
        InputSource source = new InputSource(cis);
        String streamEncoding = HTTPURL.getCharacterEncoding(ac, connection);
        String httpCL = connection.getHeaderField("Content-Location");
        if (httpCL != null) {
            baseURI = HTTPURL.getURL(baseURI, httpCL);
            documentURI = baseURI;
            if (streamEncoding != null) {
                ac.setCharsetForURL(baseURI, streamEncoding);
            }
        }
        if (streamEncoding != null) {
            source.setEncoding(streamEncoding);
        }
        source.setSystemId(urlString);
        xmlParser.parse(source);
    } finally {
        cis.close();
    }
}
/**
 * Parses the pseudo-attributes of an {@code xml-stylesheet} processing
 * instruction ({@code name="value"} or {@code name='value'} pairs
 * separated by white space) into a map.
 *
 * Only names starting with one of {@code h t m c a r} and made of
 * lowercase ASCII letters are accepted. The predefined entities
 * {@code &amp; &lt; &gt; &quot; &apos;} are expanded inside values.
 * Parsing stops at the first malformed construct and the pairs completed
 * so far are returned. A value that is still open when the data ends is
 * kept when it is not empty.
 *
 * @param data the processing instruction data, may be null
 * @return the pseudo-attributes found, never null; empty for null data
 */
HashMap<String, String> getValues(String data) {
    HashMap<String, String> table = new HashMap<String, String>();
    if (data == null) {
        return table;
    }
    StringBuilder name = new StringBuilder(10);
    StringBuilder value = new StringBuilder(128);
    StringBuilder entityName = new StringBuilder(16);
    // States: 0 before a name, 1 in a name, 2 waiting for '=',
    // 3 waiting for the opening quote, 4 in a "..." value,
    // 5 in a '...' value, 6 just after the closing quote,
    // 14/15 inside an entity reference of a 4/5 value.
    int state = 0;
    int length = data.length();

    for (int current = 0; current < length; current++) {
        char c = data.charAt(current);
        switch (state) {
            case 0:
                switch (c) {
                    case ' ':
                    case '\t':
                    case '\n': // \r are normalized per XML spec
                        break;
                    case 'h':
                    case 't':
                    case 'm':
                    case 'c':
                    case 'a':
                    case 'r':
                        name.setLength(0);
                        value.setLength(0);
                        name.append(c);
                        state = 1;
                        break;
                    default:
                        // quotes and anything else are invalid here
                        return table;
                }
                break;
            case 1: // in the "attribute" name inside the PI
                if ((c >= 'a') && (c <= 'z')) {
                    name.append(c);
                } else if ((c == ' ') || (c == '\t') || (c == '\n')) {
                    state = 2;
                } else if (c == '=') {
                    state = 3;
                } else {
                    // anything else is invalid: look for a new name
                    state = 0;
                }
                break;
            case 2: // waiting for =
                switch (c) {
                    case ' ':
                    case '\t':
                    case '\n':
                        break;
                    case '=':
                        state = 3;
                        break;
                    default:
                        return table;
                }
                break;
            case 3: // waiting for ' or "
                switch (c) {
                    case ' ':
                    case '\t':
                    case '\n':
                        break;
                    case '"':
                        state = 4;
                        break;
                    case '\'':
                        state = 5;
                        break;
                    default:
                        return table;
                }
                break;
            case 4:
            case 5: // in the "attribute" value inside the PI
                switch (c) {
                    case '&':
                        // predefined entities amp, lt, gt, quot, apos
                        entityName.setLength(0);
                        state += 10;
                        break;
                    case '<':
                        return table;
                    case '"':
                    case '\'':
                        boolean closing = (c == '"') ? (state == 4)
                                : (state == 5);
                        if (closing) {
                            // store the pair as soon as it is complete so
                            // that a trailing empty value is not lost
                            table.put(name.toString(), value.toString());
                            name.setLength(0);
                            value.setLength(0);
                            state = 6;
                        } else {
                            // the other kind of quote is plain content
                            value.append(c);
                        }
                        break;
                    default:
                        value.append(c);
                }
                break;
            case 6: // a white space must follow a closing quote
                switch (c) {
                    case ' ':
                    case '\n':
                    case '\t':
                        state = 0;
                        break;
                    default:
                        return table;
                }
                break;
            case 14:
            case 15: // in the entity
                switch (c) {
                    case 'a':
                    case 'm':
                    case 'p':
                    case 'l':
                    case 't':
                    case 'g':
                    case 'q':
                    case 'u':
                    case 'o':
                    case 's':
                        entityName.append(c);
                        break;
                    case ';': {
                        String entity = entityName.toString();
                        if ("amp".equals(entity)) {
                            value.append('&');
                        } else if ("lt".equals(entity)) {
                            value.append('<');
                        } else if ("gt".equals(entity)) {
                            value.append('>');
                        } else if ("quot".equals(entity)) {
                            // the XML predefined entity is "quot"
                            value.append('"');
                        } else if ("apos".equals(entity)) {
                            value.append('\'');
                        } else {
                            return table;
                        }
                        state -= 10; // back to the enclosing value
                        break;
                    }
                    default:
                        return table;
                }
                break;
            default:
                break;
        }
    }
    // data ended inside a value: keep what was read, as before
    if (name.length() != 0 && value.length() != 0) {
        table.put(name.toString(), value.toString());
    }
    return table;
}
}
Received on Sunday, 30 October 2011 21:02:57 UTC