W3C home > Mailing lists > Public > www-validator-cvs@w3.org > September 2007

2002/css-validator/org/w3c/css/css TagSoupStyleSheetHandler.java,1.3,1.4

From: Yves Lafon via cvs-syncmail <cvsmail@w3.org>
Date: Tue, 18 Sep 2007 17:59:23 +0000
To: www-validator-cvs@w3.org
Message-Id: <E1IXhM3-0002D4-W0@lionel-hutz.w3.org>

Update of /sources/public/2002/css-validator/org/w3c/css/css
In directory hutz:/tmp/cvs-serv8455

Modified Files:
	TagSoupStyleSheetHandler.java 
Log Message:
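Added support for detecting the character encoding from a byte order mark (BOM) at the start of the stream; the Content-Type charset is used only when no BOM is present.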


Index: TagSoupStyleSheetHandler.java
===================================================================
RCS file: /sources/public/2002/css-validator/org/w3c/css/css/TagSoupStyleSheetHandler.java,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -d -r1.3 -r1.4
--- TagSoupStyleSheetHandler.java	10 Aug 2007 14:52:37 -0000	1.3
+++ TagSoupStyleSheetHandler.java	18 Sep 2007 17:59:21 -0000	1.4
@@ -43,12 +43,14 @@
 import org.xml.sax.SAXParseException;
 import org.xml.sax.ext.LexicalHandler;
 
+import org.apache.velocity.io.UnicodeInputStream;
+
 /**
  * @version $Revision$
  * @author  Philippe Le Hegaret
  */
 public class TagSoupStyleSheetHandler implements ContentHandler,
-    LexicalHandler, ErrorHandler, EntityResolver {
+				      LexicalHandler, ErrorHandler, EntityResolver {
 
     static String XHTML_NS = "http://www.w3.org/1999/xhtml";
 
@@ -100,9 +102,9 @@
         throws SAXException {
     }
 
-     public void endPrefixMapping (String prefix)
-	 throws SAXException {
-     }
+    public void endPrefixMapping (String prefix)
+	throws SAXException {
+    }
 
     public void characters (char ch[], int start, int length)
         throws SAXException {
@@ -116,7 +118,7 @@
 	if (inStyle) {
             int line = (locator != null ? locator.getLineNumber() : -1);
             Warning w = new Warning(baseURI.toString(), line,
-                    "style-inside-comment", 0, ac);
+				    "style-inside-comment", 0, ac);
             Warnings warnings = new Warnings(ac.getWarningLevel());
             warnings.addWarning(w);
             styleSheetParser.notifyWarnings(warnings);
@@ -173,8 +175,8 @@
 		    new MimeType(type);
 		} catch (Exception ex) { /* at worst, null */ };
 		if (mt != null && (MimeType.TEXT_CSS.match(mt) ==
-		                           MimeType.MATCH_SPECIFIC_SUBTYPE)) {
-		// we're dealing with a stylesheet...
+				   MimeType.MATCH_SPECIFIC_SUBTYPE)) {
+		    // we're dealing with a stylesheet...
 		    URL url;
 
 		    try {
@@ -263,7 +265,7 @@
 		    return;
 		}
 		if (MimeType.TEXT_CSS.match(mt) !=
-		                            MimeType.MATCH_SPECIFIC_SUBTYPE) {
+		    MimeType.MATCH_SPECIFIC_SUBTYPE) {
 		    return;
 		}
 		if (href == null) {
@@ -274,8 +276,8 @@
 		    }
 		    CssError er =
 			new CssError(baseURI.toString(), line,
-				                    new InvalidParamException(
-					            "unrecognized.link", ac));
+				     new InvalidParamException(
+					 "unrecognized.link", ac));
 		    Errors ers = new Errors();
 		    ers.addError(er);
 		    styleSheetParser.notifyErrors(ers);
@@ -283,7 +285,7 @@
 		}
 
 		if ((rel != null) &&
-		              rel.toLowerCase().indexOf("stylesheet") != -1) {
+		    rel.toLowerCase().indexOf("stylesheet") != -1) {
 		    // we're dealing with a stylesheet...
 		    // @@TODO alternate stylesheet
 		    URL url;
@@ -340,7 +342,7 @@
 		    CssError er =
 			new CssError(baseURI.toString(), line,
 				     new InvalidParamException(
-					             "unrecognized.link", ac));
+					 "unrecognized.link", ac));
 		    Errors ers = new Errors();
 		    ers.addError(er);
 		    styleSheetParser.notifyErrors(ers);
@@ -348,7 +350,7 @@
 		    try {
 			MimeType mt = new MimeType(type);
 			if (MimeType.TEXT_CSS.match(mt) ==
-			                     MimeType.MATCH_SPECIFIC_SUBTYPE) {
+			    MimeType.MATCH_SPECIFIC_SUBTYPE) {
 			    text.setLength(0);
 			    inStyle = true;
 			}
@@ -426,7 +428,7 @@
     }
 
     public void startDTD (String name, String publicId,
-                                   String systemId)
+			  String systemId)
         throws SAXException {
     }
 
@@ -512,16 +514,16 @@
     	documentURI = new URL(fileName);
     	source.setSystemId(fileName);
     	try {
-			xmlParser.parse(source);
+	    xmlParser.parse(source);
     	} finally {
-			in.close();
+	    in.close();
     	}
     }
 
     void parse(URL url) throws Exception {
 	InputSource source = new InputSource();
 	URLConnection connection;
-	InputStream in;
+	UnicodeInputStream in;
 	org.xml.sax.XMLReader xmlParser = new org.ccil.cowan.tagsoup.Parser();
 	try {
 	    xmlParser.setProperty("http://xml.org/sax/properties/lexical-handler",
@@ -530,9 +532,9 @@
 	    xmlParser.setFeature("http://xml.org/sax/features/validation", false);
 	    /*
 	      xmlParser.setFeature("http://xml.org/sax/features/external-parameter-entities",
-				  false);
-	    xmlParser.setFeature("http://xml.org/sax/features/external-general-entities",
-				  false);
+	      false);
+	      xmlParser.setFeature("http://xml.org/sax/features/external-general-entities",
+	      false);
 	    */
 	} catch (Exception ex) {
 	    ex.printStackTrace();
@@ -540,19 +542,25 @@
 	xmlParser.setContentHandler(this);
 
 	connection = HTTPURL.getConnection(url, ac);
-	in = connection.getInputStream();
+	in = new UnicodeInputStream(connection.getInputStream());
+	String streamEncoding = in.getEncodingFromStream();
+
 	String httpCL = connection.getHeaderField("Content-Location");
 	if (httpCL != null) {
 	    baseURI = HTTPURL.getURL(baseURI, httpCL);
 	    documentURI = baseURI;
 	}
-	String ctype = connection.getContentType();
-	if (ctype != null) {
-	    try {
-		MimeType repmime = new MimeType(ctype);
-		if (repmime.hasParameter("charset"))
-		    source.setEncoding(repmime.getParameterValue("charset"));
-	    } catch (Exception ex) {}
+	if (streamEncoding != null) {
+	    source.setEncoding(streamEncoding);
+	} else {
+	    String ctype = connection.getContentType();
+	    if (ctype != null) {
+		try {
+		    MimeType repmime = new MimeType(ctype);
+		    if (repmime.hasParameter("charset"))
+			source.setEncoding(repmime.getParameterValue("charset"));
+		} catch (Exception ex) {}
+	    }
 	}
 	source.setByteStream(in);
 	try {
@@ -576,24 +584,30 @@
 	    ex.printStackTrace();
 	}
 	xmlParser.setContentHandler(this);
-	InputStream cis = connection.getInputStream();
+	UnicodeInputStream cis = new UnicodeInputStream(connection.getInputStream());
 	InputSource source = new InputSource(cis);
-	String ctype = connection.getContentType();
-	if (ctype != null) {
-	    try {
-		MimeType repmime = new MimeType(ctype);
-		if (repmime.hasParameter("charset")) {
-		    source.setEncoding(repmime.getParameterValue("charset"));
-		} else {
-		    // if text/html and no given charset, let's assume
-		    // iso-8859-1. Ideally, the parser would change the
-		    // encoding if it find a mismatch, not sure, but well...
-		    if (repmime.match(MimeType.TEXT_HTML) ==
-			                     MimeType.MATCH_SPECIFIC_SUBTYPE) {
-			source.setEncoding("iso-8859-1");
+	String streamEncoding = cis.getEncodingFromStream();
+	// if we get a BOM, use that for the encoding... otherwise CT, then iso-8859-1
+	if (streamEncoding != null) {
+	    source.setEncoding(streamEncoding);
+	} else {
+	    String ctype = connection.getContentType();
+	    if (ctype != null) {
+		try {
+		    MimeType repmime = new MimeType(ctype);
+		    if (repmime.hasParameter("charset")) {
+			source.setEncoding(repmime.getParameterValue("charset"));
+		    } else {
+			// if text/html and no given charset, let's assume
+			// iso-8859-1. Ideally, the parser would change the
+			// encoding if it find a mismatch, not sure, but well...
+			if (repmime.match(MimeType.TEXT_HTML) ==
+			    MimeType.MATCH_SPECIFIC_SUBTYPE) {
+			    source.setEncoding("iso-8859-1");
+			}
 		    }
-		}
-	    } catch (Exception ex) {}
+		} catch (Exception ex) {}
+	    }
 	}
 	source.setSystemId(urlString);
 	try {
Received on Tuesday, 18 September 2007 17:59:35 GMT

This archive was generated by hypermail 2.2.0+W3C-0.50 : Thursday, 26 April 2012 12:54:59 GMT