2002/css-validator/org/w3c/css/css StyleSheetGenerator.java,1.23,1.24 StyleSheetParser.java,1.13,1.14 TagSoupStyleSheetHandler.java,1.5,1.6 XMLStyleSheetHandler.java,1.29,1.30

Update of /sources/public/2002/css-validator/org/w3c/css/css
In directory hutz:/tmp/cvs-serv11022/org/w3c/css/css

Modified Files:
	StyleSheetGenerator.java StyleSheetParser.java 
	TagSoupStyleSheetHandler.java XMLStyleSheetHandler.java 
Log Message:
Improve multi-byte handling and make more comprehensive use of charsets, while trying to preserve BOM support.

Index: StyleSheetParser.java
===================================================================
RCS file: /sources/public/2002/css-validator/org/w3c/css/css/StyleSheetParser.java,v
retrieving revision 1.13
retrieving revision 1.14
diff -u -d -r1.13 -r1.14
--- StyleSheetParser.java	21 Feb 2008 01:41:17 -0000	1.13
+++ StyleSheetParser.java	13 Feb 2009 21:50:15 -0000	1.14
@@ -255,7 +255,8 @@
 	try {
 
 //	    if (cssFouffa == null) {
-		cssFouffa = new CssFouffa(ac, input, url, lineno);
+	    String charset = ac.getCharsetForURL(url);
+	    cssFouffa = new CssFouffa(ac, input, charset, url, lineno);
 		cssFouffa.addListener(this);
 //	    } else {
 //		cssFouffa.ReInit(ac, input, url, lineno);
@@ -345,10 +346,11 @@
 	}
 
 	try {
-//	    if (cssFouffa == null) {
-		cssFouffa = new CssFouffa(ac, input, url, lineno);
-		cssFouffa.addListener(this);
-//	    } else
+	    //	    if (cssFouffa == null) {
+	    String charset = ac.getCharsetForURL(url);
+	    cssFouffa = new CssFouffa(ac, input, charset, url, lineno);
+	    cssFouffa.addListener(this);
+	    //	    } else
 //		cssFouffa.ReInit(ac, input, url, lineno);
 	    CssSelectors selector = new CssSelectors(ac);
 

Index: StyleSheetGenerator.java
===================================================================
RCS file: /sources/public/2002/css-validator/org/w3c/css/css/StyleSheetGenerator.java,v
retrieving revision 1.23
retrieving revision 1.24
diff -u -d -r1.23 -r1.24
--- StyleSheetGenerator.java	13 Feb 2009 14:03:36 -0000	1.23
+++ StyleSheetGenerator.java	13 Feb 2009 21:50:15 -0000	1.24
@@ -176,6 +176,7 @@
 
         try {
             template = Velocity.getTemplate("org/w3c/css/css/" + template_file);
+	    template.setEncoding("utf-8");
         } catch (ResourceNotFoundException rnfe) {
             System.err.println(rnfe.getMessage());
             rnfe.printStackTrace();

Index: XMLStyleSheetHandler.java
===================================================================
RCS file: /sources/public/2002/css-validator/org/w3c/css/css/XMLStyleSheetHandler.java,v
retrieving revision 1.29
retrieving revision 1.30
diff -u -d -r1.29 -r1.30
--- XMLStyleSheetHandler.java	10 Dec 2008 15:25:52 -0000	1.29
+++ XMLStyleSheetHandler.java	13 Feb 2009 21:50:15 -0000	1.30
@@ -43,8 +43,6 @@
 import org.xml.sax.SAXParseException;
 import org.xml.sax.ext.LexicalHandler;
 
-import org.apache.velocity.io.UnicodeInputStream;
-
 /**
  * @version $Revision$
  * @author Philippe Le Hegaret
@@ -483,7 +481,7 @@
     void parse(URL url) throws Exception {
 	InputSource source = new InputSource();
 	URLConnection connection;
-	UnicodeInputStream in;
+	InputStream in;
 	org.xml.sax.XMLReader xmlParser = new org.apache.xerces.parsers.SAXParser();
 	try {
 	    xmlParser.setProperty(
@@ -504,27 +502,31 @@
 	xmlParser.setContentHandler(this);
 	    
 	connection = HTTPURL.getConnection(url, ac);
-	in = new UnicodeInputStream(connection.getInputStream());
-	String streamEncoding = in.getEncodingFromStream();
+	in = HTTPURL.getInputStream(ac, connection);
+	String streamEncoding = HTTPURL.getCharacterEncoding(ac, connection);
 
 	String httpCL = connection.getHeaderField("Content-Location");
 	if (httpCL != null) {
 	    baseURI = HTTPURL.getURL(baseURI, httpCL);
 	    documentURI = baseURI;
+	    if (streamEncoding != null) {
+		ac.setCharsetForURL(baseURI, streamEncoding);
+	    }
 	}
 	if (streamEncoding != null) {
 	    source.setEncoding(streamEncoding);
-	} else {
-	    String ctype = connection.getContentType();
-	    if (ctype != null) {
-		try {
-		    MimeType repmime = new MimeType(ctype);
-		    if (repmime.hasParameter("charset"))
-			source.setEncoding(repmime.getParameterValue("charset"));
-		} catch (Exception ex) {
-		}
-	    }
-	}
+	} 
+	// else {
+	//    String ctype = connection.getContentType();
+	//   if (ctype != null) {
+	//	try {
+	//	    MimeType repmime = new MimeType(ctype);
+	//	    if (repmime.hasParameter("charset"))
+	//		source.setEncoding(repmime.getParameterValue("charset"));
+	//	} catch (Exception ex) {
+	//    }
+	//    }
+	//}
 	source.setByteStream(in);
 	try {
 	    xmlParser.parse(url.toString());
@@ -549,31 +551,38 @@
 	    ex.printStackTrace();
 	}
 	xmlParser.setContentHandler(this);
-	UnicodeInputStream cis = new UnicodeInputStream(connection.getInputStream());
+	InputStream cis = HTTPURL.getInputStream(ac, connection);
 	InputSource source = new InputSource(cis);
-	String streamEncoding = cis.getEncodingFromStream();
-	// if we get a BOM, use that for the encoding... otherwise CT, then iso-8859-1
-	if (streamEncoding != null) {
-	    source.setEncoding(streamEncoding);
-	} else {
-	    String ctype = connection.getContentType();
-	    if (ctype != null) {
-		try {
-		    MimeType repmime = new MimeType(ctype);
-		    if (repmime.hasParameter("charset")) {
-			source.setEncoding(repmime.getParameterValue("charset"));
-		    } else {
-			// if text/html and no given charset, let's assume
-			// iso-8859-1. Ideally, the parser would change the
-			// encoding if it find a mismatch, not sure, but well...
-			if (repmime.match(MimeType.TEXT_HTML) == MimeType.MATCH_SPECIFIC_SUBTYPE) {
-			    source.setEncoding("iso-8859-1");
-			}
-		    }
-		} catch (Exception ex) {
-		}
+	String streamEncoding = HTTPURL.getCharacterEncoding(ac, connection);
+	String httpCL = connection.getHeaderField("Content-Location");
+	if (httpCL != null) {
+	    baseURI = HTTPURL.getURL(baseURI, httpCL);
+	    documentURI = baseURI;
+	    if (streamEncoding != null) {
+		ac.setCharsetForURL(baseURI, streamEncoding);
 	    }
 	}
+	if (streamEncoding != null) {
+	    source.setEncoding(streamEncoding);
+	} //else {
+	//    String ctype = connection.getContentType();
+	//   if (ctype != null) {
+	//	try {
+	//	    MimeType repmime = new MimeType(ctype);
+	//	    if (repmime.hasParameter("charset")) {
+	//		source.setEncoding(repmime.getParameterValue("charset"));
+	//	    } else {
+	//		// if text/html and no given charset, let's assume
+	//		// iso-8859-1. Ideally, the parser would change the
+	//		// encoding if it find a mismatch, not sure, but well...
+	//		if (repmime.match(MimeType.TEXT_HTML) == MimeType.MATCH_SPECIFIC_SUBTYPE) {
+	//		    source.setEncoding("iso-8859-1");
+	//		}
+	//	    }
+	//	} catch (Exception ex) {
+	//	}
+	//   }
+	//}
 	source.setSystemId(urlString);
 	try {
 	    xmlParser.parse(source);

Index: TagSoupStyleSheetHandler.java
===================================================================
RCS file: /sources/public/2002/css-validator/org/w3c/css/css/TagSoupStyleSheetHandler.java,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -d -r1.5 -r1.6
--- TagSoupStyleSheetHandler.java	10 Dec 2008 15:10:15 -0000	1.5
+++ TagSoupStyleSheetHandler.java	13 Feb 2009 21:50:15 -0000	1.6
@@ -45,8 +45,6 @@
 import org.xml.sax.SAXParseException;
 import org.xml.sax.ext.LexicalHandler;
 
-import org.apache.velocity.io.UnicodeInputStream;
-
 /**
  * @version $Revision$
  * @author  Philippe Le Hegaret
@@ -72,6 +70,7 @@
     String media  = null;
     String type  = null;
     String title = null;
+    String charset = null;
     StringBuilder text = new StringBuilder(255);
 
     Locator locator;
@@ -535,7 +534,7 @@
     void parse(URL url) throws Exception {
 	InputSource source = new InputSource();
 	URLConnection connection;
-	UnicodeInputStream in;
+	InputStream in;
 	org.xml.sax.XMLReader xmlParser = new org.ccil.cowan.tagsoup.Parser();
 	try {
 	    xmlParser.setProperty("http://xml.org/sax/properties/lexical-handler",
@@ -554,26 +553,30 @@
 	xmlParser.setContentHandler(this);
 
 	connection = HTTPURL.getConnection(url, ac);
-	in = new UnicodeInputStream(connection.getInputStream());
-	String streamEncoding = in.getEncodingFromStream();
+	in = HTTPURL.getInputStream(ac, connection);
+	String streamEncoding = HTTPURL.getCharacterEncoding(ac, connection);
 
 	String httpCL = connection.getHeaderField("Content-Location");
 	if (httpCL != null) {
 	    baseURI = HTTPURL.getURL(baseURI, httpCL);
 	    documentURI = baseURI;
+	    if (streamEncoding != null) {
+		ac.setCharsetForURL(baseURI, streamEncoding);
+	    }
 	}
 	if (streamEncoding != null) {
 	    source.setEncoding(streamEncoding);
-	} else {
-	    String ctype = connection.getContentType();
-	    if (ctype != null) {
-		try {
-		    MimeType repmime = new MimeType(ctype);
-		    if (repmime.hasParameter("charset"))
-			source.setEncoding(repmime.getParameterValue("charset"));
-		} catch (Exception ex) {}
-	    }
-	}
+	} 
+	//else {
+	//    String ctype = connection.getContentType();
+	//   if (ctype != null) {
+	//	try {
+	//	    MimeType repmime = new MimeType(ctype);
+	//	    if (repmime.hasParameter("charset"))
+	//		source.setEncoding(repmime.getParameterValue("charset"));
+	//	} catch (Exception ex) {}
+	//   }
+	//}
 	source.setByteStream(in);
 	try {
 	    xmlParser.parse(url.toString());
@@ -596,31 +599,38 @@
 	    ex.printStackTrace();
 	}
 	xmlParser.setContentHandler(this);
-	UnicodeInputStream cis = new UnicodeInputStream(connection.getInputStream());
+	InputStream cis = HTTPURL.getInputStream(ac, connection);
 	InputSource source = new InputSource(cis);
-	String streamEncoding = cis.getEncodingFromStream();
-	// if we get a BOM, use that for the encoding... otherwise CT, then iso-8859-1
-	if (streamEncoding != null) {
-	    source.setEncoding(streamEncoding);
-	} else {
-	    String ctype = connection.getContentType();
-	    if (ctype != null) {
-		try {
-		    MimeType repmime = new MimeType(ctype);
-		    if (repmime.hasParameter("charset")) {
-			source.setEncoding(repmime.getParameterValue("charset"));
-		    } else {
-			// if text/html and no given charset, let's assume
-			// iso-8859-1. Ideally, the parser would change the
-			// encoding if it find a mismatch, not sure, but well...
-			if (repmime.match(MimeType.TEXT_HTML) ==
-			    MimeType.MATCH_SPECIFIC_SUBTYPE) {
-			    source.setEncoding("iso-8859-1");
-			}
-		    }
-		} catch (Exception ex) {}
+	String streamEncoding = HTTPURL.getCharacterEncoding(ac, connection);
+	String httpCL = connection.getHeaderField("Content-Location");
+	if (httpCL != null) {
+	    baseURI = HTTPURL.getURL(baseURI, httpCL);
+	    documentURI = baseURI;
+	    if (streamEncoding != null) {
+		ac.setCharsetForURL(baseURI, streamEncoding);
 	    }
 	}
+	if (streamEncoding != null) {
+	    source.setEncoding(streamEncoding);
+	} //else {
+	  //  String ctype = connection.getContentType();
+	//   if (ctype != null) {
+	//	try {
+	//	    MimeType repmime = new MimeType(ctype);
+	//	    if (repmime.hasParameter("charset")) {
+	//		source.setEncoding(repmime.getParameterValue("charset"));
+	//	    } else {
+	//		// if text/html and no given charset, let's assume
+	//		// iso-8859-1. Ideally, the parser would change the
+	//		// encoding if it find a mismatch, not sure, but well...
+	//		if (repmime.match(MimeType.TEXT_HTML) ==
+	//		    MimeType.MATCH_SPECIFIC_SUBTYPE) {
+	//		    source.setEncoding("iso-8859-1");
+	//		}
+	//	    }
+	//	} catch (Exception ex) {}
+	//   }
+	//}
 	source.setSystemId(urlString);
 	try {
 	    xmlParser.parse(source);

Received on Friday, 13 February 2009 21:50:32 UTC