- From: Yves Lafon via cvs-syncmail <cvsmail@w3.org>
- Date: Fri, 13 Feb 2009 21:50:17 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/2002/css-validator/org/w3c/css/css
In directory hutz:/tmp/cvs-serv11022/org/w3c/css/css

Modified Files:
    StyleSheetGenerator.java StyleSheetParser.java
    TagSoupStyleSheetHandler.java XMLStyleSheetHandler.java
Log Message:
multi-byte handling, more comprehensive use of charsets, while trying to preserve BOM support

Index: StyleSheetParser.java
===================================================================
RCS file: /sources/public/2002/css-validator/org/w3c/css/css/StyleSheetParser.java,v
retrieving revision 1.13
retrieving revision 1.14
diff -u -d -r1.13 -r1.14
--- StyleSheetParser.java 21 Feb 2008 01:41:17 -0000 1.13
+++ StyleSheetParser.java 13 Feb 2009 21:50:15 -0000 1.14
@@ -255,7 +255,8 @@
 
     try {
 //      if (cssFouffa == null) {
-        cssFouffa = new CssFouffa(ac, input, url, lineno);
+        String charset = ac.getCharsetForURL(url);
+        cssFouffa = new CssFouffa(ac, input, charset, url, lineno);
         cssFouffa.addListener(this);
 //      } else {
 //          cssFouffa.ReInit(ac, input, url, lineno);
@@ -345,10 +346,11 @@
     }
 
     try {
-//      if (cssFouffa == null) {
-        cssFouffa = new CssFouffa(ac, input, url, lineno);
-        cssFouffa.addListener(this);
-//      } else
+        // if (cssFouffa == null) {
+        String charset = ac.getCharsetForURL(url);
+        cssFouffa = new CssFouffa(ac, input, charset, url, lineno);
+        cssFouffa.addListener(this);
+        // } else
 //          cssFouffa.ReInit(ac, input, url, lineno);
 
         CssSelectors selector = new CssSelectors(ac);
Index: StyleSheetGenerator.java
===================================================================
RCS file: /sources/public/2002/css-validator/org/w3c/css/css/StyleSheetGenerator.java,v
retrieving revision 1.23
retrieving revision 1.24
diff -u -d -r1.23 -r1.24
--- StyleSheetGenerator.java 13 Feb 2009 14:03:36 -0000 1.23
+++ StyleSheetGenerator.java 13 Feb 2009 21:50:15 -0000 1.24
@@ -176,6 +176,7 @@
 
         try {
             template = Velocity.getTemplate("org/w3c/css/css/" + template_file);
+            template.setEncoding("utf-8");
         } catch (ResourceNotFoundException rnfe) {
             System.err.println(rnfe.getMessage());
             rnfe.printStackTrace();
Index: XMLStyleSheetHandler.java
===================================================================
RCS file: /sources/public/2002/css-validator/org/w3c/css/css/XMLStyleSheetHandler.java,v
retrieving revision 1.29
retrieving revision 1.30
diff -u -d -r1.29 -r1.30
--- XMLStyleSheetHandler.java 10 Dec 2008 15:25:52 -0000 1.29
+++ XMLStyleSheetHandler.java 13 Feb 2009 21:50:15 -0000 1.30
@@ -43,8 +43,6 @@
 import org.xml.sax.SAXParseException;
 import org.xml.sax.ext.LexicalHandler;
 
-import org.apache.velocity.io.UnicodeInputStream;
-
 /**
  * @version $Revision$
  * @author Philippe Le Hegaret
@@ -483,7 +481,7 @@
     void parse(URL url) throws Exception {
         InputSource source = new InputSource();
         URLConnection connection;
-        UnicodeInputStream in;
+        InputStream in;
         org.xml.sax.XMLReader xmlParser = new org.apache.xerces.parsers.SAXParser();
         try {
             xmlParser.setProperty(
@@ -504,27 +502,31 @@
         xmlParser.setContentHandler(this);
 
         connection = HTTPURL.getConnection(url, ac);
-        in = new UnicodeInputStream(connection.getInputStream());
-        String streamEncoding = in.getEncodingFromStream();
+        in = HTTPURL.getInputStream(ac, connection);
+        String streamEncoding = HTTPURL.getCharacterEncoding(ac, connection);
         String httpCL = connection.getHeaderField("Content-Location");
         if (httpCL != null) {
             baseURI = HTTPURL.getURL(baseURI, httpCL);
             documentURI = baseURI;
+            if (streamEncoding != null) {
+                ac.setCharsetForURL(baseURI, streamEncoding);
+            }
         }
 
         if (streamEncoding != null) {
             source.setEncoding(streamEncoding);
-        } else {
-            String ctype = connection.getContentType();
-            if (ctype != null) {
-                try {
-                    MimeType repmime = new MimeType(ctype);
-                    if (repmime.hasParameter("charset"))
-                        source.setEncoding(repmime.getParameterValue("charset"));
-                } catch (Exception ex) {
-                }
-            }
-        }
+        }
+        // else {
+        //     String ctype = connection.getContentType();
+        //     if (ctype != null) {
+        //         try {
+        //             MimeType repmime = new MimeType(ctype);
+        //             if (repmime.hasParameter("charset"))
+        //                 source.setEncoding(repmime.getParameterValue("charset"));
+        //         } catch (Exception ex) {
+        //         }
+        //     }
+        //}
         source.setByteStream(in);
         try {
             xmlParser.parse(url.toString());
@@ -549,31 +551,38 @@
             ex.printStackTrace();
         }
         xmlParser.setContentHandler(this);
-        UnicodeInputStream cis = new UnicodeInputStream(connection.getInputStream());
+        InputStream cis = HTTPURL.getInputStream(ac, connection);
         InputSource source = new InputSource(cis);
-        String streamEncoding = cis.getEncodingFromStream();
-        // if we get a BOM, use that for the encoding... otherwise CT, then iso-8859-1
-        if (streamEncoding != null) {
-            source.setEncoding(streamEncoding);
-        } else {
-            String ctype = connection.getContentType();
-            if (ctype != null) {
-                try {
-                    MimeType repmime = new MimeType(ctype);
-                    if (repmime.hasParameter("charset")) {
-                        source.setEncoding(repmime.getParameterValue("charset"));
-                    } else {
-                        // if text/html and no given charset, let's assume
-                        // iso-8859-1. Ideally, the parser would change the
-                        // encoding if it find a mismatch, not sure, but well...
-                        if (repmime.match(MimeType.TEXT_HTML) == MimeType.MATCH_SPECIFIC_SUBTYPE) {
-                            source.setEncoding("iso-8859-1");
-                        }
-                    }
-                } catch (Exception ex) {
-                }
+        String streamEncoding = HTTPURL.getCharacterEncoding(ac, connection);
+        String httpCL = connection.getHeaderField("Content-Location");
+        if (httpCL != null) {
+            baseURI = HTTPURL.getURL(baseURI, httpCL);
+            documentURI = baseURI;
+            if (streamEncoding != null) {
+                ac.setCharsetForURL(baseURI, streamEncoding);
             }
         }
+        if (streamEncoding != null) {
+            source.setEncoding(streamEncoding);
+        } //else {
+        //     String ctype = connection.getContentType();
+        //     if (ctype != null) {
+        //         try {
+        //             MimeType repmime = new MimeType(ctype);
+        //             if (repmime.hasParameter("charset")) {
+        //                 source.setEncoding(repmime.getParameterValue("charset"));
+        //             } else {
+        //                 // if text/html and no given charset, let's assume
+        //                 // iso-8859-1. Ideally, the parser would change the
+        //                 // encoding if it find a mismatch, not sure, but well...
+        //                 if (repmime.match(MimeType.TEXT_HTML) == MimeType.MATCH_SPECIFIC_SUBTYPE) {
+        //                     source.setEncoding("iso-8859-1");
+        //                 }
+        //             }
+        //         } catch (Exception ex) {
+        //         }
+        //     }
+        //}
         source.setSystemId(urlString);
         try {
             xmlParser.parse(source);
Index: TagSoupStyleSheetHandler.java
===================================================================
RCS file: /sources/public/2002/css-validator/org/w3c/css/css/TagSoupStyleSheetHandler.java,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -d -r1.5 -r1.6
--- TagSoupStyleSheetHandler.java 10 Dec 2008 15:10:15 -0000 1.5
+++ TagSoupStyleSheetHandler.java 13 Feb 2009 21:50:15 -0000 1.6
@@ -45,8 +45,6 @@
 import org.xml.sax.SAXParseException;
 import org.xml.sax.ext.LexicalHandler;
 
-import org.apache.velocity.io.UnicodeInputStream;
-
 /**
  * @version $Revision$
  * @author Philippe Le Hegaret
@@ -72,6 +70,7 @@
     String media = null;
     String type = null;
     String title = null;
+    String charset = null;
     StringBuilder text = new StringBuilder(255);
 
     Locator locator;
@@ -535,7 +534,7 @@
     void parse(URL url) throws Exception {
         InputSource source = new InputSource();
         URLConnection connection;
-        UnicodeInputStream in;
+        InputStream in;
         org.xml.sax.XMLReader xmlParser = new org.ccil.cowan.tagsoup.Parser();
         try {
             xmlParser.setProperty("http://xml.org/sax/properties/lexical-handler",
@@ -554,26 +553,30 @@
         xmlParser.setContentHandler(this);
 
         connection = HTTPURL.getConnection(url, ac);
-        in = new UnicodeInputStream(connection.getInputStream());
-        String streamEncoding = in.getEncodingFromStream();
+        in = HTTPURL.getInputStream(ac, connection);
+        String streamEncoding = HTTPURL.getCharacterEncoding(ac, connection);
         String httpCL = connection.getHeaderField("Content-Location");
         if (httpCL != null) {
             baseURI = HTTPURL.getURL(baseURI, httpCL);
             documentURI = baseURI;
+            if (streamEncoding != null) {
+                ac.setCharsetForURL(baseURI, streamEncoding);
+            }
         }
 
         if (streamEncoding != null) {
             source.setEncoding(streamEncoding);
-        } else {
-            String ctype = connection.getContentType();
-            if (ctype != null) {
-                try {
-                    MimeType repmime = new MimeType(ctype);
-                    if (repmime.hasParameter("charset"))
-                        source.setEncoding(repmime.getParameterValue("charset"));
-                } catch (Exception ex) {}
-            }
-        }
+        }
+        //else {
+        //     String ctype = connection.getContentType();
+        //     if (ctype != null) {
+        //         try {
+        //             MimeType repmime = new MimeType(ctype);
+        //             if (repmime.hasParameter("charset"))
+        //                 source.setEncoding(repmime.getParameterValue("charset"));
+        //         } catch (Exception ex) {}
+        //     }
+        //}
         source.setByteStream(in);
         try {
             xmlParser.parse(url.toString());
@@ -596,31 +599,38 @@
             ex.printStackTrace();
         }
         xmlParser.setContentHandler(this);
-        UnicodeInputStream cis = new UnicodeInputStream(connection.getInputStream());
+        InputStream cis = HTTPURL.getInputStream(ac, connection);
         InputSource source = new InputSource(cis);
-        String streamEncoding = cis.getEncodingFromStream();
-        // if we get a BOM, use that for the encoding... otherwise CT, then iso-8859-1
-        if (streamEncoding != null) {
-            source.setEncoding(streamEncoding);
-        } else {
-            String ctype = connection.getContentType();
-            if (ctype != null) {
-                try {
-                    MimeType repmime = new MimeType(ctype);
-                    if (repmime.hasParameter("charset")) {
-                        source.setEncoding(repmime.getParameterValue("charset"));
-                    } else {
-                        // if text/html and no given charset, let's assume
-                        // iso-8859-1. Ideally, the parser would change the
-                        // encoding if it find a mismatch, not sure, but well...
-                        if (repmime.match(MimeType.TEXT_HTML) ==
-                            MimeType.MATCH_SPECIFIC_SUBTYPE) {
-                            source.setEncoding("iso-8859-1");
-                        }
-                    }
-                } catch (Exception ex) {}
+        String streamEncoding = HTTPURL.getCharacterEncoding(ac, connection);
+        String httpCL = connection.getHeaderField("Content-Location");
+        if (httpCL != null) {
+            baseURI = HTTPURL.getURL(baseURI, httpCL);
+            documentURI = baseURI;
+            if (streamEncoding != null) {
+                ac.setCharsetForURL(baseURI, streamEncoding);
             }
         }
+        if (streamEncoding != null) {
+            source.setEncoding(streamEncoding);
+        } //else {
+        //     String ctype = connection.getContentType();
+        //     if (ctype != null) {
+        //         try {
+        //             MimeType repmime = new MimeType(ctype);
+        //             if (repmime.hasParameter("charset")) {
+        //                 source.setEncoding(repmime.getParameterValue("charset"));
+        //             } else {
+        //                 // if text/html and no given charset, let's assume
+        //                 // iso-8859-1. Ideally, the parser would change the
+        //                 // encoding if it find a mismatch, not sure, but well...
+        //                 if (repmime.match(MimeType.TEXT_HTML) ==
+        //                     MimeType.MATCH_SPECIFIC_SUBTYPE) {
+        //                     source.setEncoding("iso-8859-1");
+        //                 }
+        //             }
+        //         } catch (Exception ex) {}
+        //     }
+        //}
         source.setSystemId(urlString);
         try {
             xmlParser.parse(source);
Received on Friday, 13 February 2009 21:50:32 UTC