- From: Yves Lafon via cvs-syncmail <cvsmail@w3.org>
- Date: Fri, 13 Feb 2009 21:50:18 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/2002/css-validator/org/w3c/css/util In directory hutz:/tmp/cvs-serv11022/org/w3c/css/util Modified Files: ApplContext.java HTTPURL.java Log Message: multi-byte handling, more comprehensive use of charsets, while trying to preserve BOM support Index: ApplContext.java =================================================================== RCS file: /sources/public/2002/css-validator/org/w3c/css/util/ApplContext.java,v retrieving revision 1.13 retrieving revision 1.14 diff -u -d -r1.13 -r1.14 --- ApplContext.java 13 Aug 2007 08:30:37 -0000 1.13 +++ ApplContext.java 13 Feb 2009 21:50:16 -0000 1.14 @@ -9,6 +9,8 @@ package org.w3c.css.util; import java.nio.charset.Charset; +import java.util.HashMap; +import java.net.URL; import org.w3c.css.parser.Frame; import org.w3c.www.http.HttpAcceptCharset; @@ -21,221 +23,238 @@ */ public class ApplContext { - String credential = null; - String lang; - Messages msgs; - Frame frame; - String cssversion; - String profile; - String input; - Class cssselectorstyle; - int origin = -1; - String medium; - private String link; - int warningLevel = 0; - - /** - * Creates a new ApplContext - */ - public ApplContext(String lang) { - this.lang = lang; - msgs = new Messages(lang); - } + String credential = null; + String lang; + Messages msgs; + Frame frame; + String cssversion; + String profile; + String input; + Class cssselectorstyle; + int origin = -1; + String medium; + private String link; + int warningLevel = 0; - public int getWarningLevel() { - return warningLevel; - } + private HashMap<URL,String> uricharsets = null; - public void setWarningLevel(int warningLevel) { - this.warningLevel = warningLevel; - } + /** + * Creates a new ApplContext + */ + public ApplContext(String lang) { + this.lang = lang; + msgs = new Messages(lang); + } - // as ugly as everything else - public String getCredential() { - return credential; - } + public int getWarningLevel() { + return warningLevel; + } - public void setCredential(String credential) { - this.credential = credential; - } + public void setWarningLevel(int warningLevel) { + this.warningLevel = warningLevel; + } - public void setFrame(Frame frame) { - this.frame = frame; - frame.ac = this; - } + // as ugly as everything else + public String getCredential() { + return credential; + } - public Frame getFrame() { - return frame; - } + public void setCredential(String credential) { + this.credential = credential; + } - public Class getCssSelectorsStyle() { - return cssselectorstyle; - } + public void setFrame(Frame frame) { + this.frame = frame; + frame.ac = this; + } - public void setCssSelectorsStyle(Class s) { - cssselectorstyle = s; - } + public Frame getFrame() { + return frame; + } - public Messages getMsg() { - return msgs; - } + public Class getCssSelectorsStyle() { + return cssselectorstyle; + } - public String getContentType() { - return (msgs != null) ? msgs.getString("content-type") : null; - } + public void setCssSelectorsStyle(Class s) { + cssselectorstyle = s; + } - public String getContentLanguage() { - return (msgs != null) ? msgs.getString("content-language") : null; - } + public Messages getMsg() { + return msgs; + } - /** - * Searches the properties list for a content-encoding one. If it does not - * exist, searches for output-encoding-name. If it still does not exists, - * the method returns the default utf-8 value - * - * @return the output encoding of this ApplContext - */ - public String getContentEncoding() { - // return (msgs != null) ? msgs.getString("content-encoding") : null; - String res = null; - if (msgs != null) { - res = msgs.getString("content-encoding"); - if (res == null) { - res = msgs.getString("output-encoding-name"); - } - if (res != null) { - // if an encoding has been found, return it - return res; - } - } - // default encoding - return Utf8Properties.ENCODING; - } + public String getContentType() { + return (msgs != null) ? msgs.getString("content-type") : null; + } - public String getLang() { - return lang; - } + public String getContentLanguage() { + return (msgs != null) ? msgs.getString("content-language") : null; + } - public void setCssVersion(String cssversion) { - this.cssversion = cssversion; + /** + * Searches the properties list for a content-encoding one. If it does not + * exist, searches for output-encoding-name. If it still does not exists, + * the method returns the default utf-8 value + * + * @return the output encoding of this ApplContext + */ + public String getContentEncoding() { + // return (msgs != null) ? msgs.getString("content-encoding") : null; + String res = null; + if (msgs != null) { + res = msgs.getString("content-encoding"); + if (res == null) { + res = msgs.getString("output-encoding-name"); + } + if (res != null) { + // if an encoding has been found, return it + return res; + } } + // default encoding + return Utf8Properties.ENCODING; + } + + public String getLang() { + return lang; + } + + public void setCssVersion(String cssversion) { + this.cssversion = cssversion; + } - public String getCssVersion() { - if (cssversion == null) { - cssversion = "css2"; - } - return cssversion; + public String getCssVersion() { + if (cssversion == null) { + cssversion = "css2"; } + return cssversion; + } - public void setProfile(String profile) { - this.profile = profile; - } + public void setProfile(String profile) { + this.profile = profile; + } - public String getProfile() { - if (profile == null) { - return ""; - } - return profile; + public String getProfile() { + if (profile == null) { + return ""; } + return profile; + } - public void setOrigin(int origin) { - this.origin = origin; - } + public void setOrigin(int origin) { + this.origin = origin; + } - public int getOrigin() { - return origin; - } + public int getOrigin() { + return origin; + } - public void setMedium(String medium) { - this.medium = medium; - } + public void setMedium(String medium) { + this.medium = medium; + } - public String getMedium() { - return medium; - } + public String getMedium() { + return medium; + } - public String getInput() { - return input; - } + public String getInput() { + return input; + } - public void setInput(String input) { - this.input = input; - } + public void setInput(String input) { + this.input = input; + } - public String getLink() { - return link; - } + public String getLink() { + return link; + } - public void setLink(String queryString) { - this.link = queryString; - } + public void setLink(String queryString) { + this.link = queryString; + } - /** - * Sets the content encoding to the first charset that appears in - * <i>acceptCharset</i>. If the charset is not supported, the content - * encoding will be utf-8 - * - * @param acceptCharset - * a String representing the Accept-Charset request parameter - */ - public void setContentEncoding(String acceptCharset) { - if (acceptCharset != null) { - // uses some Jigsaw classes to parse the Accept-Charset - // these classes need to load a lot of stuff, so it may be quite - // long the first time - HttpAcceptCharsetList charsetList; - HttpAcceptCharset[] charsets; + /** + * Sets the content encoding to the first charset that appears in + * <i>acceptCharset</i>. If the charset is not supported, the content + * encoding will be utf-8 + * + * @param acceptCharset + * a String representing the Accept-Charset request parameter + */ + public void setContentEncoding(String acceptCharset) { + if (acceptCharset != null) { + // uses some Jigsaw classes to parse the Accept-Charset + // these classes need to load a lot of stuff, so it may be quite + // long the first time + HttpAcceptCharsetList charsetList; + HttpAcceptCharset[] charsets; - charsetList = HttpFactory.parseAcceptCharsetList(acceptCharset); - charsets = (HttpAcceptCharset[]) charsetList.getValue(); + charsetList = HttpFactory.parseAcceptCharsetList(acceptCharset); + charsets = (HttpAcceptCharset[]) charsetList.getValue(); - String encoding = null; - double quality = 0.0; + String encoding = null; + double quality = 0.0; - String biasedcharset = getMsg().getString("output-encoding-name"); + String biasedcharset = getMsg().getString("output-encoding-name"); - for (int i = 0; i < charsets.length && quality < 1.0; i++) { - HttpAcceptCharset charset = charsets[i]; + for (int i = 0; i < charsets.length && quality < 1.0; i++) { + HttpAcceptCharset charset = charsets[i]; - String currentCharset = charset.getCharset(); + String currentCharset = charset.getCharset(); - // checks that the charset is supported by Java + // checks that the charset is supported by Java - if (isCharsetSupported(currentCharset)) { - double currentQuality = charset.getQuality(); + if (isCharsetSupported(currentCharset)) { + double currentQuality = charset.getQuality(); - // we prefer utf-8 - // FIXME (the bias value and the biased charset - // should be dependant on the language) - if ((biasedcharset != null) && !biasedcharset.equalsIgnoreCase(currentCharset)) { - currentQuality = currentQuality * 0.5; - } - if (currentQuality > quality) { - quality = currentQuality; - encoding = charset.getCharset(); - } - } - } - if (encoding != null) { - getMsg().properties.setProperty("content-encoding", encoding); - } else { - // no valid charset - getMsg().properties.remove("content-encoding"); - } - } else { - // no Accept-Charset given - getMsg().properties.remove("content-encoding"); + // we prefer utf-8 + // FIXME (the bias value and the biased charset + // should be dependant on the language) + if ((biasedcharset != null) && !biasedcharset.equalsIgnoreCase(currentCharset)) { + currentQuality = currentQuality * 0.5; + } + if (currentQuality > quality) { + quality = currentQuality; + encoding = charset.getCharset(); + } } + } + if (encoding != null) { + getMsg().properties.setProperty("content-encoding", encoding); + } else { + // no valid charset + getMsg().properties.remove("content-encoding"); + } + } else { + // no Accept-Charset given + getMsg().properties.remove("content-encoding"); } + } - private boolean isCharsetSupported(String charset) { - if ("*".equals(charset)) { - return true; - } - try { - return Charset.isSupported(charset); - } catch (Exception e) { - return false; - } + private boolean isCharsetSupported(String charset) { + if ("*".equals(charset)) { + return true; + } + try { + return Charset.isSupported(charset); + } catch (Exception e) { + return false; } + } + + public void setCharsetForURL(URL url, String charset) { + if (uricharsets == null) { + uricharsets = new HashMap<URL,String>(); + } + uricharsets.put(url, charset); + } + + public String getCharsetForURL(URL url) { + if (uricharsets == null) { + return null; + } + return uricharsets.get(url); + } + } Index: HTTPURL.java =================================================================== RCS file: /sources/public/2002/css-validator/org/w3c/css/util/HTTPURL.java,v retrieving revision 1.20 retrieving revision 1.21 diff -u -d -r1.20 -r1.21 --- HTTPURL.java 4 Feb 2009 15:19:20 -0000 1.20 +++ HTTPURL.java 13 Feb 2009 21:50:16 -0000 1.21 @@ -19,6 +19,11 @@ import java.util.zip.GZIPInputStream; +import org.w3c.www.mime.MimeType; +import org.w3c.www.mime.MimeTypeFormatException; + +import org.apache.velocity.io.UnicodeInputStream; + /** * @version $Revision$ * @author Philippe Le Hegaret @@ -265,22 +270,55 @@ } /* more madness */ - public static InputStream getInputStream(URLConnection uco) throws IOException { + public static InputStream getInputStream(ApplContext ac, URLConnection uco) + throws IOException + { InputStream orig_stream = uco.getInputStream(); + String charset; String encoding; if (orig_stream == null) { return orig_stream; // let it fail elsewhere } encoding = uco.getContentEncoding(); // not set -> return - if (encoding == null) { - return orig_stream; + if (encoding != null) { + if (encoding.equalsIgnoreCase("gzip")) { + orig_stream = new GZIPInputStream(orig_stream); + } } - if (encoding.equalsIgnoreCase("gzip")) { - return new GZIPInputStream(orig_stream); + charset = getCharacterEncoding(ac, uco); + if ((charset == null) || (charset.regionMatches(true, 0, "utf", 0, 3))) { + UnicodeInputStream is = new UnicodeInputStream(orig_stream); + charset = is.getEncodingFromStream(); + if (charset != null) { + ac.setCharsetForURL(uco.getURL(), charset); + } + return is; } return orig_stream; } + + public static String getCharacterEncoding(ApplContext ac, URLConnection uco) { + String charset = ac.getCharsetForURL(uco.getURL()); + if (charset != null) { + return charset; + } + String mtypestr = uco.getContentType(); + if (mtypestr == null) { + return mtypestr; + } + MimeType mt; + try { + mt = new MimeType(mtypestr); + } catch (MimeTypeFormatException mex) { + return null; + } + charset = mt.getParameterValue("charset"); + if (charset != null) { + ac.setCharsetForURL(uco.getURL(), charset); + } + return charset; + } /** * */
Received on Friday, 13 February 2009 21:50:40 UTC