- From: Yves Lafon via cvs-syncmail <cvsmail@w3.org>
- Date: Fri, 13 Feb 2009 21:50:18 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/2002/css-validator/org/w3c/css/util
In directory hutz:/tmp/cvs-serv11022/org/w3c/css/util
Modified Files:
ApplContext.java HTTPURL.java
Log Message:
multi-byte handling, more comprehensive use of charsets, while trying to preserve BOM support
Index: ApplContext.java
===================================================================
RCS file: /sources/public/2002/css-validator/org/w3c/css/util/ApplContext.java,v
retrieving revision 1.13
retrieving revision 1.14
diff -u -d -r1.13 -r1.14
--- ApplContext.java 13 Aug 2007 08:30:37 -0000 1.13
+++ ApplContext.java 13 Feb 2009 21:50:16 -0000 1.14
@@ -9,6 +9,8 @@
package org.w3c.css.util;
import java.nio.charset.Charset;
+import java.util.HashMap;
+import java.net.URL;
import org.w3c.css.parser.Frame;
import org.w3c.www.http.HttpAcceptCharset;
@@ -21,221 +23,238 @@
*/
public class ApplContext {
- String credential = null;
- String lang;
- Messages msgs;
- Frame frame;
- String cssversion;
- String profile;
- String input;
- Class cssselectorstyle;
- int origin = -1;
- String medium;
- private String link;
- int warningLevel = 0;
-
- /**
- * Creates a new ApplContext
- */
- public ApplContext(String lang) {
- this.lang = lang;
- msgs = new Messages(lang);
- }
+ String credential = null;
+ String lang;
+ Messages msgs;
+ Frame frame;
+ String cssversion;
+ String profile;
+ String input;
+ Class cssselectorstyle;
+ int origin = -1;
+ String medium;
+ private String link;
+ int warningLevel = 0;
- public int getWarningLevel() {
- return warningLevel;
- }
+ private HashMap<URL,String> uricharsets = null;
- public void setWarningLevel(int warningLevel) {
- this.warningLevel = warningLevel;
- }
+ /**
+ * Creates a new ApplContext
+ */
+ public ApplContext(String lang) {
+ this.lang = lang;
+ msgs = new Messages(lang);
+ }
- // as ugly as everything else
- public String getCredential() {
- return credential;
- }
+ public int getWarningLevel() {
+ return warningLevel;
+ }
- public void setCredential(String credential) {
- this.credential = credential;
- }
+ public void setWarningLevel(int warningLevel) {
+ this.warningLevel = warningLevel;
+ }
- public void setFrame(Frame frame) {
- this.frame = frame;
- frame.ac = this;
- }
+ // as ugly as everything else
+ public String getCredential() {
+ return credential;
+ }
- public Frame getFrame() {
- return frame;
- }
+ public void setCredential(String credential) {
+ this.credential = credential;
+ }
- public Class getCssSelectorsStyle() {
- return cssselectorstyle;
- }
+ public void setFrame(Frame frame) {
+ this.frame = frame;
+ frame.ac = this;
+ }
- public void setCssSelectorsStyle(Class s) {
- cssselectorstyle = s;
- }
+ public Frame getFrame() {
+ return frame;
+ }
- public Messages getMsg() {
- return msgs;
- }
+ public Class getCssSelectorsStyle() {
+ return cssselectorstyle;
+ }
- public String getContentType() {
- return (msgs != null) ? msgs.getString("content-type") : null;
- }
+ public void setCssSelectorsStyle(Class s) {
+ cssselectorstyle = s;
+ }
- public String getContentLanguage() {
- return (msgs != null) ? msgs.getString("content-language") : null;
- }
+ public Messages getMsg() {
+ return msgs;
+ }
- /**
- * Searches the properties list for a content-encoding one. If it does not
- * exist, searches for output-encoding-name. If it still does not exists,
- * the method returns the default utf-8 value
- *
- * @return the output encoding of this ApplContext
- */
- public String getContentEncoding() {
- // return (msgs != null) ? msgs.getString("content-encoding") : null;
- String res = null;
- if (msgs != null) {
- res = msgs.getString("content-encoding");
- if (res == null) {
- res = msgs.getString("output-encoding-name");
- }
- if (res != null) {
- // if an encoding has been found, return it
- return res;
- }
- }
- // default encoding
- return Utf8Properties.ENCODING;
- }
+ public String getContentType() {
+ return (msgs != null) ? msgs.getString("content-type") : null;
+ }
- public String getLang() {
- return lang;
- }
+ public String getContentLanguage() {
+ return (msgs != null) ? msgs.getString("content-language") : null;
+ }
- public void setCssVersion(String cssversion) {
- this.cssversion = cssversion;
+ /**
+ * Searches the properties list for a content-encoding one. If it does not
+ * exist, searches for output-encoding-name. If it still does not exist,
+ * the method returns the default utf-8 value
+ *
+ * @return the output encoding of this ApplContext
+ */
+ public String getContentEncoding() {
+ // return (msgs != null) ? msgs.getString("content-encoding") : null;
+ String res = null;
+ if (msgs != null) {
+ res = msgs.getString("content-encoding");
+ if (res == null) {
+ res = msgs.getString("output-encoding-name");
+ }
+ if (res != null) {
+ // if an encoding has been found, return it
+ return res;
+ }
}
+ // default encoding
+ return Utf8Properties.ENCODING;
+ }
+
+ public String getLang() {
+ return lang;
+ }
+
+ public void setCssVersion(String cssversion) {
+ this.cssversion = cssversion;
+ }
- public String getCssVersion() {
- if (cssversion == null) {
- cssversion = "css2";
- }
- return cssversion;
+ public String getCssVersion() {
+ if (cssversion == null) {
+ cssversion = "css2";
}
+ return cssversion;
+ }
- public void setProfile(String profile) {
- this.profile = profile;
- }
+ public void setProfile(String profile) {
+ this.profile = profile;
+ }
- public String getProfile() {
- if (profile == null) {
- return "";
- }
- return profile;
+ public String getProfile() {
+ if (profile == null) {
+ return "";
}
+ return profile;
+ }
- public void setOrigin(int origin) {
- this.origin = origin;
- }
+ public void setOrigin(int origin) {
+ this.origin = origin;
+ }
- public int getOrigin() {
- return origin;
- }
+ public int getOrigin() {
+ return origin;
+ }
- public void setMedium(String medium) {
- this.medium = medium;
- }
+ public void setMedium(String medium) {
+ this.medium = medium;
+ }
- public String getMedium() {
- return medium;
- }
+ public String getMedium() {
+ return medium;
+ }
- public String getInput() {
- return input;
- }
+ public String getInput() {
+ return input;
+ }
- public void setInput(String input) {
- this.input = input;
- }
+ public void setInput(String input) {
+ this.input = input;
+ }
- public String getLink() {
- return link;
- }
+ public String getLink() {
+ return link;
+ }
- public void setLink(String queryString) {
- this.link = queryString;
- }
+ public void setLink(String queryString) {
+ this.link = queryString;
+ }
- /**
- * Sets the content encoding to the first charset that appears in
- * <i>acceptCharset</i>. If the charset is not supported, the content
- * encoding will be utf-8
- *
- * @param acceptCharset
- * a String representing the Accept-Charset request parameter
- */
- public void setContentEncoding(String acceptCharset) {
- if (acceptCharset != null) {
- // uses some Jigsaw classes to parse the Accept-Charset
- // these classes need to load a lot of stuff, so it may be quite
- // long the first time
- HttpAcceptCharsetList charsetList;
- HttpAcceptCharset[] charsets;
+ /**
+ * Sets the content encoding to the first charset that appears in
+ * <i>acceptCharset</i>. If the charset is not supported, the content
+ * encoding will be utf-8
+ *
+ * @param acceptCharset
+ * a String representing the Accept-Charset request parameter
+ */
+ public void setContentEncoding(String acceptCharset) {
+ if (acceptCharset != null) {
+ // uses some Jigsaw classes to parse the Accept-Charset
+ // these classes need to load a lot of stuff, so it may be quite
+ // long the first time
+ HttpAcceptCharsetList charsetList;
+ HttpAcceptCharset[] charsets;
- charsetList = HttpFactory.parseAcceptCharsetList(acceptCharset);
- charsets = (HttpAcceptCharset[]) charsetList.getValue();
+ charsetList = HttpFactory.parseAcceptCharsetList(acceptCharset);
+ charsets = (HttpAcceptCharset[]) charsetList.getValue();
- String encoding = null;
- double quality = 0.0;
+ String encoding = null;
+ double quality = 0.0;
- String biasedcharset = getMsg().getString("output-encoding-name");
+ String biasedcharset = getMsg().getString("output-encoding-name");
- for (int i = 0; i < charsets.length && quality < 1.0; i++) {
- HttpAcceptCharset charset = charsets[i];
+ for (int i = 0; i < charsets.length && quality < 1.0; i++) {
+ HttpAcceptCharset charset = charsets[i];
- String currentCharset = charset.getCharset();
+ String currentCharset = charset.getCharset();
- // checks that the charset is supported by Java
+ // checks that the charset is supported by Java
- if (isCharsetSupported(currentCharset)) {
- double currentQuality = charset.getQuality();
+ if (isCharsetSupported(currentCharset)) {
+ double currentQuality = charset.getQuality();
- // we prefer utf-8
- // FIXME (the bias value and the biased charset
- // should be dependant on the language)
- if ((biasedcharset != null) && !biasedcharset.equalsIgnoreCase(currentCharset)) {
- currentQuality = currentQuality * 0.5;
- }
- if (currentQuality > quality) {
- quality = currentQuality;
- encoding = charset.getCharset();
- }
- }
- }
- if (encoding != null) {
- getMsg().properties.setProperty("content-encoding", encoding);
- } else {
- // no valid charset
- getMsg().properties.remove("content-encoding");
- }
- } else {
- // no Accept-Charset given
- getMsg().properties.remove("content-encoding");
+ // we prefer utf-8
+ // FIXME (the bias value and the biased charset
+ // should be dependent on the language)
+ if ((biasedcharset != null) && !biasedcharset.equalsIgnoreCase(currentCharset)) {
+ currentQuality = currentQuality * 0.5;
+ }
+ if (currentQuality > quality) {
+ quality = currentQuality;
+ encoding = charset.getCharset();
+ }
}
+ }
+ if (encoding != null) {
+ getMsg().properties.setProperty("content-encoding", encoding);
+ } else {
+ // no valid charset
+ getMsg().properties.remove("content-encoding");
+ }
+ } else {
+ // no Accept-Charset given
+ getMsg().properties.remove("content-encoding");
}
+ }
- private boolean isCharsetSupported(String charset) {
- if ("*".equals(charset)) {
- return true;
- }
- try {
- return Charset.isSupported(charset);
- } catch (Exception e) {
- return false;
- }
+ private boolean isCharsetSupported(String charset) {
+ if ("*".equals(charset)) {
+ return true;
+ }
+ try {
+ return Charset.isSupported(charset);
+ } catch (Exception e) {
+ return false;
}
+ }
+
+ public void setCharsetForURL(URL url, String charset) {
+ if (uricharsets == null) {
+ uricharsets = new HashMap<URL,String>();
+ }
+ uricharsets.put(url, charset);
+ }
+
+ public String getCharsetForURL(URL url) {
+ if (uricharsets == null) {
+ return null;
+ }
+ return uricharsets.get(url);
+ }
+
}
Index: HTTPURL.java
===================================================================
RCS file: /sources/public/2002/css-validator/org/w3c/css/util/HTTPURL.java,v
retrieving revision 1.20
retrieving revision 1.21
diff -u -d -r1.20 -r1.21
--- HTTPURL.java 4 Feb 2009 15:19:20 -0000 1.20
+++ HTTPURL.java 13 Feb 2009 21:50:16 -0000 1.21
@@ -19,6 +19,11 @@
import java.util.zip.GZIPInputStream;
+import org.w3c.www.mime.MimeType;
+import org.w3c.www.mime.MimeTypeFormatException;
+
+import org.apache.velocity.io.UnicodeInputStream;
+
/**
* @version $Revision$
* @author Philippe Le Hegaret
@@ -265,22 +270,55 @@
}
/* more madness */
- public static InputStream getInputStream(URLConnection uco) throws IOException {
+ public static InputStream getInputStream(ApplContext ac, URLConnection uco)
+ throws IOException
+ {
InputStream orig_stream = uco.getInputStream();
+ String charset;
String encoding;
if (orig_stream == null) {
return orig_stream; // let it fail elsewhere
}
encoding = uco.getContentEncoding();
// not set -> return
- if (encoding == null) {
- return orig_stream;
+ if (encoding != null) {
+ if (encoding.equalsIgnoreCase("gzip")) {
+ orig_stream = new GZIPInputStream(orig_stream);
+ }
}
- if (encoding.equalsIgnoreCase("gzip")) {
- return new GZIPInputStream(orig_stream);
+ charset = getCharacterEncoding(ac, uco);
+ if ((charset == null) || (charset.regionMatches(true, 0, "utf", 0, 3))) {
+ UnicodeInputStream is = new UnicodeInputStream(orig_stream);
+ charset = is.getEncodingFromStream();
+ if (charset != null) {
+ ac.setCharsetForURL(uco.getURL(), charset);
+ }
+ return is;
}
return orig_stream;
}
+
+ public static String getCharacterEncoding(ApplContext ac, URLConnection uco) {
+ String charset = ac.getCharsetForURL(uco.getURL());
+ if (charset != null) {
+ return charset;
+ }
+ String mtypestr = uco.getContentType();
+ if (mtypestr == null) {
+ return mtypestr;
+ }
+ MimeType mt;
+ try {
+ mt = new MimeType(mtypestr);
+ } catch (MimeTypeFormatException mex) {
+ return null;
+ }
+ charset = mt.getParameterValue("charset");
+ if (charset != null) {
+ ac.setCharsetForURL(uco.getURL(), charset);
+ }
+ return charset;
+ }
/**
*
*/
Received on Friday, 13 February 2009 21:50:40 UTC