W3C home > Mailing lists > Public > www-validator-cvs@w3.org > February 2009

2002/css-validator/org/w3c/css/util ApplContext.java,1.13,1.14 HTTPURL.java,1.20,1.21

From: Yves Lafon via cvs-syncmail <cvsmail@w3.org>
Date: Fri, 13 Feb 2009 21:50:18 +0000
To: www-validator-cvs@w3.org
Message-Id: <E1LY5vO-0002ts-Fj@lionel-hutz.w3.org>

Update of /sources/public/2002/css-validator/org/w3c/css/util
In directory hutz:/tmp/cvs-serv11022/org/w3c/css/util

Modified Files:
	ApplContext.java HTTPURL.java 
Log Message:
multi-byte handling, more comprehensive use of charsets, while trying to preserve BOM support

Index: ApplContext.java
===================================================================
RCS file: /sources/public/2002/css-validator/org/w3c/css/util/ApplContext.java,v
retrieving revision 1.13
retrieving revision 1.14
diff -u -d -r1.13 -r1.14
--- ApplContext.java	13 Aug 2007 08:30:37 -0000	1.13
+++ ApplContext.java	13 Feb 2009 21:50:16 -0000	1.14
@@ -9,6 +9,8 @@
 package org.w3c.css.util;
 
 import java.nio.charset.Charset;
+import java.util.HashMap;
+import java.net.URL;
 
 import org.w3c.css.parser.Frame;
 import org.w3c.www.http.HttpAcceptCharset;
@@ -21,221 +23,238 @@
  */
 public class ApplContext {
 
-	String credential = null;
-	String lang;
-	Messages msgs;
-	Frame frame;
-	String cssversion;
-	String profile;
-	String input;
-	Class cssselectorstyle;
-	int origin = -1;
-	String medium;
-	private String link;
-	int warningLevel = 0;
-
-	/**
-	 * Creates a new ApplContext
-	 */
-	public ApplContext(String lang) {
-		this.lang = lang;
-		msgs = new Messages(lang);
-	}
+    String credential = null;
+    String lang;
+    Messages msgs;
+    Frame frame;
+    String cssversion;
+    String profile;
+    String input;
+    Class cssselectorstyle;
+    int origin = -1;
+    String medium;
+    private String link;
+    int warningLevel = 0;
 
-	public int getWarningLevel() {
-		return warningLevel;
-	}
+    private HashMap<URL,String> uricharsets = null;
 
-	public void setWarningLevel(int warningLevel) {
-		this.warningLevel = warningLevel;
-	}
+    /**
+     * Creates a new ApplContext
+     */
+    public ApplContext(String lang) {
+	this.lang = lang;
+	msgs = new Messages(lang);
+    }
 
-	// as ugly as everything else
-	public String getCredential() {
-		return credential;
-	}
+    public int getWarningLevel() {
+	return warningLevel;
+    }
 
-	public void setCredential(String credential) {
-		this.credential = credential;
-	}
+    public void setWarningLevel(int warningLevel) {
+	this.warningLevel = warningLevel;
+    }
 
-	public void setFrame(Frame frame) {
-		this.frame = frame;
-		frame.ac = this;
-	}
+    // as ugly as everything else
+    public String getCredential() {
+	return credential;
+    }
 
-	public Frame getFrame() {
-		return frame;
-	}
+    public void setCredential(String credential) {
+	this.credential = credential;
+    }
 
-	public Class getCssSelectorsStyle() {
-		return cssselectorstyle;
-	}
+    public void setFrame(Frame frame) {
+	this.frame = frame;
+	frame.ac = this;
+    }
 
-	public void setCssSelectorsStyle(Class s) {
-		cssselectorstyle = s;
-	}
+    public Frame getFrame() {
+	return frame;
+    }
 
-	public Messages getMsg() {
-		return msgs;
-	}
+    public Class getCssSelectorsStyle() {
+	return cssselectorstyle;
+    }
 
-	public String getContentType() {
-		return (msgs != null) ? msgs.getString("content-type") : null;
-	}
+    public void setCssSelectorsStyle(Class s) {
+	cssselectorstyle = s;
+    }
 
-	public String getContentLanguage() {
-		return (msgs != null) ? msgs.getString("content-language") : null;
-	}
+    public Messages getMsg() {
+	return msgs;
+    }
 
-	/**
-	 * Searches the properties list for a content-encoding one. If it does not
-	 * exist, searches for output-encoding-name. If it still does not exists,
-	 * the method returns the default utf-8 value
-	 * 
-	 * @return the output encoding of this ApplContext
-	 */
-	public String getContentEncoding() {
-		// return (msgs != null) ? msgs.getString("content-encoding") : null;
-		String res = null;
-		if (msgs != null) {
-			res = msgs.getString("content-encoding");
-			if (res == null) {
-				res = msgs.getString("output-encoding-name");
-			}
-			if (res != null) {
-				// if an encoding has been found, return it
-				return res;
-			}
-		}
-		// default encoding
-		return Utf8Properties.ENCODING;
-	}
+    public String getContentType() {
+	return (msgs != null) ? msgs.getString("content-type") : null;
+    }
 
-	public String getLang() {
-		return lang;
-	}
+    public String getContentLanguage() {
+	return (msgs != null) ? msgs.getString("content-language") : null;
+    }
 
-	public void setCssVersion(String cssversion) {
-		this.cssversion = cssversion;
+    /**
+     * Searches the properties list for a content-encoding one. If it does not
+     * exist, searches for output-encoding-name. If it still does not exist,
+     * the method returns the default utf-8 value
+     * 
+     * @return the output encoding of this ApplContext
+     */
+    public String getContentEncoding() {
+	// return (msgs != null) ? msgs.getString("content-encoding") : null;
+	String res = null;
+	if (msgs != null) {
+	    res = msgs.getString("content-encoding");
+	    if (res == null) {
+		res = msgs.getString("output-encoding-name");
+	    }
+	    if (res != null) {
+		// if an encoding has been found, return it
+		return res;
+	    }
 	}
+	// default encoding
+	return Utf8Properties.ENCODING;
+    }
+    
+    public String getLang() {
+	return lang;
+    }
+    
+    public void setCssVersion(String cssversion) {
+	this.cssversion = cssversion;
+    }
 
-	public String getCssVersion() {
-		if (cssversion == null) {
-			cssversion = "css2";
-		}
-		return cssversion;
+    public String getCssVersion() {
+	if (cssversion == null) {
+	    cssversion = "css2";
 	}
+	return cssversion;
+    }
 
-	public void setProfile(String profile) {
-		this.profile = profile;
-	}
+    public void setProfile(String profile) {
+	this.profile = profile;
+    }
 
-	public String getProfile() {
-		if (profile == null) {
-			return "";
-		}
-		return profile;
+    public String getProfile() {
+	if (profile == null) {
+	    return "";
 	}
+	return profile;
+    }
 
-	public void setOrigin(int origin) {
-		this.origin = origin;
-	}
+    public void setOrigin(int origin) {
+	this.origin = origin;
+    }
 
-	public int getOrigin() {
-		return origin;
-	}
+    public int getOrigin() {
+	return origin;
+    }
 
-	public void setMedium(String medium) {
-		this.medium = medium;
-	}
+    public void setMedium(String medium) {
+	this.medium = medium;
+    }
 
-	public String getMedium() {
-		return medium;
-	}
+    public String getMedium() {
+	return medium;
+    }
 
-	public String getInput() {
-		return input;
-	}
+    public String getInput() {
+	return input;
+    }
 
-	public void setInput(String input) {
-		this.input = input;
-	}
+    public void setInput(String input) {
+	this.input = input;
+    }
 	
-	public String getLink() {
-		return link;
-	}
+    public String getLink() {
+	return link;
+    }
 
-	public void setLink(String queryString) {
-		this.link = queryString;
-	}
+    public void setLink(String queryString) {
+	this.link = queryString;
+    }
 
-	/**
-	 * Sets the content encoding to the first charset that appears in
-	 * <i>acceptCharset</i>. If the charset is not supported, the content
-	 * encoding will be utf-8
-	 * 
-	 * @param acceptCharset
-	 *            a String representing the Accept-Charset request parameter
-	 */
-	public void setContentEncoding(String acceptCharset) {
-		if (acceptCharset != null) {
-			// uses some Jigsaw classes to parse the Accept-Charset
-			// these classes need to load a lot of stuff, so it may be quite
-			// long the first time
-			HttpAcceptCharsetList charsetList;
-			HttpAcceptCharset[] charsets;
+    /**
+     * Sets the content encoding to the supported charset with the best
+     * (biased) quality in <i>acceptCharset</i>. If none is supported, the
+     * content-encoding property is removed and the default encoding applies
+     * 
+     * @param acceptCharset
+     *            a String representing the Accept-Charset request parameter
+     */
+    public void setContentEncoding(String acceptCharset) {
+	if (acceptCharset != null) {
+	    // uses some Jigsaw classes to parse the Accept-Charset
+	    // these classes need to load a lot of stuff, so it may be quite
+	    // slow the first time
+	    HttpAcceptCharsetList charsetList;
+	    HttpAcceptCharset[] charsets;
 
-			charsetList = HttpFactory.parseAcceptCharsetList(acceptCharset);
-			charsets = (HttpAcceptCharset[]) charsetList.getValue();
+	    charsetList = HttpFactory.parseAcceptCharsetList(acceptCharset);
+	    charsets = (HttpAcceptCharset[]) charsetList.getValue();
 
-			String encoding = null;
-			double quality = 0.0;
+	    String encoding = null;
+	    double quality = 0.0;
 
-			String biasedcharset = getMsg().getString("output-encoding-name");
+	    String biasedcharset = getMsg().getString("output-encoding-name");
 
-			for (int i = 0; i < charsets.length && quality < 1.0; i++) {
-				HttpAcceptCharset charset = charsets[i];
+	    for (int i = 0; i < charsets.length && quality < 1.0; i++) {
+		HttpAcceptCharset charset = charsets[i];
 
-				String currentCharset = charset.getCharset();
+		String currentCharset = charset.getCharset();
 
-				// checks that the charset is supported by Java
+		// checks that the charset is supported by Java
 
-				if (isCharsetSupported(currentCharset)) {
-					double currentQuality = charset.getQuality();
+		if (isCharsetSupported(currentCharset)) {
+		    double currentQuality = charset.getQuality();
 
-					// we prefer utf-8
-					// FIXME (the bias value and the biased charset
-					// should be dependant on the language)
-					if ((biasedcharset != null) && !biasedcharset.equalsIgnoreCase(currentCharset)) {
-						currentQuality = currentQuality * 0.5;
-					}
-					if (currentQuality > quality) {
-						quality = currentQuality;
-						encoding = charset.getCharset();
-					}
-				}
-			}
-			if (encoding != null) {
-				getMsg().properties.setProperty("content-encoding", encoding);
-			} else {
-				// no valid charset
-				getMsg().properties.remove("content-encoding");
-			}
-		} else {
-			// no Accept-Charset given
-			getMsg().properties.remove("content-encoding");
+		    // we prefer utf-8
+		    // FIXME (the bias value and the biased charset
+		    // should be dependent on the language)
+		    if ((biasedcharset != null) && !biasedcharset.equalsIgnoreCase(currentCharset)) {
+			currentQuality = currentQuality * 0.5;
+		    }
+		    if (currentQuality > quality) {
+			quality = currentQuality;
+			encoding = charset.getCharset();
+		    }
 		}
+	    }
+	    if (encoding != null) {
+		getMsg().properties.setProperty("content-encoding", encoding);
+	    } else {
+		// no valid charset
+		getMsg().properties.remove("content-encoding");
+	    }
+	} else {
+	    // no Accept-Charset given
+	    getMsg().properties.remove("content-encoding");
 	}
+    }
 
-	private boolean isCharsetSupported(String charset) {
-		if ("*".equals(charset)) {
-			return true;
-		}
-		try {
-			return Charset.isSupported(charset);
-		} catch (Exception e) {
-			return false;
-		}
+    private boolean isCharsetSupported(String charset) {
+	if ("*".equals(charset)) {
+	    return true;
+	}
+	try {
+	    return Charset.isSupported(charset);
+	} catch (Exception e) {
+	    return false;
 	}
+    }
+
+    public void setCharsetForURL(URL url, String charset) {
+	if (uricharsets == null) {
+	    uricharsets = new HashMap<URL,String>();
+	}
+	uricharsets.put(url, charset);
+    }
+    
+    public String getCharsetForURL(URL url) {
+	if (uricharsets == null) {
+	    return null;
+	}
+	return uricharsets.get(url);
+    }
+    
 }

Index: HTTPURL.java
===================================================================
RCS file: /sources/public/2002/css-validator/org/w3c/css/util/HTTPURL.java,v
retrieving revision 1.20
retrieving revision 1.21
diff -u -d -r1.20 -r1.21
--- HTTPURL.java	4 Feb 2009 15:19:20 -0000	1.20
+++ HTTPURL.java	13 Feb 2009 21:50:16 -0000	1.21
@@ -19,6 +19,11 @@
 
 import java.util.zip.GZIPInputStream;
 
+import org.w3c.www.mime.MimeType;
+import org.w3c.www.mime.MimeTypeFormatException;
+
+import org.apache.velocity.io.UnicodeInputStream;
+
 /**
  * @version $Revision$
  * @author  Philippe Le Hegaret
@@ -265,22 +270,55 @@
     }
 
     /* more madness */
-    public static InputStream getInputStream(URLConnection uco) throws IOException {
+    public static InputStream getInputStream(ApplContext ac, URLConnection uco) 
+	throws IOException 
+    {
 	InputStream orig_stream = uco.getInputStream();
+	String charset;
 	String encoding;
 	if (orig_stream == null) {
 	    return orig_stream; // let it fail elsewhere
 	}
 	encoding = uco.getContentEncoding();
 	// not set -> return
-	if (encoding == null) {
-	    return orig_stream;
+	if (encoding != null) {
+	    if (encoding.equalsIgnoreCase("gzip")) {
+		orig_stream = new GZIPInputStream(orig_stream);
+	    }
 	}
-	if (encoding.equalsIgnoreCase("gzip")) {
-	    return new GZIPInputStream(orig_stream);
+	charset = getCharacterEncoding(ac, uco);
+	if ((charset == null) || (charset.regionMatches(true, 0, "utf", 0, 3))) {
+	    UnicodeInputStream is = new UnicodeInputStream(orig_stream);
+	    charset =  is.getEncodingFromStream();
+	    if (charset != null) {
+		ac.setCharsetForURL(uco.getURL(), charset);
+	    }
+	    return is;
 	}
 	return orig_stream;
     }
+
+    public static String getCharacterEncoding(ApplContext ac, URLConnection uco) {
+	String charset = ac.getCharsetForURL(uco.getURL());
+	if (charset != null) {
+	    return charset;
+	}
+	String mtypestr = uco.getContentType();
+	if (mtypestr == null) {
+	    return mtypestr;
+	}
+	MimeType mt;
+	try { 
+	    mt = new MimeType(mtypestr);
+	} catch (MimeTypeFormatException mex) {
+	    return null;
+	}
+        charset =  mt.getParameterValue("charset");
+	if (charset != null) {
+	    ac.setCharsetForURL(uco.getURL(), charset);
+	}
+	return charset;
+    }
     /**
      *
      */
Received on Friday, 13 February 2009 21:50:40 GMT

This archive was generated by hypermail 2.2.0+W3C-0.50 : Thursday, 26 April 2012 12:55:08 GMT