XHR content-type rewriting


While trying to fix a bug, I discovered a weird workaround in Mozilla.

The summary is:

1) when sending text using send(""), all browsers encode in UTF-8

2) the caller may have set the content-type header field before

3) if this was the case, the charset, if present, needs to be adjusted 

4) due to broken content (GWT), Mozilla tries to preserve the case of 
the charset name, if it was the "right" one (so if the caller set 
'UtF-8', that's what gets onto the wire). Apparently this was added 
because some servers didn't handle charset names properly.

So I wrote some tests to compare FF's behavior with other UAs.


- all UAs use the UTF-8 encoding for the payload

- Opera and IE do not rewrite the type; so if the caller sets the wrong 
charset, this is what is sent to the server

- Chrome, Safari and FF try to fix the charset param. All of them 
preserve the syntax (quoted-string vs token) and also handle single 
quotes incorrectly.

- Finally, only Firefox attempts to preserve the casing of the charset 
param - this may indicate that the workaround added for the 
aforementioned bug isn't needed anymore.

Test code:

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.InetSocketAddress;
import java.util.List;

import com.sun.net.httpserver.HttpExchange;
import com.sun.net.httpserver.HttpHandler;
import com.sun.net.httpserver.HttpServer;

/**
 * Small stand-alone HTTP server for observing how a browser rewrites the
 * Content-Type request header when XMLHttpRequest.send() is given a string.
 *
 * <p>Usage: run {@link #main(String[])} and open
 * <code>http://localhost:8080/start</code> in the browser under test. The page
 * issues a series of synchronous POSTs to <code>/report</code>; each one
 * carries the Content-Type the script asked for both in the real Content-Type
 * header (which the browser may rewrite) and in an <code>X-Test</code> header
 * (which it leaves alone), so the two can be compared on the server side.
 */
public class XHRContentTypeRewriting {

	/** Charset used to encode every response body; matches the charset announced in Content-Type. */
	private static final String UTF_8 = "UTF-8";

	/**
	 * Starts the test server on port 8080 with the two contexts
	 * <code>/start</code> (test page) and <code>/report</code> (result sink).
	 *
	 * @param args ignored
	 * @throws IOException if the server socket cannot be created
	 */
	public static void main(String[] args) throws IOException {

		HttpServer server = HttpServer.create(new InetSocketAddress(8080), 0);

		server.createContext("/start", new ServeHtml());
		server.createContext("/report", new Report());

		// Without this call the contexts are registered but nothing is ever served.
		server.start();
	}

	/**
	 * Sends a 200 response with the given media type and body.
	 *
	 * <p>The body is encoded explicitly as UTF-8 (rather than with the platform
	 * default charset) so that the announced charset and the announced length
	 * always agree with the bytes actually written.
	 *
	 * @param h           exchange to answer
	 * @param contentType value for the Content-Type response header
	 * @param body        response text
	 * @throws IOException if writing the response fails
	 */
	private static void respond(HttpExchange h, String contentType, String body)
			throws IOException {
		byte[] bytes = body.getBytes(UTF_8);

		h.getResponseHeaders().set("Content-Type", contentType);
		h.sendResponseHeaders(200, bytes.length);

		OutputStream os = h.getResponseBody();
		try {
			os.write(bytes);
		} finally {
			// Closing the body stream completes the exchange.
			os.close();
		}
	}

	/**
	 * Serves the HTML test page. On load, its script POSTs the text
	 * "pound: " followed by a pound sign (U+00A3) to <code>/report</code> once
	 * per Content-Type variant listed in <code>run()</code>.
	 */
	private static class ServeHtml implements HttpHandler {

		@Override
		public void handle(HttpExchange h) throws IOException {
			String response = "<html><head><title>XHR Content-Type Rewriting</title>"
					+ "<script>"
					+ "function post(type) {"
					+ "  var req = new XMLHttpRequest();\n"
					+ "  req.open ('POST', '/report', false);"
					+ "  req.setRequestHeader('Content-Type', type);\n"
					+ "  req.setRequestHeader('X-Test', type);\n"
					+ "  req.send('pound: \\u00a3');\n"
					+ "}\n"
					+ "function run() {\n"
					+ "  post('text/plain');\n"
					+ "  post('text/plain; charset=foo');\n"
					+ "  post('text/plain; charset=Iso-8859-1');\n"
					+ "  post('text/plain; charset=Utf-8');\n"
					+ "  post('text/plain; charset=\\'foo\\'');\n"
					+ "  post('text/plain; charset=\\'Iso-8859-1\\'');\n"
					+ "  post('text/plain; charset=\\'Utf-8\\'');\n"
					+ "  post('text/plain; charset=\"foo\"');\n"
					+ "  post('text/plain; charset=\"Iso-8859-1\"');\n"
					+ "  post('text/plain; charset=\"Utf-8\"');\n"
					+ "  post('text/plain; foo=\\'; charset=UTF-8');\n"
					+ "  post('text/plain; format=flowed; charset=ISO-8859-1');\n"
					+ "  post('text/plain; charset=ISO-8859-1; format=flowed');\n"
					+ "}\n"
					+ "</script>"
					+ "</head><body onload='run();'>"
					+ "</body></html>";

			respond(h, "text/html; charset=UTF-8", response);
		}
	}

	/**
	 * Receives one test POST and reports the User-Agent, the Content-Type the
	 * script intended (X-Test), the Content-Type that actually arrived, and a
	 * hex dump of the request body.
	 */
	private static class Report implements HttpHandler {

		@Override
		public void handle(HttpExchange h) throws IOException {
			// Each lookup yields the list of values for that header, or null if absent;
			// a null simply shows up as "null" in the report.
			List<String> ua = h.getRequestHeaders().get("User-Agent");
			List<String> ct = h.getRequestHeaders().get("Content-Type");
			List<String> xt = h.getRequestHeaders().get("X-Test");

			// Dump the raw request bytes as hex so the wire encoding is visible
			// independently of any charset label.
			InputStream is = h.getRequestBody();
			StringBuilder payload = new StringBuilder();
			int r;
			while ((r = is.read()) >= 0) {
				payload.append(String.format("%02x ", r));
			}

			String response = "User-Agent: " + ua + "\n" + "  intended: " + xt
					+ "\n" + "  received: " + ct + "\n" + "   payload: "
					+ payload.toString() + "\n";

			// The test page never reads the response of its synchronous XHR, so
			// also log the report on the server console where it can be collected.
			System.out.print(response);

			respond(h, "text/plain; charset=UTF-8", response);
		}
	}
}

Results, with comments added:

User-Agent: [Mozilla/5.0 (Windows NT 6.1; WOW64; rv:8.0) Gecko/20100101 Firefox/8.0]

   intended: [text/plain]
   received: [text/plain; charset=UTF-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset=foo]
   received: [text/plain; charset=UTF-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset=Iso-8859-1]
   received: [text/plain; charset=UTF-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset=Utf-8]
   received: [text/plain; charset=Utf-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset='foo']
   received: [text/plain; charset=UTF-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset='Iso-8859-1']
   received: [text/plain; charset=UTF-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset='Utf-8']
   received: [text/plain; charset='Utf-8']
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset="foo"]
   received: [text/plain; charset=UTF-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset="Iso-8859-1"]
   received: [text/plain; charset=UTF-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset="Utf-8"]
   received: [text/plain; charset="Utf-8"]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; foo='; charset=UTF-8]
   received: [text/plain; charset=UTF-8; foo='; charset=UTF-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3
# confused by single quote in preceding param

   intended: [text/plain; format=flowed; charset=ISO-8859-1]
   received: [text/plain; format=flowed; charset=UTF-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset=ISO-8859-1; format=flowed]
   received: [text/plain; charset=UTF-8; format=flowed]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

User-Agent: [Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; 
# doesn't touch the type, thus sends inconsistent charset information

   intended: [text/plain]
   received: [text/plain]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset=foo]
   received: [text/plain; charset=foo]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset=Iso-8859-1]
   received: [text/plain; charset=Iso-8859-1]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset=Utf-8]
   received: [text/plain; charset=Utf-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset='foo']
   received: [text/plain; charset='foo']
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset='Iso-8859-1']
   received: [text/plain; charset='Iso-8859-1']
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset='Utf-8']
   received: [text/plain; charset='Utf-8']
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset="foo"]
   received: [text/plain; charset="foo"]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset="Iso-8859-1"]
   received: [text/plain; charset="Iso-8859-1"]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset="Utf-8"]
   received: [text/plain; charset="Utf-8"]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; foo='; charset=UTF-8]
   received: [text/plain; foo='; charset=UTF-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; format=flowed; charset=ISO-8859-1]
   received: [text/plain; format=flowed; charset=ISO-8859-1]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset=ISO-8859-1; format=flowed]
   received: [text/plain; charset=ISO-8859-1; format=flowed]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

User-Agent: [Opera/9.80 (Windows NT 6.1; U; en) Presto/2.9.168 
# doesn't touch the type, thus sends inconsistent charset information

   intended: [text/plain]
   received: [text/plain]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset=foo]
   received: [text/plain; charset=foo]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset=Iso-8859-1]
   received: [text/plain; charset=Iso-8859-1]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset=Utf-8]
   received: [text/plain; charset=Utf-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset='foo']
   received: [text/plain; charset='foo']
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset='Iso-8859-1']
   received: [text/plain; charset='Iso-8859-1']
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset='Utf-8']
   received: [text/plain; charset='Utf-8']
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset="foo"]
   received: [text/plain; charset="foo"]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset="Iso-8859-1"]
   received: [text/plain; charset="Iso-8859-1"]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset="Utf-8"]
   received: [text/plain; charset="Utf-8"]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; foo='; charset=UTF-8]
   received: [text/plain; foo='; charset=UTF-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; format=flowed; charset=ISO-8859-1]
   received: [text/plain; format=flowed; charset=ISO-8859-1]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset=ISO-8859-1; format=flowed]
   received: [text/plain; charset=ISO-8859-1; format=flowed]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

User-Agent: [Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.51.22 
(KHTML, like Gecko) Version/5.1.1 Safari/534.51.22]

   intended: [text/plain]
   received: [text/plain]
    payload: 70 6f 75 6e 64 3a 20 c2 a3
# charset missing

   intended: [text/plain; charset=foo]
   received: [text/plain; charset=UTF-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset=Iso-8859-1]
   received: [text/plain; charset=UTF-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset=Utf-8]
   received: [text/plain; charset=UTF-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset='foo']
   received: [text/plain; charset='UTF-8']
    payload: 70 6f 75 6e 64 3a 20 c2 a3
# broken single quotes preserved, charset rewritten

   intended: [text/plain; charset='Iso-8859-1']
   received: [text/plain; charset='UTF-8']
    payload: 70 6f 75 6e 64 3a 20 c2 a3
# broken single quotes preserved, charset rewritten

   intended: [text/plain; charset='Utf-8']
   received: [text/plain; charset='UTF-8']
    payload: 70 6f 75 6e 64 3a 20 c2 a3
# broken single quotes preserved, charset rewritten

   intended: [text/plain; charset="foo"]
   received: [text/plain; charset="UTF-8"]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset="Iso-8859-1"]
   received: [text/plain; charset="UTF-8"]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset="Utf-8"]
   received: [text/plain; charset="UTF-8"]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; foo='; charset=UTF-8]
   received: [text/plain; foo='; charset=UTF-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; format=flowed; charset=ISO-8859-1]
   received: [text/plain; format=flowed; charset=UTF-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset=ISO-8859-1; format=flowed]
   received: [text/plain; charset=UTF-8; format=flowed]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

User-Agent: [Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.2 
(KHTML, like Gecko) Chrome/15.0.874.120 Safari/535.2]

   intended: [text/plain]
   received: [text/plain]
    payload: 70 6f 75 6e 64 3a 20 c2 a3
# charset missing

   intended: [text/plain; charset=foo]
   received: [text/plain; charset=UTF-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset=Iso-8859-1]
   received: [text/plain; charset=UTF-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset=Utf-8]
   received: [text/plain; charset=UTF-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset='foo']
   received: [text/plain; charset='UTF-8']
    payload: 70 6f 75 6e 64 3a 20 c2 a3
# broken single quotes preserved, charset rewritten

   intended: [text/plain; charset='Iso-8859-1']
   received: [text/plain; charset='UTF-8']
    payload: 70 6f 75 6e 64 3a 20 c2 a3
# broken single quotes preserved, charset rewritten

   intended: [text/plain; charset='Utf-8']
   received: [text/plain; charset='UTF-8']
    payload: 70 6f 75 6e 64 3a 20 c2 a3
# broken single quotes preserved, charset rewritten

   intended: [text/plain; charset="foo"]
   received: [text/plain; charset="UTF-8"]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset="Iso-8859-1"]
   received: [text/plain; charset="UTF-8"]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset="Utf-8"]
   received: [text/plain; charset="UTF-8"]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; foo='; charset=UTF-8]
   received: [text/plain; foo='; charset=UTF-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; format=flowed; charset=ISO-8859-1]
   received: [text/plain; format=flowed; charset=UTF-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset=ISO-8859-1; format=flowed]
   received: [text/plain; charset=UTF-8; format=flowed]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

Hope this helps,


Received on Wednesday, 16 November 2011 13:19:14 UTC