XHR content-type rewriting

Hi,

while trying to fix a bug I discovered a weird workaround in Mozilla 
(<https://bugzilla.mozilla.org/show_bug.cgi?id=397234>).

The summary is:

1) when sending text using send(""), all browsers encode in UTF-8

2) the caller may have set the content-type header field before

3) if this was the case, the charset, if present, needs to be adjusted 
(<http://www.w3.org/TR/XMLHttpRequest/#the-send-method>)

4) due to broken content (GWT), Mozilla tries to preserve the case of 
the charset name, if it was the "right" one (so if the caller set 
'UtF-8', that's what gets onto the wire). Apparently this was added 
because some servers didn't handle charset names properly.

So I wrote some tests to compare FF's behavior with other UAs.

Summary:

- all UAs use the UTF-8 encoding for the payload

- Opera and IE do not rewrite the type; so if the caller sets the wrong 
charset, this is what is sent to the server

- Chrome, Safari and FF try to fix the charset param. All of them 
preserve the syntax (quoted-string vs token) and also handle single 
quotes incorrectly.

- Finally, only Firefox attempts to preserve the casing of the charset 
param - this may indicate that the workaround added for the 
aforementioned bug isn't needed anymore.

Test code:

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.InetSocketAddress;
import java.nio.charset.StandardCharsets;
import java.util.List;

import com.sun.net.httpserver.HttpExchange;
import com.sun.net.httpserver.HttpHandler;
import com.sun.net.httpserver.HttpServer;

/**
 * Small stand-alone HTTP server for observing how a browser rewrites the
 * Content-Type header of an XMLHttpRequest.
 *
 * <p>
 * The server listens on port 8080 and registers two contexts:
 * <ul>
 * <li>{@code /start} serves an HTML page whose script POSTs a fixed text
 * payload to {@code /report} once per Content-Type variant, copying the
 * requested type into an {@code X-Test} header as well.</li>
 * <li>{@code /report} prints, for each request, the User-Agent, the
 * {@code X-Test} value ("intended"), the Content-Type that actually arrived
 * ("received") and a hex dump of the request body.</li>
 * </ul>
 */
public class XHRContentTypeRewriting {

	/** TCP port the test server listens on. */
	private static final int PORT = 8080;

	/**
	 * Starts the server. The call returns once the server has been started;
	 * requests are then handled by the server's default executor.
	 *
	 * @param args
	 *            ignored
	 * @throws IOException
	 *             if the server cannot be created
	 */
	public static void main(String[] args) throws IOException {

		HttpServer server = HttpServer.create(new InetSocketAddress(PORT), 0);

		server.createContext("/start", new ServeHtml());
		server.createContext("/report", new Report());

		// null selects the default executor of HttpServer
		server.setExecutor(null);
		server.start();
	}

	/**
	 * Builds the HTML test page. Its {@code run()} function is invoked on
	 * load and issues one synchronous POST per Content-Type variant.
	 * Package-private so that it can be checked without starting the server.
	 *
	 * @return the complete HTML document
	 */
	static String testPage() {
		return "<html><head><title>XHR Content-Type Rewriting Test</title>"
				+ "<script>"
				+ "function post(type) {"
				+ "  var req = new XMLHttpRequest();\n"
				+ "  req.open ('POST', '/report', false);"
				+ "  req.setRequestHeader('Content-Type', type);\n"
				+ "  req.setRequestHeader('X-Test', type);\n"
				// the doubled backslash hands the escape to the script, so the
				// payload ends with a non-ASCII pound sign
				+ "  req.send('pound: \\u00a3');\n"
				+ "}\n"
				+ "function run() {\n"
				+ "  post('text/plain');\n"
				+ "  post('text/plain; charset=foo');\n"
				+ "  post('text/plain; charset=Iso-8859-1');\n"
				+ "  post('text/plain; charset=Utf-8');\n"
				+ "  post('text/plain; charset=\\'foo\\'');\n"
				+ "  post('text/plain; charset=\\'Iso-8859-1\\'');\n"
				+ "  post('text/plain; charset=\\'Utf-8\\'');\n"
				+ "  post('text/plain; charset=\"foo\"');\n"
				+ "  post('text/plain; charset=\"Iso-8859-1\"');\n"
				+ "  post('text/plain; charset=\"Utf-8\"');\n"
				+ "  post('text/plain; foo=\\'; charset=UTF-8');\n"
				+ "  post('text/plain; format=flowed; charset=ISO-8859-1');\n"
				+ "  post('text/plain; charset=ISO-8859-1; format=flowed');\n"
				+ "}\n"
				+ "</script>"
				+ "</head><body onload='run();'>"
				+ "</body></html>";
	}

	/**
	 * Reads the stream to its end and renders every byte as two lower-case
	 * hex digits followed by a single space. Package-private so that it can
	 * be checked without starting the server.
	 *
	 * @param in
	 *            stream to drain; it is not closed by this method
	 * @return the hex dump, or the empty string if the stream has no data
	 * @throws IOException
	 *             if reading from the stream fails
	 */
	static String hexDump(InputStream in) throws IOException {
		StringBuilder hex = new StringBuilder();
		for (int b = in.read(); b >= 0; b = in.read()) {
			hex.append(String.format("%02x ", b));
		}
		return hex.toString();
	}

	/**
	 * Sends {@code body} as a 200 response, encoded in UTF-8.
	 *
	 * @param h
	 *            exchange to answer
	 * @param contentType
	 *            value for the Content-Type response header
	 * @param body
	 *            response text
	 * @throws IOException
	 *             if writing the response fails
	 */
	private static void sendText(HttpExchange h, String contentType,
			String body) throws IOException {
		// Encode once, explicitly as UTF-8, so that the declared charset, the
		// announced length and the bytes written always agree.
		byte[] bytes = body.getBytes(StandardCharsets.UTF_8);

		h.getResponseHeaders().set("Content-Type", contentType);
		h.sendResponseHeaders(200, bytes.length);
		try (OutputStream os = h.getResponseBody()) {
			os.write(bytes);
		}
	}

	/** Serves the HTML test page. */
	private static class ServeHtml implements HttpHandler {

		@Override
		public void handle(HttpExchange h) throws IOException {
			sendText(h, "text/html; charset=UTF-8", testPage());
		}
	}

	/**
	 * Reports what arrived for a single test request, both on standard error
	 * and in the response body.
	 */
	private static class Report implements HttpHandler {

		@Override
		public void handle(HttpExchange h) throws IOException {
			// each of these is null when the request lacks the header
			List<String> ua = h.getRequestHeaders().get("User-Agent");
			List<String> ct = h.getRequestHeaders().get("Content-Type");
			List<String> xt = h.getRequestHeaders().get("X-Test");

			String payload;
			try (InputStream is = h.getRequestBody()) {
				payload = hexDump(is);
			}

			String response = "User-Agent: " + ua + "\n" + "  intended: " + xt
					+ "\n" + "  received: " + ct + "\n" + "   payload: "
					+ payload + "\n";

			System.err.println(response);

			sendText(h, "text/plain; charset=UTF-8", response);
		}
	}
}


Results, with comments added:

User-Agent: [Mozilla/5.0 (Windows NT 6.1; WOW64; rv:8.0) Gecko/20100101 
Firefox/8.0]

   intended: [text/plain]
   received: [text/plain; charset=UTF-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset=foo]
   received: [text/plain; charset=UTF-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset=Iso-8859-1]
   received: [text/plain; charset=UTF-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset=Utf-8]
   received: [text/plain; charset=Utf-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset='foo']
   received: [text/plain; charset=UTF-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset='Iso-8859-1']
   received: [text/plain; charset=UTF-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset='Utf-8']
   received: [text/plain; charset='Utf-8']
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset="foo"]
   received: [text/plain; charset=UTF-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset="Iso-8859-1"]
   received: [text/plain; charset=UTF-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset="Utf-8"]
   received: [text/plain; charset="Utf-8"]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; foo='; charset=UTF-8]
   received: [text/plain; charset=UTF-8; foo='; charset=UTF-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3
# confused by single quote in preceding param

   intended: [text/plain; format=flowed; charset=ISO-8859-1]
   received: [text/plain; format=flowed; charset=UTF-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset=ISO-8859-1; format=flowed]
   received: [text/plain; charset=UTF-8; format=flowed]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

User-Agent: [Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; 
Trident/5.0)]
# doesn't touch the type, thus sends inconsistent charset information

   intended: [text/plain]
   received: [text/plain]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset=foo]
   received: [text/plain; charset=foo]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset=Iso-8859-1]
   received: [text/plain; charset=Iso-8859-1]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset=Utf-8]
   received: [text/plain; charset=Utf-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset='foo']
   received: [text/plain; charset='foo']
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset='Iso-8859-1']
   received: [text/plain; charset='Iso-8859-1']
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset='Utf-8']
   received: [text/plain; charset='Utf-8']
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset="foo"]
   received: [text/plain; charset="foo"]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset="Iso-8859-1"]
   received: [text/plain; charset="Iso-8859-1"]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset="Utf-8"]
   received: [text/plain; charset="Utf-8"]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; foo='; charset=UTF-8]
   received: [text/plain; foo='; charset=UTF-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; format=flowed; charset=ISO-8859-1]
   received: [text/plain; format=flowed; charset=ISO-8859-1]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset=ISO-8859-1; format=flowed]
   received: [text/plain; charset=ISO-8859-1; format=flowed]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

User-Agent: [Opera/9.80 (Windows NT 6.1; U; en) Presto/2.9.168 
Version/11.51]
# doesn't touch the type, thus sends inconsistent charset information

   intended: [text/plain]
   received: [text/plain]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset=foo]
   received: [text/plain; charset=foo]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset=Iso-8859-1]
   received: [text/plain; charset=Iso-8859-1]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset=Utf-8]
   received: [text/plain; charset=Utf-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset='foo']
   received: [text/plain; charset='foo']
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset='Iso-8859-1']
   received: [text/plain; charset='Iso-8859-1']
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset='Utf-8']
   received: [text/plain; charset='Utf-8']
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset="foo"]
   received: [text/plain; charset="foo"]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset="Iso-8859-1"]
   received: [text/plain; charset="Iso-8859-1"]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset="Utf-8"]
   received: [text/plain; charset="Utf-8"]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; foo='; charset=UTF-8]
   received: [text/plain; foo='; charset=UTF-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; format=flowed; charset=ISO-8859-1]
   received: [text/plain; format=flowed; charset=ISO-8859-1]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset=ISO-8859-1; format=flowed]
   received: [text/plain; charset=ISO-8859-1; format=flowed]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

User-Agent: [Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.51.22 
(KHTML, like Gecko) Version/5.1.1 Safari/534.51.22]

   intended: [text/plain]
   received: [text/plain]
    payload: 70 6f 75 6e 64 3a 20 c2 a3
# charset missing

   intended: [text/plain; charset=foo]
   received: [text/plain; charset=UTF-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset=Iso-8859-1]
   received: [text/plain; charset=UTF-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset=Utf-8]
   received: [text/plain; charset=UTF-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset='foo']
   received: [text/plain; charset='UTF-8']
    payload: 70 6f 75 6e 64 3a 20 c2 a3
# broken single quotes preserved, charset rewritten

   intended: [text/plain; charset='Iso-8859-1']
   received: [text/plain; charset='UTF-8']
    payload: 70 6f 75 6e 64 3a 20 c2 a3
# broken single quotes preserved, charset rewritten

   intended: [text/plain; charset='Utf-8']
   received: [text/plain; charset='UTF-8']
    payload: 70 6f 75 6e 64 3a 20 c2 a3
# broken single quotes preserved, charset rewritten

   intended: [text/plain; charset="foo"]
   received: [text/plain; charset="UTF-8"]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset="Iso-8859-1"]
   received: [text/plain; charset="UTF-8"]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset="Utf-8"]
   received: [text/plain; charset="UTF-8"]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; foo='; charset=UTF-8]
   received: [text/plain; foo='; charset=UTF-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; format=flowed; charset=ISO-8859-1]
   received: [text/plain; format=flowed; charset=UTF-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset=ISO-8859-1; format=flowed]
   received: [text/plain; charset=UTF-8; format=flowed]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

User-Agent: [Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.2 
(KHTML, like Gecko) Chrome/15.0.874.120 Safari/535.2]

   intended: [text/plain]
   received: [text/plain]
    payload: 70 6f 75 6e 64 3a 20 c2 a3
# charset missing

   intended: [text/plain; charset=foo]
   received: [text/plain; charset=UTF-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset=Iso-8859-1]
   received: [text/plain; charset=UTF-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset=Utf-8]
   received: [text/plain; charset=UTF-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset='foo']
   received: [text/plain; charset='UTF-8']
    payload: 70 6f 75 6e 64 3a 20 c2 a3
# broken single quotes preserved, charset rewritten

   intended: [text/plain; charset='Iso-8859-1']
   received: [text/plain; charset='UTF-8']
    payload: 70 6f 75 6e 64 3a 20 c2 a3
# broken single quotes preserved, charset rewritten

   intended: [text/plain; charset='Utf-8']
   received: [text/plain; charset='UTF-8']
    payload: 70 6f 75 6e 64 3a 20 c2 a3
# broken single quotes preserved, charset rewritten

   intended: [text/plain; charset="foo"]
   received: [text/plain; charset="UTF-8"]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset="Iso-8859-1"]
   received: [text/plain; charset="UTF-8"]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset="Utf-8"]
   received: [text/plain; charset="UTF-8"]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; foo='; charset=UTF-8]
   received: [text/plain; foo='; charset=UTF-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; format=flowed; charset=ISO-8859-1]
   received: [text/plain; format=flowed; charset=UTF-8]
    payload: 70 6f 75 6e 64 3a 20 c2 a3

   intended: [text/plain; charset=ISO-8859-1; format=flowed]
   received: [text/plain; charset=UTF-8; format=flowed]
    payload: 70 6f 75 6e 64 3a 20 c2 a3



Hope this helps,

Julian

Received on Wednesday, 16 November 2011 13:19:14 UTC