Re: Transcribing non-ascii URLs [was: revised "generic syntax" internet draft]

Gary Adams - Sun Microsystems Labs BOS (Gary.Adams@east.sun.com)
Wed, 16 Apr 1997 16:53:42 -0400


Date: Wed, 16 Apr 1997 16:53:42 -0400
From: Gary.Adams@east.sun.com (Gary Adams - Sun Microsystems Labs BOS)
Message-Id: <199704162053.QAA04656@zeppo.East.Sun.COM>
To: bert@w3.org, uri@services.bunyip.com
Subject: Re: Transcribing non-ascii URLs [was: revised "generic syntax" internet draft]

On a similar note, here's how a Unicode client might take an input URL
and transcode its characters before sending it to a server that only accepts
a platform-specific encoding. This was hacked together today on JDK 1.1.1
and may still be buggy (it has only been tested on Solaris 2.5 with LANG=en_US).

 - save in file UTF8URL.java
 - compile as  "javac UTF8URL.java"
 - run as "java UTF8URL"

-----------------------------------------------------------------------
import java.awt.* ;
import java.awt.event.* ;
import java.net.URLEncoder;
import sun.io.CharToByteConverter;

/**
 * Small AWT demo: the user types text into a text area and picks a character
 * encoding from a drop-down list.  On every text change the text is
 * transcoded into the selected encoding, the resulting octets are
 * URL-escaped, and the encoding name, the raw input and the escaped form are
 * printed to standard output separated by tabs.
 *
 * Transcoding uses the public String.getBytes(String) API, so the encoding
 * currently shown in the drop-down list is always the one that is applied.
 */
public class UTF8URL extends Frame
                     implements TextListener, ItemListener {

   /** Name of the encoding currently selected in the drop-down list. */
   public String encoding;

   /** URL-escaped form of the most recently transcoded input text. */
   public String encoded_text;

   /** Hex digits used when writing a %XX escape. */
   private static final String HEX_DIGITS = "0123456789ABCDEF";

   /** Encoding names offered to the user, in display order. */
   private String [] charset_list = {
      "8859_1", "8859_2", "8859_3", "8859_5",
      "8859_6", "8859_7", "8859_9", "Cp1250",
      "Cp1251", "Cp1252", "Cp1253", "Cp1254",
      "Cp1255", "Cp1256", "Cp1257", "Cp1258",
      "Cp437", "Cp737", "Cp775", "Cp850",
      "Cp852", "Cp855", "Cp857", "Cp860",
      "Cp861", "Cp862", "Cp863", "Cp864",
      "Cp865", "Cp866", "Cp869", "Cp874",
      "EUCJIS", "JIS", "MacArabic", "MacCentralEurope",
      "Macintosh", "MacCroatian", "MacCyrillic", "MacDingbat",
      "MacGreek", "MacHebrew", "MacIceland", "MacRoman",
      "MacRomania", "MacSymbol", "MacThai", "MacTurkish",
      "MacUkraine", "SJIS", "UTF8"
   };

   /**
    * Builds the window (a text area plus an encoding chooser), registers
    * this object as listener on both, selects the first encoding of the
    * list and makes the window visible.
    */
   public UTF8URL () {
      super("Character set demo");
      Panel p = new Panel ();
      Choice c = new Choice();
      TextArea t = new TextArea("", 3, 45,
                                TextArea.SCROLLBARS_VERTICAL_ONLY);

      for (int i = 0 ; i < charset_list.length; i++) {
         c.add(charset_list[i]);
      }

      // The Choice initially shows its first item, so start from the same one.
      encoding = charset_list[0];

      c.addItemListener(this);
      t.addTextListener(this);

      p.add(t);
      p.add(c);

      add(p);
      pack();
      setVisible(true);
   }

   /**
    * Records the newly selected encoding.  It takes effect on the next
    * text change because textValueChanged reads this field on every call.
    *
    * @param ie event whose source is the encoding Choice
    */
   public void itemStateChanged(ItemEvent ie) {
      encoding = ((Choice)(ie.getSource())).getSelectedItem();
   }

   /**
    * Transcodes the current text into the selected encoding, URL-escapes
    * the octets, stores the result in encoded_text and prints
    * "encoding TAB input TAB escaped" to standard output.  If the runtime
    * does not support the selected encoding, a message is written to
    * standard error and encoded_text is left unchanged.
    *
    * @param te event whose source is the input TextArea
    */
   public void textValueChanged(TextEvent te) {
      String input_text = ((TextArea)(te.getSource())).getText();

      try {
         encoded_text = transcodeAndEscape(input_text, encoding);

         System.out.println( encoding + "\t"
                           + input_text + "\t"
                           + encoded_text);
      } catch (java.io.UnsupportedEncodingException e) {
         System.err.println(e.toString() + " " + encoding);
      }
   }

   /**
    * Converts text to octets in the named encoding and URL-escapes them.
    *
    * Each octet is handled on its own: ASCII letters, digits and the
    * characters '-', '_', '.', '*' are copied unchanged, a space becomes
    * '+', and every other octet is written as '%' followed by two
    * upper-case hex digits.  Working on the octets directly (instead of
    * turning them back into a String first) keeps the output independent
    * of the platform default encoding and copes with encodings that
    * produce more than one octet per character.
    *
    * @param text        characters to transcode; must not be null
    * @param charsetName encoding name accepted by String.getBytes(String)
    * @return the escaped text, containing ASCII characters only
    * @throws java.io.UnsupportedEncodingException if the runtime does not
    *         support charsetName
    */
   public static String transcodeAndEscape(String text, String charsetName)
         throws java.io.UnsupportedEncodingException {
      byte [] octets = text.getBytes(charsetName);
      StringBuffer escaped = new StringBuffer(octets.length * 3);

      for (int i = 0; i < octets.length; i++) {
         int b = octets[i] & 0xFF;   // treat the byte as unsigned

         if (isUnescaped(b)) {
            escaped.append((char) b);
         } else if (b == ' ') {
            escaped.append('+');
         } else {
            escaped.append('%');
            escaped.append(HEX_DIGITS.charAt(b >> 4));
            escaped.append(HEX_DIGITS.charAt(b & 0x0F));
         }
      }
      return escaped.toString();
   }

   /** Tells whether an octet value may appear unescaped in the output. */
   private static boolean isUnescaped(int b) {
      return (b >= 'a' && b <= 'z')
          || (b >= 'A' && b <= 'Z')
          || (b >= '0' && b <= '9')
          || b == '-' || b == '_' || b == '.' || b == '*';
   }

   /**
    * Starts the demo and ends the program when its window is closed.
    *
    * @param args ignored
    */
   public static void main (String[] args) {
      final UTF8URL udemo = new UTF8URL();

      // Installed here rather than in the constructor so that code which
      // merely instantiates the frame is not forced to exit the JVM.
      udemo.addWindowListener(new WindowAdapter() {
         public void windowClosing(WindowEvent we) {
            udemo.dispose();
            System.exit(0);
         }
      });
   }
}
\
/gra