Pages

Friday, June 1, 2012

Java - sorting strings with Hungarian characters

        Let's sort some Hungarian strings in alphabetical order:

        String[] words = {"Bruce","Géza","Ákos"};

        Collator c = Collator.getInstance(new Locale("hu"));      
        Arrays.sort(words, c);


        for(int m=0; m<words.length; m++)
            System.out.print(words[m]+", ");

3 comments:

  1. Did you try sorting the words Ágoston and Antal?
    In this case the sort order must be: Antal, Ágoston.

    ReplyDelete
    Replies
    1. Yes, that is true; it should be improved. Also try Ántal and Agoston. It works, but the pronunciation...

      Delete
    2. package javaapplicationsort;

      import java.text.Collator;
      import java.util.Arrays;
      import java.util.Comparator;
      import java.util.HashMap;
      import java.util.Locale;
      import java.util.Map;

      /**
       * Demonstrates sorting Hungarian words so that a word starting with an
       * accented vowel is placed after every word starting with the matching
       * unaccented vowel (for example {@code Antal} before {@code Ágoston}).
       *
       * @author Sipos Lehel
       */
      public class JavaApplicationSort {

          /**
           * Rank of each vowel when it appears as the first letter of a word.
           * Keys are single upper-case letters; a lower rank sorts first.
           *
           * <p>According to the hu Collation Sequence page the Hungarian
           * alphabetic order is:
           * A=Á, B, C, CS, D, DZ, DZS, E=É, F, G, GY, H, I=Í, J, K, L, LY, M,
           * N, NY, O=Ó, Ö=Ő, P, Q, R, S, SZ, T, TY, U=Ú, Ü=Ű, V, W, X, Y, Z, ZS.
           * This map additionally separates each unaccented vowel from its
           * accented counterpart (Á, É, Í, Ó, Ő, Ú, Ű).
           *
           * <p>The map is filled by a static initializer so that
           * {@link CompSortEkezet} works even when {@link #main} was never run.
           */
          static final Map<String, Integer> map = new HashMap<>();

          static {
              String[] vowelsInOrder = {
                  "A", "Á", "E", "É", "I", "Í", "O", "Ó", "Ö", "Ő", "U", "Ú", "Ü", "Ű"
              };
              for (int rank = 0; rank < vowelsInOrder.length; rank++) {
                  map.put(vowelsInOrder[rank], rank);
              }
          }

          /**
           * Sorts a fixed list of example words and prints them on one line,
           * each followed by {@code ", "}.
           *
           * @param args ignored
           */
          public static void main(String[] args) {
              //the example
              String[] words = {"Bruce","Ágoston","Antal", "Csilla","Cecil","Imre","Íbetű"};

              // A single pass is enough: the comparator itself falls back to the
              // Hungarian collator whenever the leading vowels do not decide.
              Arrays.sort(words, new CompSortEkezet());

              for (String word : words) {
                  System.out.print(word + ", ");
              }
          }

          /**
           * Case-insensitive comparator for Hungarian words.
           *
           * <p>If both words start with a letter listed in {@link #map} and the
           * two ranks differ, the rank decides. In every other case the result
           * of the Hungarian {@link Collator} on the upper-cased words is
           * returned, so the comparator can be used on its own instead of only
           * as a second pass over an already collated array.
           *
           * <p>NOTE(review): consistency of the two rules relies on the collator
           * ordering first letters compatibly with the ranks in {@link #map};
           * words starting with accented letters that are missing from the map
           * (for example Ä) are ordered by the collator alone - confirm this is
           * acceptable for the data being sorted.
           */
          static class CompSortEkezet implements Comparator<Object> {

              /** Fixed locale so that upper-casing does not depend on the default locale. */
              private static final Locale HUNGARIAN = Locale.forLanguageTag("hu");

              /** Fallback ordering for everything the vowel ranks do not decide. */
              private static final Collator COLLATOR = Collator.getInstance(HUNGARIAN);

              /**
               * Compares the string forms of the two arguments.
               *
               * @param o1 first word; its {@code toString()} value is compared
               * @param o2 second word; its {@code toString()} value is compared
               * @return a negative number, zero, or a positive number when the
               *         first word sorts before, equal to, or after the second
               * @throws NullPointerException if either argument is {@code null}
               */
              @Override
              public int compare(Object o1, Object o2) {
                  String s1 = o1.toString().toUpperCase(HUNGARIAN);
                  String s2 = o2.toString().toUpperCase(HUNGARIAN);

                  Integer rank1 = leadingVowelRank(s1);
                  Integer rank2 = leadingVowelRank(s2);

                  // Only the beginning of the words is ranked; equal ranks (same
                  // leading vowel) are left to the collator below.
                  if (rank1 != null && rank2 != null && !rank1.equals(rank2)) {
                      return Integer.compare(rank1, rank2);
                  }
                  return COLLATOR.compare(s1, s2);
              }

              /**
               * Looks up the rank of the first letter of an upper-cased word.
               *
               * @return the rank from {@link #map}, or {@code null} when the word
               *         is empty or its first letter is not listed in the map
               */
              private static Integer leadingVowelRank(String upperCased) {
                  if (upperCased.isEmpty()) {
                      return null;
                  }
                  return map.get(upperCased.substring(0, 1));
              }
          }

      }

      Delete