Advertisement
nullzero

Anagram

Dec 2nd, 2012
121
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 2.63 KB | None | 0 0
  1. #include <stdio.h>
  2. #include <stdlib.h>
  3.  
  4. #define MAXN 100000 // assume that maximum number of words is MAXN
  5. #define ALLCHAR 256 // assume that maximum distinct alphabets is about ALLCHAR
  6. #define MAXCHAR 256 // assume that maximum length is MAXCHAR/3
  7. #define TRUE 1
  8. #define FALSE 0
  9. #define FILENAME "thaiwords.txt"
  10.  
  11. typedef struct{
  12.     int bucket[ALLCHAR], id;
  13. }Group;
  14.  
  15. typedef char bool;
  16.  
  17. char word[MAXN][MAXCHAR];
  18. Group group[MAXN];
  19.  
  20. int cmp(const void* aa, const void* bb){
  21.     Group* a = (Group*)aa;
  22.     Group* b = (Group*)bb;
  23.     int i;
  24.     for(i = 0; i < ALLCHAR; ++i)
  25.         if(a->bucket[i] != b->bucket[i]) return a->bucket[i] - b->bucket[i];
  26.            
  27.     return 0;
  28. }
  29.  
  30. bool is_equal(int a, int b){
  31.     int i;
  32.     for(i = 0; i < ALLCHAR; ++i)
  33.         if(group[a].bucket[i] != group[b].bucket[i]) return FALSE;
  34.            
  35.     return TRUE;
  36. }
  37.  
  38. int main(){
  39.     int n = 0, i, len;
  40.     FILE* fp = fopen(FILENAME, "r");
  41.     if(!fp) printf("File not found!\n"), exit(1);
  42.     while(fgets(word[n], MAXCHAR,fp)){
  43.         for(i = 0; word[n][i] != 0; ++i){
  44.             if(word[n][i] == ' ') continue;
  45.             group[n].bucket[(unsigned char)word[n][i]]++;
  46.         }
  47.         group[n].id = n;
  48.         n++;
  49.     }
  50.     qsort(group, n, sizeof(group[0]), cmp);
  51.     int allgroup = 0;
  52.    
  53.     printf("%s", word[group[0].id]); // note that fgets() get '\n' character.
  54.     allgroup = 1;
  55.     for(i = 1; i < n; ++i){
  56.         if(is_equal(i - 1, i)){
  57.             printf("%s", word[group[i].id]);
  58.         }else{
  59.             printf("\n");
  60.             printf("%s", word[group[i].id]);
  61.             allgroup++;
  62.         }
  63.     }
  64.     printf("\nall groups = %d, all words = %d\n", allgroup, n);
  65.     fclose(fp);
  66.     return 0;
  67. }
  68.  
  69. /*
  70. Complexity:
  71. O((MAXN * ALLCHAR * log(MAXN)) + (MAXN * MAXCHAR))
  72. It is good to use this algorithm when ALLCHAR is low, but MAXCHAR is high.
  73.  
  74. With suffix tree or string hashing, this bound would be
  75. O((MAXN * log(MAXN) * log(MAXCHAR)) + (MAXN * MAXCHAR * log(MAXCHAR))).
  76. It is good to use this algorithm when ALLCHAR is high, but MAXCHAR is low.
  77.  
  78. Note:
  79. From http://www.acc.umu.se/~saasha/charsets/?charset=iso-8859-11&charset=iso-8859-11
  80. Each Thai character becomes three numbers.
  81. 'ก' = 224, 184, 129
  82. 'ข' = 224, 184, 130
  83. ...
  84. '๛' = 224, 185, 155
  85.  
  86. Luckily, although we separate a single character to three numbers, there will not be the vague case; that is, we don't have to concern about false positive outcome.
  87.  
  88. Testcase:
  89. sadadsasdasddsaasdasdasdads
  90. สวัสดีabc
  91. asdadsasdasdasdasdasdasdads
  92. acสวีbดัส
  93. abc
  94. สวัสดีดี
  95. asdadsasdasddsaasdasdasdads
  96. */
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement