Advertisement
Guest User

Untitled

a guest
Sep 20th, 2018
85
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.16 KB | None | 0 0
  1. #include<stdio.h>
  2. #include<stdlib.h>
  3. #include<string.h>
  #define SS 200000
  #define TBLSZ 1009

  /* Single shared character arena of SS bytes; word text is stored here. */
  char strSpace[SS];

  /* Cursor to the first unused byte of strSpace (starts at its beginning). */
  char *f = &strSpace[0];
  9.  
  /*
   * One node of a singly linked chain of words.
   * Available both as `struct Box` and through the `box` alias.
   */
  typedef struct Box {
      char *word;        /* NUL-terminated text of the word */
      int count;         /* how many times the word has been seen */
      struct Box *next;  /* following node in the chain, or null at the end */
  } box;
  17.  
  18. box *table[TBLSZ]; //initialized to 0
  19.  
  20. long hash(char *word) {
  21. long ans = 0;
  22. while (*word)
  23. ans = (ans<<1) ^ *word++;
  24. return ans%TBLSZ;
  25. }
  26.  
  /*
   * Upper-case a single ASCII letter.
   * Values in 'a'..'z' are mapped to 'A'..'Z'; any other value is returned
   * unchanged.
   */
  int myToupper(int c) {
      const int is_lower = (c >= 'a' && c <= 'z');

      return is_lower ? c - ('a' - 'A') : c;
  }
  34.  
  35. //returns 0 at EOF; 1 if it gets a word
  36. //the word will be stored at address f in strSpace
  37. int getWord(FILE *fp) {
  38. while (1) {
  39. int c = fgetc(fp);
  40. if (c==EOF) return 0;
  41. if ( (c>='A' && c<='Z') || (c>='a' && c<='z') ) {
  42. ungetc(c, fp);
  43. fscanf(fp, "%[A-Za-z]", f);
  44. return 1;
  45. }
  46. }
  47. }
  48.  
  49. //returns 0 if the word is NOT in the list
  50. //returns 1 otherwise and increases the count of box by 1
  51. //f points to the new word, index is the place the new word
  52. //would be found in the table.
  53. int search(char *f, int index) {
  54. box *curr = table[index];
  55. while (curr) {
  56. if (strcmp(f, curr->word)==0) {
  57. curr->count++;
  58. return 1;
  59. }
  60. curr = curr->next;
  61. }
  62. return 0;
  63. }
  64.  
  65. int main() {
  66. FILE *fp = fopen("/u1/junk/shakespeare.txt", "r");
  67. int count=0;
  68. char *longest = strSpace;
  69. int maxlength = 3;
  70.  
  71. while (getWord(fp)) {
  72. //convert to upper case
  73. char *f1 = f;
  74. while(*f1) {
  75. *f1 = myToupper(*f1);
  76. f1++;
  77. }
  78. //
  79. //if (count>20) break;
  80. //Check is the word in the table?
  81. int index = hash(f);
  82. if (search(f, index)==0) { //new word is not already in list
  83. count++;
  84. //put the word into the list
  85. //make new box and add it to list
  86. box *nb = malloc(sizeof(box));
  87. nb->word = f;
  88. nb->count = 1;
  89. nb->next = table[index];
  90. table[index] = nb;
  91. //f no longer points to free space; we need to up date f
  92. f = f1+1;
  93. if (f-strSpace>900000) printf("Running out of string space\n");
  94. }
  95. }
  96. printf("total number of distinct words is %d\n", count);
  97. printf("total amount of strSpace: %d\n", f-strSpace);
  98. //total number of words in the list containing THE
  99. int index = hash("THE");
  100. printf("THE hashes to %d\n", index);
  101. box *curr = table[index];
  102. count =0;
  103. while(curr) {
  104. count++;
  105. printf("%s\n", curr->word);
  106. curr = curr->next;
  107. }
  108. printf("the number of words in the list is %d\n", count);
  109. int i;
  110. int k16=0; //starting with zero lists of size 16
  111. for(i=0; i<1009; i++) {
  112. //find the length of the list at index i.
  113. box *curr = table[i];
  114. int knt=0;
  115. while(curr) {
  116. knt++;
  117. curr = curr->next;
  118. }
  119. if (knt==16)
  120. k16++;
  121. }
  122. printf("number of lists of size 16= %d\n", k16);
  123. printf("the most frequent word\n");
  124. char *mostFreq = 0;
  125. int freq = 0;
  126. int largest = 0;
  127. int empty = 0;
  128. int length = 1;
  129. char *lword;
  130. for (int i = 0; i < 1009; i++) {
  131. box *curr = table[i];
  132. while (curr) {
  133. if (curr->count > freq) {
  134. mostFreq = curr->word;
  135. freq = curr->count;
  136. }
  137. curr = curr->next;
  138. }
  139. }
  140.  
  141. for (int k = 0; k < 1009; k++) {
  142. box *curr = table[k];
  143. int streak = 0;
  144. while (curr) {
  145. if (curr->next != 0)
  146. streak++;
  147. if (streak > largest)
  148. largest = streak;
  149. curr = curr->next;
  150.  
  151. }
  152. }
  153.  
  154. for (int n = 0; n < 1009; n++) {
  155. box *curr = table[n];
  156. while (curr) {
  157. if (curr->count == 0)
  158. empty++;
  159. curr = curr->next;
  160. }
  161. }
  162.  
  163. for (int m = 0; m < 1009; m++) {
  164. box *curr = table[m];
  165. int streak = 0;
  166. while (curr) {
  167. if (strlen(curr->word) > streak)
  168. streak = strlen(curr->word);
  169. if (streak > length) {
  170. length = streak;
  171. lword = curr->word;
  172. }
  173. curr = curr->next;
  174. }
  175. }
  176.  
  177.  
  178. printf("word %s frequency %d\n", mostFreq, freq);
  179. printf("Largest list: %d\n", largest);
  180. printf("Empty lists: %d\n", empty);
  181. printf("Longest word %s: %d\n", lword, length);
  182. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement