Guest User

Untitled

a guest
Jul 16th, 2018
72
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 8.92 KB | None | 0 0
  1. /* Xingbo Wang CSC192 997583155 Assignment 3 */
  2.  
  #define _GNU_SOURCE
  #include <ctype.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  7.  
  /* Name of the file that lists the known tags, one tag per line. */
  #define TAGFILENAME "tags.txt"
  /* Number of entries allocated for the tag table. Entry 0 is reserved:
     whichtag() returns 0 for "not found" and main() marks entry 0 as opened. */
  #define TAGSTOCHECK 23
  /* Size in bytes of a tag-name buffer, terminating NUL included. */
  #define MAXTAGNAMESIZE 25

  /* One entry of the tag table.
     The struct tag was `__tagtype`; identifiers containing a double underscore
     are reserved for the implementation, so the tag now simply matches the
     typedef name. Code that uses the typedef `tagtype` is unaffected.
     `closing` and `depend` are filled in by sscanf with "%hu", so they must
     stay unsigned short. */
  typedef struct tagtype {
      char name[MAXTAGNAMESIZE]; /* tag name as compared by whichtag() */
      unsigned short closing;    /* closing tag required? 1 = yes, 0 = no, 2 = maybe */
      unsigned short depend;     /* table index of the tag this one must be used inside */
      unsigned short opened;     /* number of times the tag has been opened */
      unsigned short closed;     /* number of times the tag has been closed */
  } tagtype;
  19.  
  20. unsigned int error = 0;
  21. unsigned int linen = 0, tagn = 0;
  22. unsigned int currenttag = 0, lasttag = 0;
  23. tagtype * tags = NULL;
  24. char * tempstr = NULL; //storage of temporary string to avoid memory leak
  25. char * string = NULL;
  26. char * tagstr = NULL;
  27. FILE * filep = NULL;
  28. FILE * tagfile = NULL;
  29.  
  30. //Waste of memory, only needed for one of the assignment's ridiculous requirements
  31. char realname1[MAXTAGNAMESIZE], realname2[MAXTAGNAMESIZE];
  32.  
  33.  
  34. /*----------------------------------------------------------------------------*/
  35.  
  36. int sumopentags(void)
  37. /* returns the sum of the number of currently unclosed tags */
  38. {
  39. int sum = 0, i = 0;
  40. for(i = 1; i < TAGSTOCHECK; i++) sum += (tags[i].opened - tags[i].closed);
  41. return sum;
  42. }
  43.  
  44. /*----------------------------------------------------------------------------*/
  45.  
  46. unsigned int whichtag(char * tag)
  47. /* returns tagtype index of the matching tag name, 0 if not found in tagtype */
  48. {
  49. unsigned int i = 0;
  50. for(i = 1; i < tagn; i++){
  51. if(strcmp(tag, tags[i].name) == 0){
  52. return i;
  53. }
  54. }
  55. return 0;
  56. }
  57.  
  58. /*----------------------------------------------------------------------------*/
  59.  
  60. unsigned int checkerrors()
  61. /* check for the 9 errors listed below
  62. 1 "<html> not first tag of document"
  63. 2 "</html> not last tag of document"
  64. 3 "<head> not first tag of html block"
  65. 4 "</body> not last tag of html block"
  66. 5 "html block does not contain exactly 1 head block"
  67. 6 "html block does not contain exactly 1 body block"
  68. 7 "tag used outside of valid block on line Z"
  69. 8 "closing </img> found on line Z"
  70. 9 "tags not properly closed; expecting </X>, got </Y> on line %u"
  71. */
  72. {
  73. int sum = sumopentags();
  74.  
  75. if(tags[whichtag("html")].opened < 1 && (sum == 1 || (sum == 0 && tags[currenttag].opened > 0 && currenttag != 0))) return 1;
  76. if(tags[whichtag("html")].closed > 0 && (sum > 0 || (sum == 0 && tags[currenttag].closing != 1))) return 2;
  77. if(tags[whichtag("head")].opened < 1 && (sum > 1 || (sum > 0 && tags[currenttag].closing != 1)) && currenttag != 0) return 3;
  78. if(tags[whichtag("body")].closed > 0 && tags[whichtag("html")].closed < 1 &&
  79. (sum > 1 || (sum == 1 && tags[currenttag].closing != 1))) return 4;
  80. if(tags[whichtag("head")].opened > 1 || (tags[whichtag("html")].closed >= 1 && tags[whichtag("head")].opened < 1)) return 5;
  81. if(tags[whichtag("head")].opened > 1 || (tags[whichtag("html")].closed >= 1 && tags[whichtag("head")].opened < 1)) return 6;
  82.  
  83. //special case for <li>
  84. if(currenttag == whichtag("li") && (tags[whichtag("ol")].opened - tags[whichtag("ol")].closed) == 0
  85. && (tags[whichtag("ul")].opened - tags[whichtag("ul")].closed) == 0) return 7;
  86. if((tags[currenttag].opened - tags[currenttag].closed) >= 1 && currenttag != whichtag("li")){
  87. if(tags[tags[currenttag].depend].opened < 1 ||
  88. (tags[tags[currenttag].depend].opened - tags[tags[currenttag].depend].closed) == 0)
  89. return 7;
  90. }else if (tags[currenttag].closing != 1 && currenttag != whichtag("li")){
  91. if(tags[tags[currenttag].depend].opened < 1 ||
  92. ((tags[tags[currenttag].depend].opened - tags[tags[currenttag].depend].closed) == 0
  93. && tags[tags[currenttag].depend].closing == 1))
  94. return 7;
  95. }
  96.  
  97. if(tags[whichtag("img")].closed > tags[whichtag("img")].opened) return 8;
  98. if((tags[lasttag].opened - tags[lasttag].closed) >= 1 &&
  99. (tags[currenttag].opened - tags[currenttag].closed) == 0 &&
  100. tags[currenttag].closing == 1)
  101. return 9;
  102.  
  103. //no errors
  104. return 0;
  105. }
  106.  
  107. /*----------------------------------------------------------------------------*/
  108.  
  109. void printerror(int errnum)
  110. /* prints the corresponding error string to stderr */
  111. {
  112. switch(errnum){
  113. case 1 : fprintf(stderr, "<html> not first tag of document\n"); break;
  114. case 2 : fprintf(stderr, "</html> not last tag of document\n"); break;
  115. case 3 : fprintf(stderr, "<head> not first tag of html block\n"); break;
  116. case 4 : fprintf(stderr, "</body> not last tag of html block\n"); break;
  117. case 5 : fprintf(stderr, "html block does not contain exactly 1 head block\n"); break;
  118. case 6 : fprintf(stderr, "html block does not contain exactly 1 body block\n"); break;
  119. case 7 : fprintf(stderr, "tag used outside of valid block on line %u\n", linen); break;
  120. case 8 : fprintf(stderr, "closing </img> found on line %u\n", linen); break;
  121. case 9 : fprintf(stderr, "tags not properly closed; expecting </%s>, got </%s> on line %u\n",
  122. realname2, realname1, linen); break;
  123. }
  124. }
  125.  
  126. /*----------------------------------------------------------------------------*/
  127.  
  /* Converts the first `len` characters of `string` to lower case, in place.
     Characters that are not upper-case letters are left untouched.
     A NULL `string` is ignored.

     Uses tolower() instead of the hard-coded ASCII values 65/90/32; the
     argument is cast to unsigned char because passing a negative char value
     to the <ctype.h> functions is undefined. */
  void lowercase(char * string, unsigned int len)
  {
      if (string == NULL) {
          return;
      }
      for (unsigned int i = 0; i < len; i++) {
          string[i] = (char)tolower((unsigned char)string[i]);
      }
  }
  136.  
  137. /*----------------------------------------------------------------------------*/
  138.  
  139. char * gettag(char * tagstr, unsigned int len)
  140. /* returns the tag name, e.g. "body" from <BODY background....> , retains original
  141. case of the tag name in realname1 and realname2*/
  142. {
  143. /* extract the first word*/
  144. tempstr = (char*)calloc(len, sizeof(char));
  145. memset(tempstr, 0, len);
  146. strncpy(tempstr, tagstr, (int)(strchrnul(tagstr, ' ') - tagstr));
  147. strcpy(realname2, realname1);
  148. strncpy(realname1, tempstr, MAXTAGNAMESIZE);
  149. lowercase(tempstr, len);
  150. return tempstr;
  151. }
  152.  
  153. /*----------------------------------------------------------------------------*/
  154.  
  155. int parseline(char * string, unsigned int len)
  156. /* parses a single line */
  157. {
  158. unsigned int n = 0, start = 0;
  159. tagstr = (char*)calloc(len, sizeof(char));
  160.  
  161. for(n = 0; n < len; n++){
  162. memset(tagstr, 0, len);
  163. if(string[n] == '<'){
  164. start = n;
  165. }else if(string[n] == '>'){
  166. /* parse the tag */
  167. /* record which tag was opened */
  168. strncpy(tagstr, &(string[start+1]), (n-start-1)*sizeof(char));
  169. if(tagstr[0] == '/'){
  170. currenttag = whichtag(gettag(&tagstr[1], (n-start)));
  171. if(currenttag > 0 && tags[currenttag].closing == 1){
  172. tags[currenttag].closed += 1;
  173. }
  174. }else{
  175. currenttag = whichtag(gettag(tagstr, (n-start)));
  176. if(currenttag > 0){
  177. tags[currenttag].opened += 1;
  178. /* close it too if closing not required */
  179. if(tags[currenttag].closing != 1) tags[currenttag].closed += 1;
  180. }
  181. }
  182. /* check for errors, print and return if found */
  183. error = checkerrors();
  184. if(error != 0) return 1;
  185. }
  186. if(currenttag != 0) lasttag = currenttag;
  187. }
  188. return 0;
  189. }
  190.  
  191. /*----------------------------------------------------------------------------*/
  192.  
  193. void free_all()
  194. /* frees all of the pointers and files */
  195. {
  196. if (string != NULL) free(string);
  197. if (tempstr != NULL) free(tempstr);
  198. if (tagstr != NULL) free(tagstr);
  199. if (filep != NULL) fclose(filep);
  200. if (tagfile != NULL) fclose(tagfile);
  201. if (tags != NULL) free(tags);
  202. }
  203.  
  204. /*----------------------------------------------------------------------------*/
  205.  
  206. int main(int argc, char * argv[])
  207. {
  208. /* open files, check for file errors */
  209. if(argc != 2){
  210. fprintf(stderr,"Wrong number of arguments\n");
  211. return 1;
  212. }
  213. filep = fopen(argv[1], "r");
  214. if(filep == NULL){
  215. fprintf(stderr, "ERROR %s not found\n", argv[1]);
  216. free_all();
  217. return EXIT_FAILURE;
  218. }
  219. tagfile = fopen(TAGFILENAME, "r");
  220. if(tagfile == NULL){
  221. fprintf(stderr, "ERROR %s not found\n", TAGFILENAME);
  222. free_all();
  223. return EXIT_FAILURE;
  224. }
  225.  
  226. unsigned int n = 0;
  227.  
  228. /* set up tagtype, match up index numbers with tags.txt line numbers */
  229. tags = (tagtype*) calloc(TAGSTOCHECK, sizeof(tagtype));
  230. tags[0].opened = 1; //<html> depends on this to be open
  231. tagn++;
  232.  
  233. /* read in the tag file and store their properties into tagtype*/
  234. do{
  235. string = (char*) realloc(string, ++n*sizeof(char));
  236. string[n-1] = fgetc(tagfile);
  237. if(string[n-1] == '\n'){
  238. tagn++;
  239. sscanf(string, "%s %hu %hu", tags[tagn-1].name, &tags[tagn-1].closing,
  240. &tags[tagn-1].depend);
  241. string = NULL;
  242. n = 0;
  243. }
  244. }while(!feof(tagfile));
  245.  
  246. /* read html and parse */
  247. do{
  248. string = (char*) realloc(string, ++n*sizeof(char));
  249. string[n-1] = fgetc(filep);
  250. if(string[n-1] == '\n'){
  251. linen++;
  252. if(parseline(string, n) == 1) break;
  253. string = NULL;
  254. n = 0;
  255. }
  256. }while(!feof(filep));
  257.  
  258. /* evaluate */
  259. if (currenttag != 1 && error == 0) error = 2; //if eof and </html> not found
  260. if (error != 0){
  261. printf("Verified. Page is invalid.\n");
  262. fprintf(stderr, "REASON ");
  263. printerror(error);
  264. }
  265. else printf("Verified. Page is valid.\n");
  266.  
  267. /* finish */
  268. free_all();
  269.  
  270. return EXIT_SUCCESS;
  271. }
Add Comment
Please, Sign In to add comment