Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /* Xingbo Wang CSC192 997583155 Assignment 3 */
- #define _GNU_SOURCE
- #include <stdlib.h>
- #include <stdio.h>
- #include <string.h>
/* Name of the file the tag definitions are read from. */
#define TAGFILENAME    "tags.txt"
/* Number of tagtype slots allocated for the tag table (slot 0 included). */
#define TAGSTOCHECK    23
/* Size, in bytes, of every buffer that holds a tag name. */
#define MAXTAGNAMESIZE 25
- typedef struct __tagtype{
- char name[MAXTAGNAMESIZE];
- unsigned short closing; //closing tag required? 1 = yes, 0 = no, 2 = maybe
- unsigned short depend; //must be used inside which tag?
- unsigned short opened; //count of num of times tag is opened
- unsigned short closed; //count of num of times tag is closed
- } tagtype;
- unsigned int error = 0;
- unsigned int linen = 0, tagn = 0;
- unsigned int currenttag = 0, lasttag = 0;
- tagtype * tags = NULL;
- char * tempstr = NULL; //storage of temporary string to avoid memory leak
- char * string = NULL;
- char * tagstr = NULL;
- FILE * filep = NULL;
- FILE * tagfile = NULL;
- //Waste of memory, only needed for one of the assignment's ridiculous requirements
- char realname1[MAXTAGNAMESIZE], realname2[MAXTAGNAMESIZE];
- /*----------------------------------------------------------------------------*/
- int sumopentags(void)
- /* returns the sum of the number of currently unclosed tags */
- {
- int sum = 0, i = 0;
- for(i = 1; i < TAGSTOCHECK; i++) sum += (tags[i].opened - tags[i].closed);
- return sum;
- }
- /*----------------------------------------------------------------------------*/
- unsigned int whichtag(char * tag)
- /* returns tagtype index of the matching tag name, 0 if not found in tagtype */
- {
- unsigned int i = 0;
- for(i = 1; i < tagn; i++){
- if(strcmp(tag, tags[i].name) == 0){
- return i;
- }
- }
- return 0;
- }
- /*----------------------------------------------------------------------------*/
- unsigned int checkerrors()
- /* check for the 9 errors listed below
- 1 "<html> not first tag of document"
- 2 "</html> not last tag of document"
- 3 "<head> not first tag of html block"
- 4 "</body> not last tag of html block"
- 5 "html block does not contain exactly 1 head block"
- 6 "html block does not contain exactly 1 body block"
- 7 "tag used outside of valid block on line Z"
- 8 "closing </img> found on line Z"
- 9 "tags not properly closed; expecting </X>, got </Y> on line %u"
- */
- {
- int sum = sumopentags();
- if(tags[whichtag("html")].opened < 1 && (sum == 1 || (sum == 0 && tags[currenttag].opened > 0 && currenttag != 0))) return 1;
- if(tags[whichtag("html")].closed > 0 && (sum > 0 || (sum == 0 && tags[currenttag].closing != 1))) return 2;
- if(tags[whichtag("head")].opened < 1 && (sum > 1 || (sum > 0 && tags[currenttag].closing != 1)) && currenttag != 0) return 3;
- if(tags[whichtag("body")].closed > 0 && tags[whichtag("html")].closed < 1 &&
- (sum > 1 || (sum == 1 && tags[currenttag].closing != 1))) return 4;
- if(tags[whichtag("head")].opened > 1 || (tags[whichtag("html")].closed >= 1 && tags[whichtag("head")].opened < 1)) return 5;
- if(tags[whichtag("head")].opened > 1 || (tags[whichtag("html")].closed >= 1 && tags[whichtag("head")].opened < 1)) return 6;
- //special case for <li>
- if(currenttag == whichtag("li") && (tags[whichtag("ol")].opened - tags[whichtag("ol")].closed) == 0
- && (tags[whichtag("ul")].opened - tags[whichtag("ul")].closed) == 0) return 7;
- if((tags[currenttag].opened - tags[currenttag].closed) >= 1 && currenttag != whichtag("li")){
- if(tags[tags[currenttag].depend].opened < 1 ||
- (tags[tags[currenttag].depend].opened - tags[tags[currenttag].depend].closed) == 0)
- return 7;
- }else if (tags[currenttag].closing != 1 && currenttag != whichtag("li")){
- if(tags[tags[currenttag].depend].opened < 1 ||
- ((tags[tags[currenttag].depend].opened - tags[tags[currenttag].depend].closed) == 0
- && tags[tags[currenttag].depend].closing == 1))
- return 7;
- }
- if(tags[whichtag("img")].closed > tags[whichtag("img")].opened) return 8;
- if((tags[lasttag].opened - tags[lasttag].closed) >= 1 &&
- (tags[currenttag].opened - tags[currenttag].closed) == 0 &&
- tags[currenttag].closing == 1)
- return 9;
- //no errors
- return 0;
- }
- /*----------------------------------------------------------------------------*/
- void printerror(int errnum)
- /* prints the corresponding error string to stderr */
- {
- switch(errnum){
- case 1 : fprintf(stderr, "<html> not first tag of document\n"); break;
- case 2 : fprintf(stderr, "</html> not last tag of document\n"); break;
- case 3 : fprintf(stderr, "<head> not first tag of html block\n"); break;
- case 4 : fprintf(stderr, "</body> not last tag of html block\n"); break;
- case 5 : fprintf(stderr, "html block does not contain exactly 1 head block\n"); break;
- case 6 : fprintf(stderr, "html block does not contain exactly 1 body block\n"); break;
- case 7 : fprintf(stderr, "tag used outside of valid block on line %u\n", linen); break;
- case 8 : fprintf(stderr, "closing </img> found on line %u\n", linen); break;
- case 9 : fprintf(stderr, "tags not properly closed; expecting </%s>, got </%s> on line %u\n",
- realname2, realname1, linen); break;
- }
- }
- /*----------------------------------------------------------------------------*/
void lowercase(char * string, unsigned int len)
/* Converts the ASCII capitals 'A'..'Z' found in the first `len` bytes of
   `string` to lower case, in place. All other bytes are left alone and a NUL
   byte does not stop the scan. */
{
    char * const stop = string + len;
    char * p = string;

    while(p != stop){
        if(*p >= 'A' && *p <= 'Z'){
            *p += 'a' - 'A';
        }
        p++;
    }
}
- /*----------------------------------------------------------------------------*/
- char * gettag(char * tagstr, unsigned int len)
- /* returns the tag name, e.g. "body" from <BODY background....> , retains original
- case of the tag name in realname1 and realname2*/
- {
- /* extract the first word*/
- tempstr = (char*)calloc(len, sizeof(char));
- memset(tempstr, 0, len);
- strncpy(tempstr, tagstr, (int)(strchrnul(tagstr, ' ') - tagstr));
- strcpy(realname2, realname1);
- strncpy(realname1, tempstr, MAXTAGNAMESIZE);
- lowercase(tempstr, len);
- return tempstr;
- }
- /*----------------------------------------------------------------------------*/
- int parseline(char * string, unsigned int len)
- /* parses a single line */
- {
- unsigned int n = 0, start = 0;
- tagstr = (char*)calloc(len, sizeof(char));
- for(n = 0; n < len; n++){
- memset(tagstr, 0, len);
- if(string[n] == '<'){
- start = n;
- }else if(string[n] == '>'){
- /* parse the tag */
- /* record which tag was opened */
- strncpy(tagstr, &(string[start+1]), (n-start-1)*sizeof(char));
- if(tagstr[0] == '/'){
- currenttag = whichtag(gettag(&tagstr[1], (n-start)));
- if(currenttag > 0 && tags[currenttag].closing == 1){
- tags[currenttag].closed += 1;
- }
- }else{
- currenttag = whichtag(gettag(tagstr, (n-start)));
- if(currenttag > 0){
- tags[currenttag].opened += 1;
- /* close it too if closing not required */
- if(tags[currenttag].closing != 1) tags[currenttag].closed += 1;
- }
- }
- /* check for errors, print and return if found */
- error = checkerrors();
- if(error != 0) return 1;
- }
- if(currenttag != 0) lasttag = currenttag;
- }
- return 0;
- }
- /*----------------------------------------------------------------------------*/
- void free_all()
- /* frees all of the pointers and files */
- {
- if (string != NULL) free(string);
- if (tempstr != NULL) free(tempstr);
- if (tagstr != NULL) free(tagstr);
- if (filep != NULL) fclose(filep);
- if (tagfile != NULL) fclose(tagfile);
- if (tags != NULL) free(tags);
- }
- /*----------------------------------------------------------------------------*/
- int main(int argc, char * argv[])
- {
- /* open files, check for file errors */
- if(argc != 2){
- fprintf(stderr,"Wrong number of arguments\n");
- return 1;
- }
- filep = fopen(argv[1], "r");
- if(filep == NULL){
- fprintf(stderr, "ERROR %s not found\n", argv[1]);
- free_all();
- return EXIT_FAILURE;
- }
- tagfile = fopen(TAGFILENAME, "r");
- if(tagfile == NULL){
- fprintf(stderr, "ERROR %s not found\n", TAGFILENAME);
- free_all();
- return EXIT_FAILURE;
- }
- unsigned int n = 0;
- /* set up tagtype, match up index numbers with tags.txt line numbers */
- tags = (tagtype*) calloc(TAGSTOCHECK, sizeof(tagtype));
- tags[0].opened = 1; //<html> depends on this to be open
- tagn++;
- /* read in the tag file and store their properties into tagtype*/
- do{
- string = (char*) realloc(string, ++n*sizeof(char));
- string[n-1] = fgetc(tagfile);
- if(string[n-1] == '\n'){
- tagn++;
- sscanf(string, "%s %hu %hu", tags[tagn-1].name, &tags[tagn-1].closing,
- &tags[tagn-1].depend);
- string = NULL;
- n = 0;
- }
- }while(!feof(tagfile));
- /* read html and parse */
- do{
- string = (char*) realloc(string, ++n*sizeof(char));
- string[n-1] = fgetc(filep);
- if(string[n-1] == '\n'){
- linen++;
- if(parseline(string, n) == 1) break;
- string = NULL;
- n = 0;
- }
- }while(!feof(filep));
- /* evaluate */
- if (currenttag != 1 && error == 0) error = 2; //if eof and </html> not found
- if (error != 0){
- printf("Verified. Page is invalid.\n");
- fprintf(stderr, "REASON ");
- printerror(error);
- }
- else printf("Verified. Page is valid.\n");
- /* finish */
- free_all();
- return EXIT_SUCCESS;
- }
Add Comment
Please, Sign In to add comment