Advertisement
Guest User

xchk

a guest
Jun 11th, 2013
272
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 8.25 KB | None | 0 0
  1. /* XCHK: Simple XML Tag matcher.
  2. * Invocation: xchk [-R] filename
  3. * Given some input file with XML code, XCHK will report to the user if there is an error.
  4. * If the user invokes XCHK using -R, it will output a reformatted version of the input to stdout.
  5. */
  6.  
  7. #include <stdio.h>
  8. #include <string.h>
  9. #include <ctype.h>
  10. #include <stdlib.h>
  11.  
  12. #define MAX_TEXT_LEN 100
  13. #define INDENT_INCREMENT 4
  14.  
  /* Item types returned by ReadItem:
   * Text: Ordinary, non-tag text.
   * StartTag: A tag in the form <tagname ...>
   * EndTag: A tag in the form </tagname ...>
   * EmptyElement: A tag in the form <tagname ... />
   * EndOfInput: EOF
   */
  typedef enum{Text, StartTag, EndTag, EmptyElement, EndOfInput}ItemType;
  /* Input file path taken from the command line in main; printed as the prefix of error messages. */
  char *g_fileName;
  /* Stream opened by main on g_fileName; ReadNormalizedChar reads from it with fgetc. */
  FILE *g_inputFile;
  /* Line counter printed in error messages; ReadNormalizedChar increments it for every '\n' it reads. */
  int g_lineNumber;
  /* Non-zero when -R was given: the Match* functions then echo each item to stdout. */
  int g_createOutput;
  /* One-character pushback slot: written by UnreadChar, consumed by ReadNormalizedChar. '\0' means empty. */
  char g_savedChar;
  28.  
  /* Forward declarations, in the order the functions are defined below. */
  void PrintSpaces(int n);                 /* write n spaces to stdout */
  char ReadNormalizedChar(void);           /* next input char, whitespace collapsed, '\0' at EOF */
  void UnreadChar(char ch);                /* push one char back for the next ReadNormalizedChar */
  void ReadTagName(char *tagName, char ch, int *tagNameCount, int *doneReadingName); /* feed one char of a tag name */
  ItemType ReadItem(char *text, char *tagName);          /* read one text chunk or one tag */
  void MatchUntilEndTag(char *tagName, int nestingDepth); /* consume items up to the end tag for tagName */
  void MatchUntilEof(void);                /* top-level loop over the whole input */
  36.  
  37. /****************/
  38.  
  /* PrintSpaces
   * Writes n space characters to stdout; the Match* functions use it to
   * indent the -R output. A count of zero or less writes nothing.
   * input: n - how many spaces to write
   */
  void PrintSpaces(int n){
    int remaining = n;

    while(remaining > 0){
      fprintf(stdout," ");
      remaining--;
    }
  }
  49.  
  50. /* ReadNormalizedChar
  51. * Takes the output from fgetc(g_inputFile) and returns the appropriate char.
  52. * Whenever a whitespace is encountered, a single space is returned; that is, no consecutive spaces are returned, and no non-space whitespaces are parsed.
  53. * If EOF, returns a null byte.
  54. * Outputs: The current char from fgetc(g_inputFile).
  55. */
  56. char ReadNormalizedChar(void){
  57.   char ch;
  58.   static int readingWhitespace;
  59.  
  60. /*If there is a saved char from UnreadChar, use that
  61. * Otherwise, use fgetc.*/
  62.   if(g_savedChar){
  63.     ch = g_savedChar;
  64.     g_savedChar = '\0';
  65.   }
  66.   else{
  67.     ch = fgetc(g_inputFile);
  68.   }
  69. /*Handling spaces*/
  70.   if(isspace(ch)){
  71.     if(ch == '\n'){
  72.       g_lineNumber++;
  73.     }
  74.     if(!readingWhitespace){    
  75.       readingWhitespace = 1;
  76.       return ' ';
  77.     }
  78.     while(readingWhitespace){
  79.       ch = fgetc(g_inputFile);
  80.       if(ch == '\n'){
  81.     g_lineNumber++;
  82.       }
  83.       if(!isspace(ch)){
  84.     readingWhitespace = 0;
  85.       }
  86.     }
  87.   }
  88. /*Handle EOF*/
  89.   if(ch == EOF){
  90.     return '\0';
  91.   }
  92.   return ch;
  93. }
  94.  
  95. /* UnreadChar
  96. * Reverts the current value of ch to a previous, saved value.
  97. * Used in ReadItem to assist in reading tag items.
  98. * Outputs: Saves ch to g_savedChar.
  99. */
  100. void UnreadChar(char ch){
  101.   if(g_savedChar){
  102.     fprintf(stderr,"Can\'t unread character twice.\n");
  103.     exit(1);
  104.   }
  105.   g_savedChar = ch;
  106. }
  107.  
  108. /* ReadTagName
  109. * When a tag is detected, finds the name of the tag in the form <name ... /> or </name ...> etc.
  110. * ReadTagName terminates when a space is detected.
  111. * Tag names which do not start with a letter return an error, and tag names which contain illegal characters also return an error.
  112. * Outputs: The tag name to the char* tagName.
  113. */
  114. void ReadTagName(char *tagName, char ch, int *tagNameCount, int *doneReadingName){
  115.   if(*tagNameCount > MAX_TEXT_LEN){
  116.     fprintf(stderr,"%s: %d: Tag name exceeded MAX_TEXT_LEN of %d characters.\n",g_fileName,g_lineNumber,MAX_TEXT_LEN);
  117.   }
  118.   if(*tagNameCount == 0){
  119.     if(isalpha(ch)){
  120.       tagName[*tagNameCount] = ch;
  121.     }
  122.     else if(ch == '/'){
  123.       return;
  124.     }
  125.     else{
  126.       fprintf(stderr,"%s: %d: Tag name must start with a letter.\n",g_fileName,g_lineNumber);
  127.       exit(1);
  128.     }
  129.   }
  130.   if(*tagNameCount > 0){
  131.     if(isalnum(ch)){
  132.       tagName[*tagNameCount] = ch;
  133.     }
  134.     else if(ch == '>' || ch == '/' || isspace(ch)){
  135.       *doneReadingName = 1;
  136.       tagName[*tagNameCount] = '\0';
  137.       return;
  138.     }
  139.     else{
  140.       fprintf(stderr,"%s: %d: Tag name contains illegal symbol.\n",g_fileName, g_lineNumber);
  141.       exit(1);
  142.     }
  143.   }
  144.   *tagNameCount = *tagNameCount + 1;
  145. }
  146.  
  147. /* ReadItem
  148. * Repeatedly calls ReadNormalizedChar and determines the type of item.
  149. * If a tag is encountered it calls ReadTagName to determine the tag name.
  150. * All results are stored in char *text and char *tagName.
  151. * Outputs: The type of item encountered.
  152. */
  153. ItemType ReadItem(char *text, char *tagName){
  154.   char ch;
  155.   int i;
  156.   /*Flags*/
  157.   int wasTag = 0, slashAtStart = 0, slashAtEnd = 0, inputEndFound = 0;
  158.   int tagNameCount = 0;
  159.   int doneReadingName = 0;
  160.  
  161.   for(i = 0; i < MAX_TEXT_LEN; i++){
  162.     ch = ReadNormalizedChar();
  163.     /*Read the tag name*/
  164.     if(wasTag && !doneReadingName){
  165.       ReadTagName(tagName, ch, &tagNameCount, &doneReadingName);
  166.     }
  167.     /*Deciding if the text being read is a tag or not.*/
  168.     if(ch == '<'){
  169.       if(i == 0){
  170.     wasTag = 1;
  171.       }
  172.       /*Find dangling < characters*/
  173.       else{
  174.     if(wasTag){
  175.       fprintf(stderr,"%s: %d: Unexpected < character.\n",g_fileName, g_lineNumber);
  176.     }
  177.     /*A tag was encountered, so the item type has changed and we are done reading this chunk of input.*/
  178.     else{
  179.       UnreadChar(ch);
  180.       text[i] = '\0';
  181.       break;
  182.     }
  183.       }
  184.     }
  185.     /*Deciding if a > signifies an endtag or an empty element tag.*/
  186.     if(ch == '/'){
  187.       if(wasTag){
  188.     if(text[i-1] == '<'){
  189.       slashAtStart = 1;
  190.     }
  191.       }
  192.     }
  193.     if(ch == '>'){
  194.       if(wasTag){  
  195.     if(text[i-1] == '/'){
  196.       slashAtEnd = 1;
  197.     }
  198.     text[i] = ch;
  199.     text[i+1] = '\0';
  200.     break;
  201.       }
  202.       else{
  203.     fprintf(stderr,"%s: %d: Unexpected > character.\n",g_fileName, g_lineNumber);
  204.     exit(1);
  205.       }
  206.     }
  207.     /* Found EOF */
  208.     if(ch == '\0'){
  209.       inputEndFound = 1;
  210.       break;
  211.     }
  212.   }
  213.   /* Deciding return value based off set flags*/
  214.   if(wasTag){
  215.     printf("Tag name: %s\n",tagName);
  216.     if(slashAtStart){
  217.       return EndTag;
  218.     }
  219.     else if(slashAtEnd){
  220.       return EmptyElement;
  221.     }
  222.     else if(slashAtStart && slashAtEnd){
  223.       fprintf(stderr,"%s: %d: Malformed tag.\n",g_fileName, g_lineNumber);
  224.     }
  225.     else{
  226.       return StartTag;
  227.     }
  228.   }
  229.   if(inputEndFound){
  230.      return EndOfInput;
  231.   }
  232.    return Text;
  233. }
  234.  
  235. /* MatchUntilEndTag
  236. * Finds tags via ReadItem until a matching tag is encountered. If no matching tag is encountered, then an error is returned.
  237. * Calls itself recursively whenever ReadItem finds a StartTag item.
  238. * Outputs: None.
  239. */
  240. void MatchUntilEndTag(char *tagName, int nestingDepth){
  241.   ItemType item;
  242.   char text[MAX_TEXT_LEN+1];
  243.   char nextTagName[MAX_TEXT_LEN+1];
  244.   int tagLineNumber = 0;
  245.   while(1){
  246.     item = ReadItem(text, nextTagName);
  247.     if(item == EndTag && strncmp(nextTagName, tagName, MAX_TEXT_LEN) == 0){
  248.       if(g_createOutput){
  249.     PrintSpaces(nestingDepth*INDENT_INCREMENT);
  250.     fprintf(stdout,"%s",text);
  251.       }
  252.        break;
  253.     }
  254.     if(g_createOutput){
  255.       PrintSpaces((nestingDepth+1)*INDENT_INCREMENT);
  256.       fprintf(stdout,"%s",text);
  257.     }
  258.     if(item == StartTag){
  259.       MatchUntilEndTag(nextTagName, nestingDepth+1);
  260.     }
  261.     if(item == EndOfInput){
  262.       fprintf(stderr,"%s: %d: Unmatched tag.\n",g_fileName, g_lineNumber);
  263.       exit(1);
  264.     }
  265.   }
  266. }
  267.  
  268. /* MatchUntilEof
  269. * Matches items until an EndOfInput item is returned from ReadItem.
  270. * If a StartTag is encountered, it calls MatchUntilEndTag.
  271. * Outputs: None
  272. */
  273. void MatchUntilEof(void){
  274.     ItemType item;
  275.     char text[MAX_TEXT_LEN+1];
  276.     char tagName[MAX_TEXT_LEN+1];
  277.     while((item = ReadItem(text, tagName)) != EndOfInput){
  278.       if(g_createOutput){
  279.     fprintf(stdout,"%s",text);
  280.       }
  281.       if(item == StartTag){
  282.     MatchUntilEndTag(tagName, 0);
  283.       }
  284.    }
  285. }
  286.  
  287. /****************/
  288.  
  289. /* Main
  290. * Handles file operations and input values.
  291. * Proper XCHK invocation: ./xchk [-R] filename
  292. */
  293. int main(int argc, char* argv[]){
  294.     if(argc == 3){
  295.       if(strncmp(argv[1], "-R", 2)!=0){
  296.             fprintf(stderr,"%s: Expected -R as second parameter.\n",argv[0]);
  297.             return -1;
  298.     }
  299.     g_fileName = argv[2];
  300.     g_createOutput = 1;
  301.     }
  302.     else if(argc == 2){
  303.       g_fileName = argv[1];
  304.     }
  305.     else{
  306.       fprintf(stderr,"%s: Improper invocation. Try %s [-R] g_filename \n",argv[0], argv[0]);
  307.       return -1;
  308.     }
  309.     g_inputFile = fopen(g_fileName, "r");
  310.     if(g_inputFile == NULL){
  311.       fprintf(stderr, "%s: Could not open %s.\n",argv[0],g_fileName);
  312.       return -1;
  313.     }
  314.     MatchUntilEof();
  315.     fclose(g_inputFile);
  316.     return 0;
  317. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement