Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /* XCHK: Simple XML Tag matcher.
- * Invocation: xchk [-R] filename
- * Given some input file with XML code, XCHK will report to the user if there is an error.
- * If the user invokes XCHK using -R, it will output a reformatted version of the input to stdout.
- */
- #include <stdio.h>
- #include <string.h>
- #include <ctype.h>
- #include <stdlib.h>
/* Capacity, in characters, of one item (text chunk or tag) and of one tag
 * name; the buffers that hold them are declared MAX_TEXT_LEN+1 bytes long. */
#define MAX_TEXT_LEN 100
/* Number of spaces added per nesting level in the -R output. */
#define INDENT_INCREMENT 4
/*Item types:
 * Text: Ordinary, non-tag text.
 * StartTag: A tag in the form <tagname ...>
 * EndTag: A tag in the form </tagname ...>
 * EmptyElement: A tag in the form <tagname ... />
 * EndOfInput: EOF
 */
typedef enum{Text, StartTag, EndTag, EmptyElement, EndOfInput}ItemType;
/* Name of the input file; printed as the prefix of every diagnostic. */
char *g_fileName;
/* Stream the input characters are read from. */
FILE *g_inputFile;
/* Number of the line currently being read. It is incremented for every
 * newline consumed, so it starts at 1: an error on the first line must be
 * reported as line 1, not line 0. */
int g_lineNumber = 1;
/* Non-zero when the reformatted input should be written to stdout (-R). */
int g_createOutput;
/* One-character pushback slot filled by UnreadChar; '\0' means "empty". */
char g_savedChar;
void PrintSpaces(int n);
char ReadNormalizedChar(void);
void UnreadChar(char ch);
void ReadTagName(char *tagName, char ch, int *tagNameCount, int *doneReadingName);
ItemType ReadItem(char *text, char *tagName);
void MatchUntilEndTag(char *tagName, int nestingDepth);
void MatchUntilEof(void);
- /****************/
/* PrintSpaces
 * Writes n space characters to stdout; used to indent the -R output.
 * A count of zero or less writes nothing.
 * input: n, the number of spaces to print
 */
void PrintSpaces(int n){
    int remaining = n;

    while(remaining > 0){
        fputc(' ', stdout);
        remaining--;
    }
}
- /* ReadNormalizedChar
- * Takes the output from fgetc(g_inputFile) and returns the appropriate char.
- * Whenever a whitespace is encountered, a single space is returned; that is, no consecutive spaces are returned, and no non-space whitespaces are parsed.
- * If EOF, returns a null byte.
- * Outputs: The current char from fgetc(g_inputFile).
- */
- char ReadNormalizedChar(void){
- char ch;
- static int readingWhitespace;
- /*If there is a saved char from UnreadChar, use that
- * Otherwise, use fgetc.*/
- if(g_savedChar){
- ch = g_savedChar;
- g_savedChar = '\0';
- }
- else{
- ch = fgetc(g_inputFile);
- }
- /*Handling spaces*/
- if(isspace(ch)){
- if(ch == '\n'){
- g_lineNumber++;
- }
- if(!readingWhitespace){
- readingWhitespace = 1;
- return ' ';
- }
- while(readingWhitespace){
- ch = fgetc(g_inputFile);
- if(ch == '\n'){
- g_lineNumber++;
- }
- if(!isspace(ch)){
- readingWhitespace = 0;
- }
- }
- }
- /*Handle EOF*/
- if(ch == EOF){
- return '\0';
- }
- return ch;
- }
- /* UnreadChar
- * Reverts the current value of ch to a previous, saved value.
- * Used in ReadItem to assist in reading tag items.
- * Outputs: Saves ch to g_savedChar.
- */
- void UnreadChar(char ch){
- if(g_savedChar){
- fprintf(stderr,"Can\'t unread character twice.\n");
- exit(1);
- }
- g_savedChar = ch;
- }
- /* ReadTagName
- * When a tag is detected, finds the name of the tag in the form <name ... /> or </name ...> etc.
- * ReadTagName terminates when a space is detected.
- * Tag names which do not start with a letter return an error, and tag names which contain illegal characters also return an error.
- * Outputs: The tag name to the char* tagName.
- */
- void ReadTagName(char *tagName, char ch, int *tagNameCount, int *doneReadingName){
- if(*tagNameCount > MAX_TEXT_LEN){
- fprintf(stderr,"%s: %d: Tag name exceeded MAX_TEXT_LEN of %d characters.\n",g_fileName,g_lineNumber,MAX_TEXT_LEN);
- }
- if(*tagNameCount == 0){
- if(isalpha(ch)){
- tagName[*tagNameCount] = ch;
- }
- else if(ch == '/'){
- return;
- }
- else{
- fprintf(stderr,"%s: %d: Tag name must start with a letter.\n",g_fileName,g_lineNumber);
- exit(1);
- }
- }
- if(*tagNameCount > 0){
- if(isalnum(ch)){
- tagName[*tagNameCount] = ch;
- }
- else if(ch == '>' || ch == '/' || isspace(ch)){
- *doneReadingName = 1;
- tagName[*tagNameCount] = '\0';
- return;
- }
- else{
- fprintf(stderr,"%s: %d: Tag name contains illegal symbol.\n",g_fileName, g_lineNumber);
- exit(1);
- }
- }
- *tagNameCount = *tagNameCount + 1;
- }
- /* ReadItem
- * Repeatedly calls ReadNormalizedChar and determines the type of item.
- * If a tag is encountered it calls ReadTagName to determine the tag name.
- * All results are stored in char *text and char *tagName.
- * Outputs: The type of item encountered.
- */
- ItemType ReadItem(char *text, char *tagName){
- char ch;
- int i;
- /*Flags*/
- int wasTag = 0, slashAtStart = 0, slashAtEnd = 0, inputEndFound = 0;
- int tagNameCount = 0;
- int doneReadingName = 0;
- for(i = 0; i < MAX_TEXT_LEN; i++){
- ch = ReadNormalizedChar();
- /*Read the tag name*/
- if(wasTag && !doneReadingName){
- ReadTagName(tagName, ch, &tagNameCount, &doneReadingName);
- }
- /*Deciding if the text being read is a tag or not.*/
- if(ch == '<'){
- if(i == 0){
- wasTag = 1;
- }
- /*Find dangling < characters*/
- else{
- if(wasTag){
- fprintf(stderr,"%s: %d: Unexpected < character.\n",g_fileName, g_lineNumber);
- }
- /*A tag was encountered, so the item type has changed and we are done reading this chunk of input.*/
- else{
- UnreadChar(ch);
- text[i] = '\0';
- break;
- }
- }
- }
- /*Deciding if a > signifies an endtag or an empty element tag.*/
- if(ch == '/'){
- if(wasTag){
- if(text[i-1] == '<'){
- slashAtStart = 1;
- }
- }
- }
- if(ch == '>'){
- if(wasTag){
- if(text[i-1] == '/'){
- slashAtEnd = 1;
- }
- text[i] = ch;
- text[i+1] = '\0';
- break;
- }
- else{
- fprintf(stderr,"%s: %d: Unexpected > character.\n",g_fileName, g_lineNumber);
- exit(1);
- }
- }
- /* Found EOF */
- if(ch == '\0'){
- inputEndFound = 1;
- break;
- }
- }
- /* Deciding return value based off set flags*/
- if(wasTag){
- printf("Tag name: %s\n",tagName);
- if(slashAtStart){
- return EndTag;
- }
- else if(slashAtEnd){
- return EmptyElement;
- }
- else if(slashAtStart && slashAtEnd){
- fprintf(stderr,"%s: %d: Malformed tag.\n",g_fileName, g_lineNumber);
- }
- else{
- return StartTag;
- }
- }
- if(inputEndFound){
- return EndOfInput;
- }
- return Text;
- }
- /* MatchUntilEndTag
- * Finds tags via ReadItem until a matching tag is encountered. If no matching tag is encountered, then an error is returned.
- * Calls itself recursively whenever ReadItem finds a StartTag item.
- * Outputs: None.
- */
- void MatchUntilEndTag(char *tagName, int nestingDepth){
- ItemType item;
- char text[MAX_TEXT_LEN+1];
- char nextTagName[MAX_TEXT_LEN+1];
- int tagLineNumber = 0;
- while(1){
- item = ReadItem(text, nextTagName);
- if(item == EndTag && strncmp(nextTagName, tagName, MAX_TEXT_LEN) == 0){
- if(g_createOutput){
- PrintSpaces(nestingDepth*INDENT_INCREMENT);
- fprintf(stdout,"%s",text);
- }
- break;
- }
- if(g_createOutput){
- PrintSpaces((nestingDepth+1)*INDENT_INCREMENT);
- fprintf(stdout,"%s",text);
- }
- if(item == StartTag){
- MatchUntilEndTag(nextTagName, nestingDepth+1);
- }
- if(item == EndOfInput){
- fprintf(stderr,"%s: %d: Unmatched tag.\n",g_fileName, g_lineNumber);
- exit(1);
- }
- }
- }
- /* MatchUntilEof
- * Matches items until an EndOfInput item is returned from ReadItem.
- * If a StartTag is encountered, it calls MatchUntilEndTag.
- * Outputs: None
- */
- void MatchUntilEof(void){
- ItemType item;
- char text[MAX_TEXT_LEN+1];
- char tagName[MAX_TEXT_LEN+1];
- while((item = ReadItem(text, tagName)) != EndOfInput){
- if(g_createOutput){
- fprintf(stdout,"%s",text);
- }
- if(item == StartTag){
- MatchUntilEndTag(tagName, 0);
- }
- }
- }
- /****************/
- /* Main
- * Handles file operations and input values.
- * Proper XCHK invocation: ./xchk [-R] filename
- */
- int main(int argc, char* argv[]){
- if(argc == 3){
- if(strncmp(argv[1], "-R", 2)!=0){
- fprintf(stderr,"%s: Expected -R as second parameter.\n",argv[0]);
- return -1;
- }
- g_fileName = argv[2];
- g_createOutput = 1;
- }
- else if(argc == 2){
- g_fileName = argv[1];
- }
- else{
- fprintf(stderr,"%s: Improper invocation. Try %s [-R] g_filename \n",argv[0], argv[0]);
- return -1;
- }
- g_inputFile = fopen(g_fileName, "r");
- if(g_inputFile == NULL){
- fprintf(stderr, "%s: Could not open %s.\n",argv[0],g_fileName);
- return -1;
- }
- MatchUntilEof();
- fclose(g_inputFile);
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement