Advertisement
Guest User

Untitled

a guest
Oct 13th, 2015
80
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 14.98 KB | None | 0 0
  1. /*
  2. * Author: James Walter Erardi
  3. * University of Massachusetts Lowell
  4. *
  5. * James_Erardi@student.uml.edu
  6. * © UMass Lowell 2015
  7. *
  8. * Created on September 8, 2015, 10:15 AM
  9. * Edited on September 11, 2015, 4:06PM
  10. * Edited on September 22, 2015, 4:00PM
  11. * Edited on September 24, 2015, 4:33PM
  12. * Edited on September 25, 2015, 11:38AM
  13. * Edited on October 1st, 2015, 11:00AM
  14. */
  15.  
  16.  
  17. #include <cstdlib> //the netbeans version of stdlib
  18. #include <iostream> //required lib for input/output stream functions
  19. #include <cstring> //required lib for c-strings
  20. #include <string> //required lib for strings
  21. #include <fstream> //required lib for reading in files
  22. #include <vector> //required lib for STL vector
  23. #include <algorithm> //supplies a few useful algorithms
  24. #include "Element.h" //header file containing functions
  25. #include <stack> //required lib for STL stack
  26. #include "Attribute.h" //header file containing attribute functions
  27.  
  28.  
  29. using namespace std; //standard namespace operation scope, so we don't need to put std:: before everything
  30.  
  31. //add a new child element
  32. void elements::addChild(elements* child){
  33. children.push_back(child);
  34.  
  35. }
  36.  
  37.  
  38.  
  39. /**
  40. * This function is used during debugging to display the parser state.
  41. * @param ps the parser state
  42. * Code provided by Professor Jesse M Heines
  43. */
  44. //void ShowState(ParserState ps) {
  45. // cout << "ParserState = ";
  46. // switch (ps) {
  47. // case UNKNOWN: cout << "UNKNOWN"; break;
  48. // case STARTING_DOCUMENT: cout << "STARTING_DOCUMENT"; break;
  49. // case DIRECTIVE: cout << "DIRECTIVE"; break;
  50. // case ELEMENT_OPENING_TAG: cout << "ELEMENT_OPENING_TAG"; break;
  51. // case ELEMENT_CONTENT: cout << "ELEMENT_CONTENT"; break;
  52. // case ELEMENT_NAME_AND_CONTENT: cout << "ELEMENT_CONTENT"; break;
  53. // case ELEMENT_CLOSING_TAG: cout << "ELEMENT_CLOSING_TAG"; break;
  54. // case SELF_CLOSING_TAG: cout << "SELF_CLOSING_TAG"; break;
  55. // case STARTING_COMMENT: cout << "STARTING_COMMENT"; break;
  56. // case IN_COMMENT: cout << "IN_COMMENT"; break;
  57. // case ENDING_COMMENT: cout << "ENDING_COMMENT"; break;
  58. // case ONE_LINE_COMMENT: cout << "ONE_LINE_COMMENT"; break;
  59. // case ERROR: cout << "ERROR"; break;
  60. // default: cout << "UNKNOWN"; break;
  61. // }
  62. // cout << endl;
  63. //}
  64.  
  65.  
  66.  
  67.  
  68. /*
  69. * printVector function takes in a vector and prints it using
  70. * string iterators.
  71. *
  72. */
  73. void printVector(vector<elements> vec)
  74. {
  75. vector<elements>::iterator it;
  76.  
  77.  
  78.  
  79. for (it = vec.begin(); it != vec.end(); ++it)
  80. {
  81. cout << it->get_Line() << ' ' << it->get_Name();
  82. cout << '\n';
  83. }
  84.  
  85. }
  86.  
  87. /*
  88. * This function handles almost all of assignment three's parsing.
  89. * I know this function is way, way, way too big. I tried making things like the print functions a bit more modular
  90. * but I was having pointer and run time crashes all over the place. I have a Logic Design exam this Friday and I just
  91. * don't really have the free time to fix this up, unfortunately. I understand if points will be deducted, I just figured
  92. * I should admit my programs glaring flaw before I get destroyed because of it.
  93. *
  94. * The function takes in an xml file's name. This was done for testing purposes, so I could implement Professor Heines XML file
  95. * easily in order to test mine the way he tested his.
  96. */
  97. void read_XML(string song_name){
  98. string line; //string variable used for the current line of XML being parsed
  99. ifstream songfile; //ifstream songfile, object used for the XML input file
  100. int intLinenum = 1; //an integer which represents the current line number on the XML file which we're parsing
  101. int startpos = 0, endpos = 0; //two integers which represent the designated starting and ending position
  102. //of the current line of XML which we're working with.
  103. //value of variables dependent on what kind of line it is
  104. int secondOpen; //an integer which I made on the fly to represent the second open brackets position.
  105. //a fairly ghetto fix, would try to re-vamp how I did this if I had the time.
  106. //string tag = "";
  107.  
  108. elements* tag; //element pointer for the current XML file's tag, in order to initiate a new element
  109. vector<elements> song_to_be_parsed; //vector which was used for assignment 2, obsolete for this assignment
  110. stack<elements*> stack_of_elements; //stack of element pointers in which we'll push opening brackets onto
  111.  
  112. vector<elements*> song;
  113.  
  114. //bool to represent if we're on a parent
  115. bool parent = false;
  116. //elements* popped_element; //variable which was created to assign the popped element to. ended up being obsolete.
  117.  
  118.  
  119. //
  120. songfile.open(song_name); //assign ifstream object songfile by the passed in xml file
  121. while (getline(songfile, line))
  122. {
  123. //instantiate's integer startpos and endpos to the location of the current line
  124. //which is being read's open and end bracket
  125. //startpos = line.find("<");
  126. //endpos = line.find(">", startpos);
  127. ////tag = line.substr(startpos + 1, (endpos - startpos) - 1);
  128. //tag = new elements((line.substr(startpos + 1, (endpos - startpos) - 1)), intLinenum);
  129. //cout << "Line:" << line << endl;
  130.  
  131. if (line.find("<!--") != string::npos) //looking comments
  132. {
  133. cout << intLinenum << ": Parser State - Starting comment: " << line << endl; //shows the first part of the comment
  134. intLinenum++; //increment for the starting comment
  135. getline(songfile, line); //gets next line
  136. while (line.find("-->") == string::npos) //searching for the end of the comment
  137. {
  138. intLinenum++; //important to increment the linenum now, because we will use getline() to move to the next line soon
  139. //and we would not count for this complete element if we don't increment an extra time
  140. cout << intLinenum << ": Parser State - Comment: " << line << endl;
  141. getline(songfile, line); //gets next line
  142. }
  143. cout << intLinenum << ": Parser State - Ending comment: " << line << endl << endl;
  144. getline(songfile, line); //gets next line
  145. intLinenum++; //counts the line again
  146. }
  147.  
  148.  
  149. //if loop that attempts to find complete lines, and move past them
  150. if (line.at((line.find("<")) + 1) != '/'){
  151. startpos = line.find("<");
  152. endpos = line.find(">", startpos);
  153.  
  154. //figure out if this is a complete xml line on one line
  155. int temp2 = line.find('/'); //finds the slash, which would indicate a closing tag
  156. if (temp2 != string::npos&&line.at(temp2 - 1) == '<'){
  157. //if this is a child tag, ie parent is true, add to
  158. if (parent){
  159. //song.insert(addChild())
  160. }
  161. cout << intLinenum << ": Parser State - Complete line: " << line << endl;
  162. //force it to go onto the next line
  163. //getline(songfile, line);
  164.  
  165. //cout << "Found complete line:" << line << endl;
  166.  
  167. // boolTwoLines = true;
  168. getline(songfile, line);
  169. //i++;
  170. intLinenum++;
  171. }
  172.  
  173. }
  174.  
  175.  
  176.  
  177. //if loop that finds the opening bracket
  178. secondOpen = line.find("/");
  179. if ((line.at((line.find("<")) + 1) != '/')&&secondOpen==string::npos){
  180.  
  181. startpos = line.find("<");
  182. endpos = line.find(">", startpos + 1);
  183.  
  184.  
  185. //tag = line.substr(startpos + 1, (endpos - startpos) - 1);
  186. tag = new elements(getOpen(line), intLinenum);
  187. //tag = new elements((line.substr(startpos + 1, (endpos - startpos) - 1)), intLinenum);
  188. cout << intLinenum << ": Opening tag is: " << tag->get_Name() << endl;
  189. parent = true;
  190. //pops current open tag into the stack. hopefully, next thing popped in will be the closing tag
  191. stack_of_elements.push(tag);
  192. //intLinenum++;
  193.  
  194. //cout << "Top of the stack is: " << stack_of_elements.top()->get_Name() << endl;
  195.  
  196. //cout << "Opening bracket - Top of stack is: " << stack_of_elements.top()->get_Name() << endl << endl;
  197.  
  198.  
  199. }
  200.  
  201. //if loop to find where the closing brackets are
  202. if (line.at((line.find("<")) + 1) == '/'){
  203. startpos = line.find("/");
  204. endpos = line.find(">", startpos);
  205. tag = new elements((line.substr(startpos + 1, (endpos - startpos) - 1)), intLinenum);
  206. cout << intLinenum << ": Parser State - Closing tag is: " << tag->get_Name() << endl;
  207. //stack_of_elements.push(new elements(tag->get_Name(), tag->get_Line()));
  208. //intLinenum++;
  209.  
  210. //cout << "stack_of_elements.top()->get_Name(): " << stack_of_elements.top()->get_Name() << endl;
  211. //if loop to look if the top of the stack is equal to this closing bracket
  212.  
  213. // cout << "----------------------------------" << endl;
  214. //cout << "Top of the stack is: " << stack_of_elements.top()->get_Name() << endl;
  215. //cout << "This closing bracket is: " << tag->get_Name() << endl;
  216. //cout << "----------------------------------" << endl << endl;
  217.  
  218.  
  219. //if loop which should be a function (afraid of messing things up, though)
  220. //this if loop will check if the top of the stack's name (which is an opening bracket)
  221. //is equal to the current closing bracket's name.
  222. //if it is, we have found a match. print out success and pop the open bracket off the stack.
  223. if (stack_of_elements.top()->get_Name() == tag->get_Name()){
  224.  
  225. //lines for well formed, distinct output
  226. cout << endl << "----------------------------------" << endl;
  227.  
  228. cout << "Found closing bracket for open bracket: " << stack_of_elements.top()->get_Name() << endl;
  229. cout << "The closing bracket is: " << tag->get_Name() << endl;
  230. cout << "Popping the bracket off of the stack." << endl;
  231. stack_of_elements.pop();
  232. cout << "New top of the stack is: " << stack_of_elements.top()->get_Name() << endl;
  233.  
  234. cout << "----------------------------------" << endl << endl;
  235.  
  236. }
  237. //cout << "Closing bracket - Top of stack is: " << stack_of_elements.top()->get_Name() << endl << endl;
  238. }
  239.  
  240. //increment the int which represents the current line of XML after we've finished parsing the line
  241. intLinenum++;
  242.  
  243. //if (line.at((line.find(">"))))
  244. //if (line.at((line.find("<")) + 1) != '/'){
  245. //
  246. //
  247. // //cout << "This line is: " << line << endl;
  248. //
  249. //}
  250. //cout << "start:" << startpos << "end:" << endpos;
  251.  
  252. //instantiate the string tag to a string located between the start pos and end pos, which is the full XML line
  253. //tag = line.substr(startpos, endpos);
  254. //cout << startpos << " tag: " << tag << " " << endpos << endl;
  255.  
  256. //pushes the full XML tags into the vector
  257. //song_to_be_parsed.push_back(elements(tag, i));
  258.  
  259. }
  260. //printVector(song_to_be_parsed);
  261.  
  262. //prints the top of the stack. when all is said and done, the top of the stack should be the XML file directive.
  263. cout << intLinenum << ": Top of the stack is: " << stack_of_elements.top()->get_Name() << endl;
  264.  
  265. if (stack_of_elements.top()->get_Name() == "xml"){
  266. cout << endl << "*****************" << endl << "The top of the stack is the xml directive." << endl <<
  267. "Congratulations, your xml file is well formed and has been parsed correctly."
  268. << endl << "*****************" << endl;
  269. }
  270. //return song_to_be_parsed;
  271. }
  272.  
  273.  
  274. /*function from prior assignment. this is obsolete for this program.*/
  275. /*
  276. vector<elements> parse_vector(vector<elements> song){
  277. //initialize a few new strings for final parsing
  278. string current;
  279. string fixed_string;
  280. string line1;
  281. //integer to represent the lines which contain opening tags
  282. int g = 0;
  283. //ints to represent the start and endpos in the current element being parsed
  284. int startpos = 0, endpos = 0;
  285. cout << "----------------------------------" << endl;
  286. printVector(song);
  287. //initialize a new, final vector for the parsed information
  288. vector<elements> fixed_song;
  289.  
  290. //stack of elements to check for well formedness
  291. stack<elements> stack_tags;
  292.  
  293. //in line iterator declaration to iterate through unparsed vector
  294. for (vector<elements>::iterator it = song.begin(); it != song.end(); ++it)
  295. {
  296. //set line1 string to the current vector element being iterated
  297. line1 = it->get_Name();
  298.  
  299.  
  300. // cout << "itr->get_Name() yields " << it->get_Name() << endl;
  301. //this if loop will ensure we will ONLY be working with opening tags
  302. if (line1.at((line1.find("<")) + 1) != '/'){
  303.  
  304. //increment parsed, open bracket line number
  305. g++;
  306.  
  307. //allows us to work on a new variable string instead of having to access the element inside
  308. //the vector of elements
  309. current = it->get_Name();
  310.  
  311. //initializes startpos and endpos to bracket positions like before
  312. startpos = current.find("<");
  313. endpos = current.find(">");
  314.  
  315.  
  316. // cout << "Current is: " << current << endl;
  317.  
  318. //creates a fixed string initialized to a substring without brackets, using endpos and startpos
  319. //the +1 for startpos increments past the first <, -1 de-increments before the >
  320. fixed_string = current.substr(startpos + 1, (endpos - startpos) - 1);
  321.  
  322.  
  323.  
  324. //printing for testing purposes
  325. //cout << g << " fixed: " << fixed_string << endl;
  326.  
  327. //this if loop applies same logic as before's to remove brackets, but this time to remove spaces
  328. //in order to address the id="..." tags
  329. if (fixed_string.find(" ") != string::npos){
  330. endpos = fixed_string.find(" ");
  331.  
  332. fixed_string = fixed_string.substr(startpos, (endpos - startpos));
  333.  
  334. }
  335. //creates a brand new vector with all parsed information
  336. fixed_song.push_back(elements(fixed_string, g));
  337.  
  338. }
  339. }
  340. return fixed_song;
  341.  
  342.  
  343. }
  344. */
  345.  
  346. /*
  347. * getOpen function takes in the current open bracket line
  348. * it will take the string line and clean up the line, evaluating it based on
  349. * what the line is.
  350. * Tyler Bainbridge helped me with the code/theory of this code for this function.
  351. */
  352.  
  353. string getOpen(string line)
  354. {
  355.  
  356. int startPos = 0, endPos = 0; //two integers which represent the designated starting and ending position
  357. //of the current line of XML which we're working with.
  358. //value of variables dependent on what kind of line it is
  359. string tag1, finalTag;
  360.  
  361. startPos = line.find("<");
  362. endPos = line.find(">", startPos + 1); //goes through the current XML files open line and finds the ending bracket
  363.  
  364. tag1 = line.substr(startPos + 1, (endPos - startPos) - 1); //creates a new string that for only what is in the brackets
  365. startPos = tag1.find("<");
  366. endPos = tag1.find(">", startPos + 1); //sets endPos now to the ending of the new, bracket-less string
  367.  
  368. if (tag1.find(" ", startPos + 1) != string::npos) //checks for spaces in current, bracket-less tag
  369. {
  370. if (ispunct(tag1.at(startPos + 1))) //if theres punctuation, ie it's a self closing tag, skip over this punctuation
  371. {
  372. startPos++; //increment startPos to skip the punctuation
  373. }
  374. endPos = tag1.find(" ", startPos + 1); //sets the endPos to where the first instance of the space is
  375. finalTag = tag1.substr(startPos + 1, (endPos - startPos) - 1); //remove whatever is after the space (XML directive) and stores in final tag
  376. }
  377.  
  378. if (tag1.find(" ", startPos + 1) == string::npos) //if there is no space in the tag
  379. {
  380. if (ispunct(tag1.at(startPos + 1))) //if theres punctuation, ie it's a self closing tag, skip over this punctuation
  381. {
  382. startPos++; //increment startPos to skip the punctuation
  383. }
  384. finalTag = tag1.substr(startPos + 1, (endPos - startPos) - 1); //stores whatever the tag1 is after the punctuation truncation
  385. }
  386.  
  387. return finalTag;
  388. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement