Guest User

Untitled

a guest
Jun 21st, 2018
75
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.56 KB | None | 0 0
  1.  
  2. #include <iostream>
  3. #include <fstream>
  4. #include <string>
  5.  
  6. using namespace std;
  7.  
  8. void remove_header(string &line);
  9. void remove_footer(string &line);
  10. bool contains_func(string line);
  11. int find_pos(string line);
  12.  
  13. /* Reads an HTML document, strips HTML tags from each line, and writes the result to a plain-text file. */
  14.  
  15. int main()
  16. {
  17. string line;
  18. ifstream htmlStream("test_file.html");
  19. ofstream output("output.txt");
  20. if (htmlStream.is_open())
  21. {
  22. while ( htmlStream.good() )
  23. {
  24. getline (htmlStream,line);
  25.  
  26. if (contains_func(line)== true)
  27. {
  28. remove_header(line);
  29. }
  30.  
  31. if (contains_func(line)== true)
  32. {
  33. remove_footer(line);
  34. }
  35.  
  36. if (output.is_open())
  37. {
  38. output<<line<<"\n";
  39. }
  40. }
  41. }
  42. htmlStream.close();
  43. output.close();
  44. return 0;
  45. }
  46.  
  47.  
  /**
   * Removes the first HTML tag found in a line, e.g. "<html>".
   *
   * The tag runs from the first '<' to the next '>' (inclusive). If no '>'
   * follows, everything from '<' to the end of the line is removed. A "<li>"
   * tag is replaced with "- " instead of being dropped, so list items stay
   * visibly marked in the plain-text output. A line without '<' is left
   * untouched.
   *
   * @param line A line of text taken from an HTML file; modified in place.
   */
  void remove_header(std::string &line)
  {
      const std::string::size_type start_pos = line.find('<');
      if (start_pos == std::string::npos)
      {
          return;  // nothing that looks like a tag
      }

      // Measure the tag from its own '<', not from the start of the line.
      const std::string::size_type close_pos = line.find('>', start_pos);
      const std::string::size_type tag_len =
          (close_pos == std::string::npos) ? line.length() - start_pos
                                           : close_pos - start_pos + 1;

      if (line.compare(start_pos, tag_len, "<li>") == 0)
      {
          line.replace(start_pos, tag_len, "- ");
      }
      else
      {
          line.erase(start_pos, tag_len);
      }
  }
  80.  
  /**
   * Removes a tag that follows some leading text in a line, e.g. the
   * "</html>" in "text</html>".
   *
   * Only the first '<' in the line is considered. If it sits at the very
   * start of the line, the line is left unchanged (such a tag is the one
   * remove_header() deals with). Otherwise everything from that '<' to the
   * next '>' (inclusive) is erased; if no '>' follows, the rest of the line
   * is erased. A line without '<' is left untouched.
   *
   * @param line A line of text taken from an HTML file; modified in place.
   */
  void remove_footer(std::string &line)
  {
      const std::string::size_type start_pos = line.find('<');
      if (start_pos == std::string::npos || start_pos == 0)
      {
          return;
      }

      // The erase length must be measured from the tag's own '<'. Counting
      // from the start of the line would also delete text after the tag.
      const std::string::size_type close_pos = line.find('>', start_pos);
      const std::string::size_type tag_len =
          (close_pos == std::string::npos) ? line.length() - start_pos
                                           : close_pos - start_pos + 1;

      line.erase(start_pos, tag_len);
  }
  106.  
  /**
   * Finds the position where the first HTML tag in a line begins.
   *
   * @param line A line of text taken from an HTML file.
   * @return The index of the first '<' in the line. If the line contains no
   *         '<', returns line.length() (one past the last character), which
   *         is still a valid position argument for std::string::erase and
   *         std::string::substr.
   */
  int find_pos(std::string line)
  {
      const std::string::size_type pos = line.find('<');
      if (pos == std::string::npos)
      {
          // Always return a value: falling off the end of a non-void
          // function is undefined behaviour.
          return static_cast<int>(line.length());
      }
      return static_cast<int>(pos);
  }
  122.  
  /**
   * Determines whether a line contains the start of an HTML tag.
   * Used to decide whether the remove functions should be applied.
   *
   * @param line A line of text taken from an HTML file.
   * @return true if the line contains a '<' character (e.g. "<html>"),
   *         false otherwise, including for an empty line.
   */
  bool contains_func(std::string line)
  {
      return line.find('<') != std::string::npos;
  }
Add Comment
Please, Sign In to add comment