Advertisement
Guest User

Untitled

a guest
Aug 24th, 2019
82
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.84 KB | None | 0 0
  1. from html.parser import HTMLParser
  2. import csv
  3.  
  4. class MLStripper(HTMLParser):
  5. def __init__(self):
  6. self.reset()
  7. self.strict = False
  8. self.convert_charrefs= True
  9. self.fed = []
  10. def handle_data(self, d):
  11. self.fed.append(d)
  12. def get_data(self):
  13. return ''.join(self.fed)
  14.  
  15. def strip_tags(html):
  16. s = MLStripper()
  17. s.feed(html)
  18. text = s.get_data()
  19. return text
  20. counter = 0
  21. with open('MVS_TASKS_ALEX.csv', mode='r', newline='',encoding='utf-8') as MVS_TASKS,\
  22. open('MVS_TASKS_ALEX2.csv', mode='a', newline='',encoding='utf-8') as MVS_TASKS2:
  23. for line in MVS_TASKS:
  24. line = line.replace(""", "")
  25. line = strip_tags(line)
  26. MVS_TASKS2.write(line)
  27. MVS_TASKS.close()
  28. MVS_TASKS2.close()
  29. with open('MVS_TASKS_ALEX2.csv', mode='r', newline='',encoding='utf-8') as MVS_TASKS,\
  30. open('MVS_TASKS_HTMLSTRIP.csv', mode='a', newline='', encoding='utf-8') as MVS_TASKS_HTMLSTRIP:
  31. reader = csv.reader(MVS_TASKS, delimiter = ',', quotechar='"', quoting=csv.QUOTE_ALL, skipinitialspace=True)
  32. for line in reader:
  33. print(line)
  34. MVS_TASKS_HTMLSTRIP.write(line[0] + ", " +
  35. line[1] + ", " +
  36. line[2] + ", " +
  37. line[3] + ", " +
  38. line[4] + ", " +
  39. line[5] + ", " +
  40. line[6] + ", " +
  41. line[7] + ", " +
  42. line[8] + ", " +
  43. "\"" + line[9] + "\"" + ", " +
  44. "\"" + line[10] + "\"" + ", " +
  45. line[11] + ", " +
  46. line[12] + "\n")
  47. counter += 1
  48. MVS_TASKS_HTMLSTRIP.close()
  49. MVS_TASKS.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement