Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from html.parser import HTMLParser
- import csv
class MLStripper(HTMLParser):
    """HTML parser that discards markup and keeps only the text content.

    Each run of text reported by the parser is appended to ``self.fed``;
    ``get_data`` returns those pieces joined into one string.
    """

    def __init__(self):
        # Run the base-class initialiser (which itself calls reset()) instead
        # of calling reset() alone, so all parser state is set up the
        # supported way. convert_charrefs=True matches the original setting.
        super().__init__(convert_charrefs=True)
        # Retained so any code that reads ``strict`` still finds the attribute.
        self.strict = False
        self.fed = []

    def handle_data(self, d):
        """Record one run of text delivered by the parser."""
        self.fed.append(d)

    def get_data(self):
        """Return all text collected so far as a single string."""
        return ''.join(self.fed)
def strip_tags(html):
    """Return ``html`` with its markup removed, keeping only the text.

    Args:
        html: String that may contain HTML tags and character references.

    Returns:
        The text collected by an ``MLStripper`` fed with ``html``.
    """
    s = MLStripper()
    s.feed(html)
    # feed() may keep trailing text buffered while it waits for more input
    # (for example after a bare '&'); close() forces that text to be
    # processed so it is not dropped from the result.
    s.close()
    return s.get_data()
# Number of data rows written by the second pass.
counter = 0

# Layout of the rows written by the second pass: the first 13 columns are
# emitted, and these two are wrapped in double quotes.
_COLUMN_COUNT = 13
_QUOTED_COLUMNS = (9, 10)

# Pass 1: strip HTML markup from every line of the raw export.
# NOTE(review): both output files are opened in append mode, as in the
# original, so a re-run adds to whatever is already there -- confirm intended.
with open('MVS_TASKS_ALEX.csv', mode='r', newline='', encoding='utf-8') as MVS_TASKS, \
        open('MVS_TASKS_ALEX2.csv', mode='a', newline='', encoding='utf-8') as MVS_TASKS2:
    for line in MVS_TASKS:
        # NOTE(review): this literal appeared as `"""` in the source, which is
        # a syntax error; "&quot;" is the presumed original text -- confirm.
        line = line.replace("&quot;", "")
        MVS_TASKS2.write(strip_tags(line))
# No explicit close() calls: the ``with`` statement closes both files.

# Pass 2: re-parse the stripped file as CSV and rewrite the first 13 columns
# separated by ", ", with columns 9 and 10 wrapped in double quotes.
with open('MVS_TASKS_ALEX2.csv', mode='r', newline='', encoding='utf-8') as MVS_TASKS, \
        open('MVS_TASKS_HTMLSTRIP.csv', mode='a', newline='', encoding='utf-8') as MVS_TASKS_HTMLSTRIP:
    reader = csv.reader(MVS_TASKS, delimiter=',', quotechar='"',
                        quoting=csv.QUOTE_ALL, skipinitialspace=True)
    for line in reader:
        if not line:
            # csv.reader yields an empty list for a blank line; indexing it
            # would raise IndexError, and there is nothing to write anyway.
            continue
        print(line)
        # Indexing (rather than slicing) keeps the original behaviour of
        # raising IndexError on a non-empty row with fewer than 13 columns.
        fields = [
            '"' + line[i] + '"' if i in _QUOTED_COLUMNS else line[i]
            for i in range(_COLUMN_COUNT)
        ]
        MVS_TASKS_HTMLSTRIP.write(", ".join(fields) + "\n")
        counter += 1
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement