Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from bs4 import BeautifulSoup
- import re
- import linecache
# open() mode cheat sheet:
#                   | r   r+   w   w+   a   a+
# ------------------|--------------------------
# read              | +   +        +        +
# write             |     +    +   +    +   +
# write after seek  |     +    +   +
# create            |          +   +    +   +
# truncate          |          +   +
# position at start | +   +    +   +
# position at end   |                   +   +
- # https://regexr.com/
- #text = re.sub("<.*?>", "", text)
# Absolute locations of the files this script works with.

# Saved page source; it is read, cleaned and then overwritten in place.
SourcecodeFile = "C:\\Users\\Thomas\\Documents\\VSCode\\motdfile\\schedule\\sourcecode2.txt"

# NOTE(review): the two constants below are not referenced anywhere in the
# visible code, which opens "results.txt" / "resultsedited.txt" relative to
# the current working directory instead - confirm which location is intended.
ResultsFile = "C:\\Users\\Thomas\\Documents\\VSCode\\motdfile\\schedule\\results.txt"
ResultsFormattedFile = "C:\\Users\\Thomas\\Documents\\VSCode\\motdfile\\schedule\\resultsedited.txt"
# Load the saved page source, strip noise from it and write the cleaned text
# back over the same file. Both file accesses use `with` so the handle is
# released even if reading, cleaning or writing raises.
with open(SourcecodeFile, "r") as source:
    html = source.read()

# NOTE(review): the two replace() calls look identical here and remove every
# single space. The whitespace inside these literals may have been collapsed
# when the script was pasted (tabs / multiple spaces?) - confirm the intended
# characters before relying on this step.
html = html.replace(" ", "")
html = html.replace(" ", "")

# Drop HTML comments. Without re.DOTALL, "." does not match a newline, so only
# comments that open and close on the same line are removed.
html = re.sub("<!--.*?-->", "", html)

# Overwrite the original file with the cleaned markup.
with open(SourcecodeFile, "w") as source:
    source.write(html)
# Dump the text of every <td> cell into results.txt, then copy the useful
# lines into resultsedited.txt.
soup = BeautifulSoup(html, 'html.parser')

# "w+" creates/truncates the file and also allows reading it back below.
# Nesting both files in `with` guarantees they are closed even if a write
# or the parsing fails part-way through.
with open("results.txt", "w+") as raw_results:
    for cell in soup.find_all("td"):
        raw_results.write(cell.get_text())

    # Rewind so the text that was just written can be iterated line by line.
    raw_results.seek(0)

    with open('resultsedited.txt', 'w') as filtered_results:
        for line in raw_results:
            # Skip whitespace-only lines and the bare "pw" / "tt" / "hw"
            # marker lines; everything else is kept unchanged.
            if line.isspace() or line in ("pw\n", "tt\n", "hw\n"):
                continue
            filtered_results.write(line)
"""
everything [
    [
        date,
        lesson[subject, time, teacher, location]
        lesson[subject, time, teacher, location]
        lesson[subject, time, teacher, location]
        lesson[subject, time, teacher, location]
    ]
    [
        date,
        lesson[subject, time, teacher, location]
        lesson[subject, time, teacher, location]
        lesson[subject, time, teacher, location]
        lesson[subject, time, teacher, location]
    ]
    [
        date,
        lesson[subject, time, teacher, location]
        lesson[subject, time, teacher, location]
        lesson[subject, time, teacher, location]
        lesson[subject, time, teacher, location]
    ]
    [
        date,
        lesson[subject, time, teacher, location]
        lesson[subject, time, teacher, location]
        lesson[subject, time, teacher, location]
        lesson[subject, time, teacher, location]
    ]
]
"""
# Parse resultsedited.txt into `schedule`: a list with one entry per day.
# Each day is a list whose first element is the date text, followed by one
# list per lesson. A lesson list starts with the hour (an int, 0 for an
# all-day entry) and then collects the strings found for that lesson.
#
# Every branch except the date branch indexes schedule[-1], so a lesson,
# detail or location line that appears before the first date line raises
# IndexError.
schedule = []

# Line classifiers, compiled once instead of being looked up per line.
_DATE_LINE = re.compile(r"\w+ \d+ \w+")
_LESSON_LINE = re.compile(r".*? - \w*? - .*")
# NOTE(review): this pattern matches ONE leading space while the branch below
# slices off TWO characters (line[2:-1]); whitespace in the literal may have
# been collapsed when the script was pasted - confirm.
_DETAIL_LINE = re.compile(r" .*\n")
_LOCATION_LINE = re.compile(r"\(.*?\)")

# The file is only read here, so plain "r" is enough; a freshly opened file
# is already positioned at the start.
with open('resultsedited.txt', 'r') as f:
    for lineNumber, line in enumerate(f, start=1):
        if _DATE_LINE.match(line):
            # New day: store the line without its last two characters.
            schedule.append([line[:-2]])

        elif _LESSON_LINE.match(line):
            # "a - b - c" row: the first field is kept as the subject.
            subject = line.split(" - ")[0]
            day = schedule[-1]
            # The line directly above must hold the lesson hour as an
            # integer; int() raises ValueError otherwise.
            hour = linecache.getline("resultsedited.txt", lineNumber - 1)[:-1]
            day.append([int(hour), subject])

        elif line == "hele dag\n":
            # All-day entry: hour 0, described by the following line.
            following = linecache.getline("resultsedited.txt", lineNumber + 1)[:-1]
            schedule[-1].append([0, following])

        elif _DETAIL_LINE.match(line):
            # Extra detail belonging to the most recent lesson.
            schedule[-1][-1].append(line[2:-1])

        elif _LOCATION_LINE.match(line):
            location = line[:-1]
            # NOTE(review): the "( )" literal may also have lost whitespace
            # in the paste - confirm against real input.
            if location == "( )":
                location = "(MHP)"
            if isinstance(schedule[-1][-1], str):
                # The day holds only its date so far: start an hour-0 entry
                # described by the following line before adding the location.
                schedule[-1].append([0])
                schedule[-1][-1].append(
                    linecache.getline("resultsedited.txt", lineNumber + 1)[2:-1]
                )
            # Store the location without its surrounding parentheses.
            schedule[-1][-1].append(location[1:-1])
# Print the parsed schedule as an indented outline: the date followed by a
# blank line, each lesson's first field one tab deep, and the remaining
# fields of that lesson two tabs deep.
for day in schedule:
    print(str(day[0]) + "\n")
    for lesson in day[1:]:
        print("\t" + str(lesson[0]))
        for detail in lesson[1:]:
            print("\t\t" + str(detail))
# date: "\w* \d+ \w*"
# subject/teacher/class: ".*? - \w*? - .*"
# room: "\(\d*?\)"
# TODO: comment what everything does, plus a whole lot of other things
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement