Advertisement
cantcodelol

magister filter

Jul 16th, 2018
98
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.91 KB | None | 0 0
  1. from bs4 import BeautifulSoup
  2. import re
  3. import linecache
  4.  
  5. #                  | r   r+   w   w+   a   a+
  6. #------------------|--------------------------
  7. #read              | +   +        +        +
  8. #write             |     +    +   +    +   +
  9. #write after seek  |     +    +   +
  10. #create            |          +   +    +   +
  11. #truncate          |          +   +
  12. #position at start | +   +    +   +
  13. #position at end   |                   +   +
  14.  
  15. # https://regexr.com/
  16. #text = re.sub("<.*?>", "", text)
  17.  
  18.  
  19. SourcecodeFile = "C:\\Users\\Thomas\\Documents\\VSCode\\motdfile\\schedule\\sourcecode2.txt"
  20. ResultsFile ="C:\\Users\\Thomas\\Documents\\VSCode\\motdfile\\schedule\\results.txt"
  21. ResultsFormattedFile = "C:\\Users\\Thomas\\Documents\\VSCode\\motdfile\\schedule\\resultsedited.txt"
  22. f = open(SourcecodeFile, "r+")
  23. html = f.read()
  24. f.close()
  25. html = html.replace("   ", "")
  26. html = html.replace("&nbsp;", "")
  27. html = re.sub("<!--.*?-->", "", html)
  28. f = open(SourcecodeFile, "w")
  29. f.write(html)
  30. f.close()
  31. soup = BeautifulSoup(html, 'html.parser')
  32.  
  33. f = open("results.txt", "w+")
  34. for item in soup.find_all("td"):
  35.     f.write(item.get_text())
  36. f.seek(0)
  37. with open('resultsedited.txt','w') as f2:
  38.     for line in f:
  39.         if not line.isspace() and line != "pw\n" and line != "tt\n" and line != "hw\n":
  40.             f2.write(line)
  41. f.close()
  42.  
  43. """
  44. alles [
  45.    [
  46.        datum,
  47.        lesuur[vak, tijd, docent, locatie]
  48.        lesuur[vak, tijd, docent, locatie]
  49.        lesuur[vak, tijd, docent, locatie]
  50.        lesuur[vak, tijd, docent, locatie]
  51.    ]
  52.    [
  53.        datum,
  54.        lesuur[vak, tijd, docent, locatie]
  55.        lesuur[vak, tijd, docent, locatie]
  56.        lesuur[vak, tijd, docent, locatie]
  57.        lesuur[vak, tijd, docent, locatie]
  58.    ]
  59.    [
  60.        datum,
  61.        lesuur[vak, tijd, docent, locatie]
  62.        lesuur[vak, tijd, docent, locatie]
  63.        lesuur[vak, tijd, docent, locatie]
  64.        lesuur[vak, tijd, docent, locatie]
  65.    ]
  66.    [
  67.        datum,
  68.        lesuur[vak, tijd, docent, locatie]
  69.        lesuur[vak, tijd, docent, locatie]
  70.        lesuur[vak, tijd, docent, locatie]
  71.        lesuur[vak, tijd, docent, locatie]
  72.    ]
  73. ]
  74. """
  75. schedule = []
  76.  
  77.  
  78. with open('resultsedited.txt','r+') as f:
  79.     f.seek(0, 0)
  80.     lineNumber = 0
  81.     for line in f:
  82.         lineNumber += 1
  83.         if re.match(r"\w+ \d+ \w+", line):
  84.             schedule.append([line[:-2]])
  85.         elif re.match(r".*? - \w*? - .*", line):
  86.             itemList = line.split(" - ")
  87.             subject = itemList[0]
  88.             schedule[-1].append([subject])
  89.             hour = linecache.getline("resultsedited.txt", lineNumber - 1)[:-1]
  90.             schedule[-1][-1].insert(0, int(hour))
  91.         elif line == "hele dag\n":
  92.             schedule[-1].append([0])
  93.             schedule[-1][-1].append(linecache.getline("resultsedited.txt", (lineNumber + 1))[:-1])
  94.         elif re.match(r"  .*\n", line):
  95.             schedule[-1][-1].append(line[2:-1])        
  96.         elif re.match(r"\(.*?\)", line):
  97.             location = line[:-1]
  98.             if location == "( )":
  99.                 location = "(MHP)"
  100.             if isinstance(schedule[-1][-1], str):
  101.                 schedule[-1].append([0])
  102.                 schedule[-1][-1].append(linecache.getline("resultsedited.txt", (lineNumber + 1))[2:-1])
  103.                 schedule[-1][-1].append(location[1:-1])
  104.             else:
  105.                 schedule[-1][-1].append(location[1:-1])
  106.            
  107.  
  108.            
  109.  
  110.  
  111. for a in schedule:
  112.     print(str(a[0]) + "\n")
  113.     count = 1
  114.     while (count < len(a)):
  115.         countNested = 1
  116.         print("\t" + str(a[count][0]))
  117.         while (countNested < len(a[count])):
  118.             print("\t\t" + str(a[count][countNested]))
  119.             countNested += 1
  120.         count += 1
  121.  
  122.  
  123. # date: "\w* \d+ \w*"
  124. # subject/teacher/class: ".*? - \w*? - .*"
  125. # classroom: "\(\d*?\)"
  126.  
  127.  
  128. # TODO: add comments explaining what everything does, plus a lot of other cleanup
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement