Advertisement
DeaD_EyE

demos parsing html

Jul 24th, 2020 (edited)
1,155
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.37 KB | None | 0 0
  1. #!/usr/bin/env python3
  2. """
  3. Erstmal nur als Konzept ...
  4. """
  5. import time
  6. import csv
  7. import datetime
  8. from collections import namedtuple
  9. from functools import wraps
  10. from io import StringIO
  11.  
  12. import requests
  13. from bs4 import BeautifulSoup
  14.  
  15. Felder = "datum tag plz stadt uhrzeit ort art".split()
  16. Result = namedtuple("Termine", Felder)
  17.  
  18.  
  19. def cache(ttl):
  20.     def wrapper(func):
  21.         result = None
  22.         last = 0
  23.  
  24.         @wraps(func)
  25.         def inner(*args, **kwargs):
  26.             nonlocal result
  27.             nonlocal last
  28.             if result is None or time.monotonic() > last:
  29.                 result = func(*args, **kwargs)
  30.                 last = time.monotonic() + ttl
  31.             return result
  32.  
  33.         return inner
  34.  
  35.     return wrapper
  36.  
  37.  
  38. @cache(3600)
  39. def get_demo_termine():
  40.     time.sleep(2)
  41.     url = "https://demo.terminkalender.top/"
  42.     try:
  43.         req = requests.get(url)
  44.     except Exception as e:
  45.         print(repr(e))
  46.         return
  47.     return BeautifulSoup(req.content, "html.parser")
  48.  
  49.  
  50. def demo_termine():
  51.     bs = get_demo_termine()
  52.     for row in bs.find_all("tr"):
  53.         row = row.find_all("td")
  54.         iterator = map(lambda x: x.text.strip(), row)
  55.         try:
  56.             result = Result(*iterator)
  57.         except TypeError:
  58.             continue
  59.         datum_zeit = result.datum + "T" + result.uhrzeit.partition(" ")[0]
  60.         try:
  61.             date = datetime.datetime.fromisoformat(datum_zeit)
  62.         except ValueError:
  63.             continue
  64.         result = Result(date, *result[1:])
  65.         yield result
  66.  
  67.  
  68. def get_cities():
  69.     return sorted({result.stadt.title() for result in demo_termine()})
  70.  
  71.  
  72. def next_demos():
  73.     jetzt = datetime.datetime.now()
  74.     return [termin for termin in demo_termine() if jetzt < termin.datum]
  75.  
  76.  
  77. def by_date(termin):
  78.     return termin.datum
  79.  
  80.  
  81. def print_next_demos():
  82.     for termin in sorted(set(next_demos()), key=by_date):
  83.         print(
  84.             f"{termin.datum:%d.%m} | {termin.datum:%H:%M} | {termin.plz:<5} | {termin.stadt:<25} | {termin.ort:<31} | {termin.art}"
  85.         )
  86.  
  87.  
  88. def as_csv():
  89.     io = StringIO()
  90.     writer = csv.writer(io)
  91.     writer.writerow(Felder)
  92.     for termin in sorted(next_demos(), key=by_date):
  93.         writer.writerow([termin.datum.isoformat(), *termin[1:]])
  94.     return io.getvalue()
  95.  
  96.  
# Script entry point: when the file is executed directly (not imported),
# print the table of upcoming entries.
if __name__ == "__main__":
    print_next_demos()
  99.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement