Advertisement
DeaD_EyE

odoo wkhtmltopdf debugging

Jul 22nd, 2020
233
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.40 KB | None | 0 0
  1. #!/usr/bin/env python3
  2.  
  3. """
  4. Wrapper to debug wkhtmltopdf
  5. """
  6.  
  7. import sys
  8. import logging
  9. import logging.handlers
  10. import shutil
  11. import pwd
  12. import os
  13. from pathlib import Path
  14. from stat import S_IROTH, S_IXOTH, S_IRGRP
  15. from subprocess import Popen, PIPE
  16. from urllib.parse import urljoin, urlparse
  17.  
  18. import requests
  19. import setproctitle
  20. from bs4 import BeautifulSoup
  21.  
  22.  
  23. setproctitle.setproctitle("wkhtmltopdf.py")
  24.  
  25.  
  26. PWD = pwd.getpwnam("odoo")
  27. UID = PWD.pw_uid
  28. GID = PWD.pw_gid
  29. DEBUG = True
  30. LAST_BODY = Path("/tmp/last_body.html")
  31. LAST_HEADER = Path("/tmp/last_header.html")
  32. LAST_FOOTER = Path("/tmp/last_footer.html")
  33. LAST_HTML = Path("/tmp/last_html.html")
  34. LAST_ARGS = Path("/tmp/last_args.txt")
  35. STDOUT = Path("/tmp/last_wkhtmtopdf_stdout.txt")
  36. STDERR = Path("/tmp/last_wkhtmtopdf_stderr.txt")
  37. WKHTMLTOPDF = Path("/usr/bin/wkhtmltopdf.bin")
  38.  
  39. handler = logging.handlers.SysLogHandler(address="/dev/log")
  40. logger = logging.getLogger("wkhtmltopdf")
  41. logger.setLevel(logging.INFO)
  42. logger.addHandler(handler)
  43.  
  44.  
  45. def url_is_relative(url):
  46.     url = urlparse(url)
  47.     return not url.path.startswith("/")
  48.  
  49.  
  50. def url_no_base(url):
  51.     url = urlparse(url)
  52.     return url.path.startswith("/") and not url.scheme and not url.netloc
  53.  
  54.  
  55. def download_files(html_file):
  56.     tmpdir = Path("/tmp")
  57.     static_dir = Path("/tmp/static")
  58.     static_dir.mkdir(exist_ok=True)
  59.     static_dir.chmod(static_dir.stat().st_mode | S_IROTH | S_IXOTH)
  60.     with open(html_file, "rb") as fd:
  61.         bs = BeautifulSoup(fd.read(), "html.parser")
  62.     base_element = bs.find("base")
  63.     base_url = base_element["href"]
  64.     # base_element["href"] = base_url # + ":8080"
  65.     if not url_is_relative(base_url):
  66.         raise ValueError("base_url must be absolute.")
  67.     for element in bs.find_all("link", href=True):
  68.         if url_no_base(element["href"]):
  69.             download_url = urljoin(base_url, element["href"])
  70.             if download_url.rpartition(".")[2] in ("woff", "woff2"):
  71.                 continue
  72.             req = session.get(download_url)
  73.             logger.info(f"GET {download_url} {req.status_code}")
  74.             if req.status_code == 200:
  75.                 file_name = Path(urlparse(download_url).path).name
  76.                 file = static_dir / file_name
  77.                 with file.open("wb") as fd:
  78.                     fd.write(req.content)
  79.                 os.chown(file, UID, GID)
  80.                 os.chmod(file, file.stat().st_mode | S_IRGRP | S_IROTH)
  81.                 element["href"] = str(file)
  82.     with open(html_file, "wt") as fd:
  83.         fd.write(bs.prettify())
  84.     os.chown(html_file, UID, GID)
  85.     os.chmod(html_file, html_file.stat().st_mode | S_IRGRP | S_IROTH)
  86.  
  87.  
  88. if __name__ == "__main__":
  89.     args = sys.argv[1:]
  90.     header = b""
  91.     footer = b""
  92.     session = requests.Session()
  93.     if DEBUG and "--cookie" in args:
  94.         session_id_idx = args.index("--cookie") + 2
  95.         session.cookies["session_id"] = args[session_id_idx]
  96.     if DEBUG and "--header-html" in args:
  97.         header_idx = args.index("--header-html") + 1
  98.         header_uri = args[header_idx]
  99.         if header_uri.startswith("http"):
  100.             header_req = session.get(header_uri)
  101.             if header_req.status_code == 200:
  102.                 if header_req.content:
  103.                     with LAST_HEADER.open("wb") as fd:
  104.                         fd.write(header_req.content)
  105.         else:
  106.             shutil.copy(header_uri, LAST_HEADER)
  107.         os.chown(LAST_HEADER, UID, GID)
  108.         try:
  109.             download_files(LAST_HEADER)
  110.         except Exception as e:
  111.             with open("/tmp/log.txt", "w") as fd:
  112.                 fd.write(repr(e))
  113.     if DEBUG and "--footer-html" in args:
  114.         footer_idx = args.index("--footer-html") + 1
  115.         footer_uri = args[footer_idx]
  116.         if footer_uri.startswith("http"):
  117.             footer_req = session.get(footer_uri)
  118.             if footer_req.status_code == 200:
  119.                 if footer_req.content:
  120.                     with LAST_FOOTER.open("wb") as fd:
  121.                         fd.write(footer_req.content)
  122.         else:
  123.             shutil.copy(footer_uri, LAST_FOOTER)
  124.         os.chown(LAST_FOOTER, UID, GID)
  125.         download_files(LAST_FOOTER)
  126.     if len(args) >= 2:
  127.         try:
  128.             uri = args[-2]
  129.             if uri.startswith("http"):
  130.                 body_req = session.get(uri)
  131.                 if body_req.status_code == 200:
  132.                     with LAST_BODY.open("wb") as fd:
  133.                         fd.write(body_req.content)
  134.             else:
  135.                 shutil.copy(uri, LAST_BODY)
  136.             download_files(LAST_BODY)
  137.         except IndexError:
  138.             pass
  139.  
  140.     if DEBUG:
  141.         while "--quiet" in args:
  142.             args.remove("--quiet")
  143.  
  144.     proc = Popen([WKHTMLTOPDF, *args], stdin=None, stdout=PIPE, stderr=PIPE)
  145.  
  146.     if DEBUG:
  147.         with LAST_ARGS.open("w") as fd:
  148.             fd.write("\n".join(args))
  149.         os.chown(LAST_ARGS, UID, GID)
  150.         stdout, stderr = proc.communicate()
  151.         with STDOUT.open("wb") as stdout_fd, STDERR.open("wb") as stderr_fd:
  152.             stdout_fd.write(stdout)
  153.             stderr_fd.write(stderr)
  154.         os.chown(STDOUT, UID, GID)
  155.         os.chown(STDERR, UID, GID)
  156.         print(stdout.decode(), file=sys.stdout)
  157.         print(stderr.decode(), file=sys.stderr)
  158.         # proc.wait()
  159.         shutil.copy(args[-1], "/tmp/last_output.pdf")
  160.         os.chmod("/tmp/last_output.pdf", 0o644)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement