Advertisement
Guest User

pdf.py

a guest
Jan 17th, 2016
528
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.43 KB | None | 0 0
  1. # Copyright (c) 2015, Frappe Technologies Pvt. Ltd. and Contributors
  2. # MIT License. See license.txt
  3. from __future__ import unicode_literals
  4.  
  5. import pdfkit, os, frappe, sys
  6. from frappe.utils import scrub_urls
  7. from frappe import _
  8. from bs4 import BeautifulSoup
  9.  
  10. def get_pdf(html, options=None):
  11.     if not options:
  12.         options = {}
  13.  
  14.     options.update({
  15.         "print-media-type": None,
  16.         "background": None,
  17.         "images": None,
  18.         'margin-top': '15mm',
  19.         'margin-right': '15mm',
  20.         'margin-bottom': '15mm',
  21.         'margin-left': '15mm',
  22.         'encoding': "UTF-8",
  23.         'quiet': None,
  24.         'no-outline': None,
  25.     })
  26.  
  27.     #create file from input html based on the divs content or create an empty file
  28.     header_html_file = make_html_file(html, "header")
  29.     footer_html_file = make_html_file(html, "footer")
  30.  
  31.     #update the printing options like margin-top based on variables in html or predefined settings
  32.     options = read_options_from_html(html, header_html_file[0], footer_html_file[0])
  33.  
  34.     if frappe.session and frappe.session.sid:
  35.         options['cookie'] = [('sid', '{0}'.format(frappe.session.sid))]
  36.  
  37.     if not options.get("page-size"):
  38.         options['page-size'] = frappe.db.get_single_value("Print Settings", "pdf_page_size") or "A4"
  39.  
  40.     html = scrub_urls(html)
  41.     fname = os.path.join("/tmp", frappe.generate_hash() + ".pdf")
  42.  
  43.     try:
  44.         pdfkit.from_string(html, fname, options=options or {}, )
  45.  
  46.         with open(fname, "rb") as fileobj:
  47.             filedata = fileobj.read()
  48.  
  49.     except IOError, e:
  50.         if "ContentNotFoundError" in e.message or "ContentOperationNotPermittedError" in e.message:
  51.             # allow pdfs with missing images if file got created
  52.             if os.path.exists(fname):
  53.                 with open(fname, "rb") as fileobj:
  54.                     filedata = fileobj.read()
  55.  
  56.             else:
  57.                 frappe.throw(_("PDF generation failed because of broken image links"))
  58.         else:
  59.             raise
  60.  
  61.     finally:
  62.         # always cleanup
  63.         if os.path.exists(fname):
  64.             os.remove(fname)
  65.  
  66.     try:
  67.         os.remove(header_html_file[1])
  68.     except:
  69.         pass
  70.     try:
  71.         os.remove(footer_html_file[1])
  72.     except:
  73.         pass
  74.     return filedata
  75.  
  76.  
  77.  
  78. def read_options_from_html(html, header_html_file, footer_html_file):
  79.     options = {}
  80.  
  81.     soup = BeautifulSoup(html, "html5lib")
  82.     if (header_html_file):
  83.         header_html_file_url = frappe.utils.get_request_site_address() + "/files/" + header_html_file
  84.         options.update({
  85.             'header-html': header_html_file_url,
  86.         })
  87.     if (footer_html_file):
  88.         footer_html_file_url = frappe.utils.get_request_site_address() + "/files/" + footer_html_file
  89.         options.update({
  90.             'footer-html': footer_html_file_url,
  91.         })
  92.  
  93.     try:
  94.         margin_top = soup.find('span', id='margintop')
  95.         margin_top = margin_top.contents
  96.     except:
  97.         margin_top = "15mm"
  98.     options.update({
  99.         'margin-top': margin_top,
  100.     })
  101.  
  102.     options.update({
  103.         "print-media-type": None,
  104.         "background": None,
  105.         "images": None,
  106.         'margin-right': '15mm',
  107.         'margin-bottom': '15mm',
  108.         'margin-left': '15mm',
  109.         'encoding': "UTF-8",
  110.         'quiet': None,
  111.         'no-outline': None,
  112.     })
  113.     return options
  114.  
  115. #this function will copy all head info from the soup of the parent document's html and will then return a head element string
  116. def copy_head_info(soup):
  117.     html_style = ""
  118.     html_styles = soup.findAll('style')
  119.     try:
  120.         for style in html_styles:
  121.             html_style += style.prettify()
  122.     except:
  123.         pass
  124.  
  125.     try:
  126.         html_head = ''.join(map(str, soup.find('head').contents))
  127.     except:
  128.         html_head = ''
  129.  
  130.     html_head += """
  131.       <script>
  132.             function subst() {
  133.                 var vars={};
  134.             var x=window.location.search.substring(1).split('&');
  135.             for (var i in x) {var z=x[i].split('=',2);vars[z[0]] = unescape(z[1]);}
  136.             var x=['frompage','topage','page','webpage','section','subsection','subsubsection'];
  137.             for (var i in x) {
  138.             var y = document.getElementsByClassName(x[i]);
  139.             for (var j=0; j<y.length; ++j) y[j].textContent = vars[x[i]];
  140.             }
  141.             }
  142.             subst()
  143.         </script>
  144.     """
  145.     html_head = "<head>"+ html_head + html_style + "</head>"
  146.     return html_head
  147.  
  148.  
  149. #this function will create a file with the contents we need for header and footer
  150. def make_html_file(html, type="header"):
  151.  
  152.     #make sure we use the correct encoding utf8
  153.     reload(sys)
  154.     sys.setdefaultencoding('utf8')
  155.     soup = BeautifulSoup(html, "html5lib")
  156.  
  157.     #set doctype to html5
  158.     html_doctype = """<!DOCTYPE html>"""
  159.  
  160.     #make sure we get all styles / scripts of the parent document
  161.     html_head = copy_head_info(soup)
  162.  
  163.     #get the header div
  164.     if (type=="header"):
  165.         pdf_header = soup.find('div', id='htmlheader')
  166.     else:
  167.         pdf_header = soup.find('div', id='htmlfooter')
  168.  
  169.     try:
  170.         html_content = ''.join(map(str, pdf_header))
  171.     except:
  172.         html_content = ''
  173.  
  174.     #create the html body content
  175.     html_body = """<body onload="subst()" style="margin:0; padding:0;"><div class="print-format"><div class="wrapper">""" + html_content  + """</div></div></body></html>"""
  176.  
  177.     #create the complete html of the page
  178.     header_html = html_doctype + html_head + html_body
  179.  
  180.     fname = type
  181.     temp_file = create_temp_html_file(fname, header_html)
  182.     return temp_file
  183.  
  184. #this function will create a random filename and then return the filename and filepath
  185. def create_temp_html_file(fname, html):
  186.     reload(sys)
  187.     sys.setdefaultencoding('utf8')
  188.     temp_name = fname + os.urandom(16).encode('hex') + ".html"
  189.     fpath = frappe.utils.get_files_path() + "/" + temp_name
  190.  
  191.     while(os.path.exists(fpath)):
  192.         temp_name = fname + os.urandom(16).encode('hex') + ".html"
  193.         fpath = frappe.utils.get_files_path() + "/" + temp_name
  194.  
  195.     f = open(fpath,'w')
  196.     f.write(html)
  197.     f.close()
  198.     return temp_name, fpath
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement