Advertisement
Guest User

Untitled

a guest
Feb 28th, 2018
100
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.93 KB | None | 0 0
  def truncus03a(url='http://legis.senado.leg.br/legislacao/ListaTextoSigen.action?norma=483900&id=14310946&idBinario=15795498&mime=application/rtf',
                 path='', filename=''):
      """Fetch a document page, clean its markup and save it as an HTML file.

      The text returned for ``url`` is stripped of inline ``style`` attributes
      and bare ``<span>`` tags, the ``Epgrafe*``, ``Ementa*``, ``Assinatura1*``
      and ``Assinatura2*`` names are renamed to ``epigrafe``, ``ementa``,
      ``presidente`` and ``ministro``, and the third ``<div>`` found under the
      element with id ``conteudoPrincipal`` is wrapped in a fixed header and
      footer. The result is printed and written to ``path + filename``.

      Relies on ``re``, ``requests`` and ``BeautifulSoup`` being available at
      module level.

      Args:
          url: Address of the page to download.
          path: Prefix prepended verbatim to ``filename``; no separator is
              inserted, so a directory must end with its own slash.
          filename: Name of the output file. When empty, the function's own
              name plus ``.html`` is used.

      Raises:
          IndexError: If the page has no element with id ``conteudoPrincipal``
              or fewer than three ``<div>`` elements in the expected body.
          AttributeError: If the ``div > html > body`` chain under that
              element is missing.

      A failure to write the output file is reported on stdout and does not
      raise.
      """
      header = (
          '<!DOCTYPE html>'
          '<html lang="pt-br">'
          '<head>'
          '<meta http-equiv="Content-Type" content="text/html; charset=iso8859-1"/>'
          '<link rel="stylesheet" href="../../../saj_projects/view/legis_3.css">'
          '</head>'
          '<body>'
          '<header> '
          '<h1>Presidência da República</h1> '
          '<h2>Casa Civil</h2> '
          '<h3>Subchefia para Assuntos Jurídicos</h3> '
          '</header>'
      )
      footer = '<p class="dou">Este texto não substitui o publicado no DOU de 17.1.2018</p></body></html>'

      if not filename:
          # Same default as before, without walking the interpreter stack.
          filename = '{}.html'.format(truncus03a.__name__)

      # Applied in order: markup clean-up first, then the name mapping.
      substitutions = (
          (r' style="[^"]+"', ''),
          (r'<span>|</span>', ''),
          (r'EpgrafeAlt1|Epgrafe', 'epigrafe'),
          (r'EmentaAlt2|Ementa', 'ementa'),
          (r'Assinatura1Alt7|Assinatura1', 'presidente'),
          (r'Assinatura2Alt8|Assinatura2', 'ministro'),
      )
      page = requests.get(url).text
      for pattern, replacement in substitutions:
          page = re.sub(pattern, replacement, page)

      main_sections = BeautifulSoup(page, 'html.parser').find_all(id='conteudoPrincipal')
      caterva = main_sections[0].div.html.body

      container = caterva.find_all('div')[2]
      print(container)

      result = header + container.prettify() + footer
      # Render once and reuse the text for both the console and the file.
      rendered = BeautifulSoup(result, 'html.parser').prettify()
      print(rendered)

      target = "{}{}".format(path, filename)
      try:
          # BeautifulSoup's string output rewrites the <meta> charset to utf-8,
          # so the file has to be written as UTF-8 regardless of the platform
          # default encoding.
          with open(target, 'w', encoding='utf-8') as out:
              out.write(rendered)
      except OSError as err:
          # Only I/O failures are handled; SystemExit, KeyboardInterrupt and
          # programming errors are no longer swallowed.
          print('Erro ao gravar {}: {}'.format(target, err))
      print('Saindo do programa')


  # Script entry point: run the conversion with the default URL and output name.
  if __name__ == '__main__':
      truncus03a()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement