Guest User

Untitled

a guest
Sep 4th, 2016
143
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.49 KB | None | 0 0
import re

import requests
from bs4 import BeautifulSoup
from xlwt import Workbook
  6. patent_no ='8829700'
  7. url="http://patft.uspto.gov/netacgi/nph-Parser?Sect1=PTO2&Sect2=HITOFF&u=%2Fnetahtml%2FPTO%2Fsearch-adv.htm&r=1&p=1&f=G&l=50&d=PTXT&S1=8829700&OS=8829700&RS=8829700%22,%228829700_patent_dwd.html"
  8.  
  9. r = requests.get(url)
  10. soup = BeautifulSoup(r.text,'html.parser')
  11. field1 = "Inventors:"
  12. field2 = "Assignee:"
  13. field3 = "Filed:"
  14.  
  15. for i in soup.find_all('th'):
  16. if i.string:
  17. tag = i.string.strip()
  18. if field1 == tag:
  19. inv = (i.find_next_sibling('td').text).split(")',")
  20. re.sub('[^A-Za-z0-9]+', '', inv[0])
  21. loc=str(inv).split("),")
  22. if field2 == tag: # Assignee
  23. asg=i.find_next_sibling('td').text# Assignee Text String
  24. asg=asg[0:asg.find('(')].strip()
  25. if field3 == tag: # Filed
  26. filed= i.find_next_sibling('td').text # Filed Text String
  27.  
  28.  
  29. # Creating the excel file
  30.  
  31. wb=Workbook()
  32. sheet1=wb.add_sheet('Sheet 1')
  33.  
  34. sheet1.write(0,0,field3[:-1])
  35. sheet1.write(1,0,filed)
  36.  
  37. sheet1.write(0,1,field2[:-1])
  38. sheet1.write(1,1,asg)
  39.  
  40. ev=2
  41. odd=3
  42. for i in xrange(1,len(loc)):
  43. wr=str(loc[i-1]).strip()
  44. wr=wr.replace(r"\[|'u|\\n","")
  45. print(wr)
  46. sheet1.write(0,odd,"Loc"+str(i))
  47. sheet1.write(1,odd,wr[wr.find('(')+1:].strip())
  48. odd=odd+2
  49. sheet1.write(0,ev,"Inv"+str(i))
  50. sheet1.write(1,ev,wr[0:wr.find('(')].strip())
  51. ev=ev+2
  52. wb.save('example.xlsx')
Add Comment
Please, Sign In to add comment