Advertisement
Kafke

Scrape unicode.org

Feb 7th, 2018
140
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.90 KB | None | 0 0
  1. # import urllib2
  2. # import urllib
  3. from bs4 import BeautifulSoup
  4. from binascii import a2b_base64
  5.  
  6. gmail = 11
  7. softbank = 12
  8. docomo = 13
  9. kddi = 14
  10.  
  11. font = docomo
  12. ext = '.png'
  13. # url = 'http://unicode.org/emoji/charts/full-emoji-list.html'
  14. contents = open('emojitable.html','r').read()
  15. soup = BeautifulSoup(contents)
  16. tbody = soup.find("tbody")
  17. for row in tbody.findAll('tr'):
  18.     #first_column = row.findAll('th')[0].contents
  19.     uni = ""
  20.     sb = ""
  21.     columns = row.findAll('td')
  22.     if len(columns) > 0:
  23.         uni = columns[1].a.contents[0].replace('U+','').replace(' ','_')
  24.         sbc = columns[font]
  25.         if sbc.contents is not '\u2014':
  26.             if sbc.img is not None:
  27.                 sb = sbc.img['src']
  28.                 if sb is not None:
  29.                     binary_data = a2b_base64(sb.replace('data:image/png;base64,',''))
  30.                     fd = open('rip/'+uni+ext, 'wb')
  31.                     fd.write(binary_data)
  32.                     fd.close()
  33.                     #print uni, sb
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement