Advertisement
TankorSmash

Genres from fixedAPI.nfpox.py

Feb 18th, 2013
106
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.70 KB | None | 0 0
  1. from bs4 import BeautifulSoup
  2. import requests
  3. import os
  4. from lxml import etree
  5.  
  6.  
  7. with open(r"C:\fixedapi.NFPOX") as f:
  8.     lines = f.readlines()[2:-1]
  9.    
  10.  
  11. groups = []
  12. for line in lines:
  13.     root = etree.XML(line)
  14.    
  15.     for cat in root.findall('category'):
  16.        
  17.         if "genres" in cat.attrib['scheme']:
  18.             genre_id = cat.attrib['scheme'].split(r"/")[-1]
  19.             genre_string = cat.attrib['label']
  20.             groups.append((genre_id, genre_string))
  21.        
  22.        
  23.        
  24. done_list = sorted(set(groups), key=lambda x: int(x[0]))
  25.  
  26. ##uncomment this for printout
  27. # for grp in done_list:
  28. #     print grp[0], grp[1]
  29.  
  30. print "DONE!"
  31. print len(done_list)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement