Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Collect the distinct genre categories found in a line-oriented XML dump.

Every line of the input file except the first two and the last is parsed as
a standalone XML document.  For each direct ``category`` child whose
``scheme`` attribute contains "genres", the last "/"-separated segment of
the scheme is taken as the genre id and the ``label`` attribute as the genre
name.  The unique (id, name) pairs are sorted numerically by id and their
count is printed.
"""
import os

from bs4 import BeautifulSoup
from lxml import etree
import requests

with open(r"C:\fixedapi.NFPOX") as f:
    # Drop the first two lines and the last one; only the lines in between
    # are parsed.  NOTE(review): presumably those are wrapper/header lines
    # that are not standalone XML -- confirm against the input file.
    lines = f.readlines()[2:-1]

groups = []
for line in lines:
    root = etree.XML(line)
    for cat in root.findall('category'):
        # A category without a scheme attribute cannot be a genre category,
        # so skip it instead of aborting the whole run with a KeyError.
        scheme = cat.get('scheme', '')
        if "genres" not in scheme:
            continue
        genre_id = scheme.split(r"/")[-1]
        # A genre category without a label still raises KeyError here.
        genre_string = cat.attrib['label']
        groups.append((genre_id, genre_string))

# De-duplicate, then order numerically by id (a plain string sort would put
# "10" before "9").  int() raises ValueError if an id is not numeric.
done_list = sorted(set(groups), key=lambda x: int(x[0]))

## uncomment this for printout
# for grp in done_list:
#     print("%s %s" % (grp[0], grp[1]))

# Parenthesised single-argument print behaves the same on Python 2 and 3.
print("DONE!")
print(len(done_list))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement