Guest User

Untitled

a guest
Jul 19th, 2020
22
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.00 KB | None | 0 0
import copy
from collections import Counter

import IPython
import wikipediaapi
from chord import Chord
  5.  
  6. wiki_wiki = wikipediaapi.Wikipedia('en')
  7.  
  8. categoryDict = {}
  9. pageList = []
  10. blacklist = ["mdy", "dmy", "article", "Article", "Page", "page", "Wiki", "CS1", "AC with"]
  11.  
  12. class page:
  13.     def __init__(self, title):
  14.         self.cats = []
  15.         self.title = title
  16.     def addCat(self, newCat):
  17.         self.cats.append(newCat)
  18.     def setCats(self, newCats):
  19.         self.cats = copy.deepcopy(newCats)
  20.     def getCats(self):
  21.         return self.cats
  22.     def getTitle(self):
  23.         return self.title
  24.  
  25. def getCategories(pages): #Assigns categories to a list of pages
  26.     for p in pages:
  27.         print(p.getTitle())
  28.         for x in wiki_wiki.page(p.getTitle()).categories:
  29.             if not any(word in x for word in blacklist):
  30.                 p.addCat(x)
  31.  
  32. for x in wiki_wiki.page("Wikipedia:Vital Articles").links:
  33.     if ":" not in x and "article" not in x and "Article" not in x: #Removes all articles starting "Category:" or "Articles with..."
  34.         pageList.append(page(x))
  35.        
  36. getCategories(pageList)
  37. print("Done!")
  38.  
  39. for x in pageList:
  40.     for y in x.getCats():
  41.         if y not in categoryDict:
  42.             categoryDict.update({y:1})
  43.         else:
  44.             categoryDict[y] += 1
  45.  
  46. categoryDict = {key:val for key, val in categoryDict.items() if val >= 4}
  47.  
  48. numCats = len(categoryDict)
  49. print(numCats)
  50.  
  51. catNames = list(categoryDict.keys())
  52. categoryOccurences = [[0 for x in range(numCats)] for y in range(numCats)]
  53.  
  54. def getCategoryCrosses(cat1, cat2):
  55.     for p in pageList:
  56.         if cat1 in p.getCats() and cat2 in p.getCats():
  57.             categoryOccurences[catNames.index(cat1)][catNames.index(cat2)] += 1
  58.             categoryOccurences[catNames.index(cat2)][catNames.index(cat1)] += 1
  59.  
  60. for x in catNames:
  61.     print(x, catNames.index(x), len(catNames))
  62.     for y in catNames:
  63.         getCategoryCrosses(x, y)
  64.  
  65. for x in categoryOccurences:
  66.     print(x)
  67.  
  68. Chord(categoryOccurences, catNames).show()
Add Comment
Please, Sign In to add comment