Advertisement
Guest User

Untitled

a guest
Jun 21st, 2018
315
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.88 KB | None | 0 0
  # -*- coding: utf-8 -*-
"""
Created on Sun Nov 20 10:29:32 2016

@author: hina

Recommend books to a customer who is considering a given book (ASIN).

The script loads per-book metadata from ``amazon-books.txt`` and a weighted
copurchase graph from ``amazon-books-copurchase.edgelist``, builds the ego
network of the book of interest, drops weak edges (island method), and prints
the highest-rated direct neighbours as recommendations.
"""
print()

import networkx
from operator import itemgetter
import matplotlib.pyplot

# Number of recommendations to print; matches the "Top 3" heading below.
TOP_N = 3

# Metadata fields printed for a book, in display order.
_DISPLAY_FIELDS = ('Title', 'SalesRank', 'TotalReviews', 'AvgRating',
                   'DegreeCentrality', 'ClusteringCoeff')


def _print_book(book_asin):
    """Print the ASIN of *book_asin* followed by its metadata in amazonBooks.

    Raises KeyError if *book_asin* has no entry in amazonBooks.
    """
    print("ASIN = ", book_asin)
    for field in _DISPLAY_FIELDS:
        print(field + " = ", amazonBooks[book_asin][field])


# read the data from amazon-books.txt;
# populate the amazonBooks nested dictionary;
# key = ASIN; value = MetaData associated with ASIN
amazonBooks = {}
with open('./amazon-books.txt', 'r', encoding='utf-8', errors='ignore') as fhr:
    fhr.readline()  # discard the first line (it is not parsed as a data row)
    for line in fhr:
        if not line.strip():
            # tolerate blank lines (e.g. a trailing newline at end of file)
            continue
        cell = line.split('\t')
        ASIN = cell[1].strip()
        amazonBooks[ASIN] = {
            'Id': cell[0].strip(),
            'Title': cell[2].strip(),
            'Categories': cell[3].strip(),
            'Group': cell[4].strip(),
            'Copurchased': cell[5].strip(),
            'SalesRank': int(cell[6].strip()),
            'TotalReviews': int(cell[7].strip()),
            'AvgRating': float(cell[8].strip()),
            'DegreeCentrality': int(cell[9].strip()),
            'ClusteringCoeff': float(cell[10].strip()),
        }

# read the data from amazon-books-copurchase.edgelist;
# assign it to copurchaseGraph weighted Graph;
# node = ASIN, edge = copurchase, edge weight = category similarity
with open("amazon-books-copurchase.edgelist", 'rb') as fhr:
    copurchaseGraph = networkx.read_weighted_edgelist(fhr)

# now let's assume a person is considering buying the following book;
# what else can we recommend to them based on copurchase behavior
# we've seen from other users?
print("Looking for Recommendations for Customer Purchasing this Book:")
print("--------------------------------------------------------------")
asin = '0805047905'

# let's first get some metadata associated with this book
_print_book(asin)

# now let's look at the ego network associated with this asin
# which is essentially comprised of all the books that have been
# copurchased with this book in the past
ego = networkx.ego_graph(copurchaseGraph, asin, radius=1)
print("Ego Network:",
      "Nodes=", ego.number_of_nodes(),
      "Edges=", ego.number_of_edges())
print()

print("Top 3 Recommendations based on Copurchase Data and Average Ratings")
print("------------------------------------------------------------------")
# how can we pick the Top 3 recommendations for this person?
# let's first use the island method to remove edges with
# edge weight below a threshold

threshold = 0.5
egotrim = networkx.Graph()
for n1, n2, e in ego.edges(data=True):
    if e['weight'] >= threshold:
        # edge attributes must be passed as keyword arguments; a positional
        # dict is rejected by add_edge(u, v, **attr)
        egotrim.add_edge(n1, n2, **e)
print("Trimmed Ego Network:",
      "Threshold=", threshold,
      "Nodes=", egotrim.number_of_nodes(),
      "Edges=", egotrim.number_of_edges())
ego = egotrim
print()

# now let's consider the average rating of all the nodes
# connected to the ego node by a single hop, and sort them
# by descending order of average rating

# if every edge touching the ego node fell below the threshold, the node is
# absent from the trimmed graph and there is nothing to recommend
neighbors = ego.neighbors(asin) if asin in ego else []
egoNeighbors = [(asin, n, amazonBooks[n]['AvgRating']) for n in neighbors]
egoNeighbors = sorted(egoNeighbors, key=itemgetter(2), reverse=True)

# print Top 3 recommendations based on average rating
for _, n, _rating in egoNeighbors[:TOP_N]:
    _print_book(n)
    print()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement