# Untitled

# -*- coding: utf-8 -*-
"""
Created on Sun Nov 20 10:29:32 2016

@author: hina
"""
# start the program output with an empty line
print()

import networkx
from operator import itemgetter
import matplotlib.pyplot

# read the tab-separated data from amazon-books.txt;
# populate the amazonBooks nested dictionary;
# key = ASIN; value = MetaData associated with ASIN
amazonBooks = {}
# the with-block closes the file even when a row fails numeric conversion
with open('./amazon-books.txt', 'r', encoding='utf-8', errors='ignore') as fhr:
    # the first line is read and discarded; it is never parsed as a record
    fhr.readline()
    for line in fhr:
        # skip blank lines (e.g. a trailing newline at end of file),
        # which would otherwise raise IndexError on cell[1]
        if not line.strip():
            continue
        cell = line.split('\t')
        ASIN = cell[1].strip()
        MetaData = {
            'Id': cell[0].strip(),
            'Title': cell[2].strip(),
            'Categories': cell[3].strip(),
            'Group': cell[4].strip(),
            'Copurchased': cell[5].strip(),
            'SalesRank': int(cell[6].strip()),
            'TotalReviews': int(cell[7].strip()),
            'AvgRating': float(cell[8].strip()),
            'DegreeCentrality': int(cell[9].strip()),
            'ClusteringCoeff': float(cell[10].strip()),
        }
        amazonBooks[ASIN] = MetaData

# read the data from amazon-books-copurchase.edgelist;
# assign it to copurchaseGraph weighted Graph;
# node = ASIN, edge = copurchase, edge weight = category similarity
# the with-block closes the file even if parsing the edge list raises
with open("amazon-books-copurchase.edgelist", 'rb') as fhr:
    copurchaseGraph = networkx.read_weighted_edgelist(fhr)

# Suppose a customer is about to buy the book below; the rest of the
# script works out what else to recommend from the copurchase data.
print("Looking for Recommendations for Customer Purchasing this Book:")
print("--------------------------------------------------------------")
asin = '0805047905'

# show the metadata stored for this book, one labelled field per line
print("ASIN = ", asin)
purchasedBook = amazonBooks[asin]
for field in ('Title', 'SalesRank', 'TotalReviews', 'AvgRating',
              'DegreeCentrality', 'ClusteringCoeff'):
    print(field + " = ", purchasedBook[field])

# build the ego network of this asin: the book itself plus every node
# within one hop of it in copurchaseGraph (radius=1), i.e. the books
# that share a copurchase edge with it
ego = networkx.ego_graph(copurchaseGraph, asin, radius=1)
egoNodeCount = ego.number_of_nodes()
egoEdgeCount = ego.number_of_edges()
print("Ego Network:", "Nodes=", egoNodeCount, "Edges=", egoEdgeCount)
print()

print ("Top 3 Recommendations based on Copurchase Data and Average Ratings")
print ("------------------------------------------------------------------")
# how can we pick the Top 3 recommendations for this person?
# let's first use the island method to remove edges with
# edge weight below a threshold

threshold = 0.5
egotrim = networkx.Graph()
for n1, n2, e in ego.edges(data=True):
    if e['weight'] >= threshold:
        # Graph.add_edge takes edge attributes as keyword arguments;
        # passing the data dict positionally fails on current networkx,
        # so unpack it instead
        egotrim.add_edge(n1, n2, **e)
print ("Trimmed Ego Network:",
       "Threshold=", threshold,
       "Nodes=", egotrim.number_of_nodes(),
       "Edges=", egotrim.number_of_edges())
# from here on, "ego" refers to the trimmed network
ego = egotrim
print ()

# now let's consider the average rating of all the nodes
# connected to the ego node by a single hop, and sort them
# by descending order of average rating

# if every edge touching the ego node fell below the threshold, the node
# is absent from the trimmed graph and there is nothing to recommend
neighborAsins = list(ego.neighbors(asin)) if asin in ego else []
egoNeighbors = [(asin, n, amazonBooks[n]['AvgRating']) for n in neighborAsins]
egoNeighbors = sorted(egoNeighbors, key=itemgetter(2), reverse=True)

# print Top 3 recommendations based on average rating
# (the first tuple element is the ego asin itself, so it is ignored here
# rather than rebinding the module-level asin)
for _, n, rating in egoNeighbors[:3]:
    print ("ASIN = ", n)
    print ("Title = ", amazonBooks[n]['Title'])
    print ("SalesRank = ", amazonBooks[n]['SalesRank'])
    print ("TotalReviews = ", amazonBooks[n]['TotalReviews'])
    print ("AvgRating = ", amazonBooks[n]['AvgRating'])
    print ("DegreeCentrality = ", amazonBooks[n]['DegreeCentrality'])
    print ("ClusteringCoeff = ", amazonBooks[n]['ClusteringCoeff'])
    print ()