Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- user document
- 0 john book
- 1 jane article
- 2 jane book
- 3 jane book
- 4 jim article
- 5 john book
- 6 jim blogpost
- 7 jane blogpost
- 8 jane blogpost
- 9 jane blogpost
- blogpost article book
- john 1 3 0
- jane 0 0 1
- jim 4 0 2
# Pivot `records` into a table with one row per user and one column per
# document, where each cell holds the number of rows in `records` for that
# (user, document) pair.
# NOTE(review): this fragment expects `pd`, `records`, `users` and
# `documents` to be defined already -- confirm against the surrounding code.

# Create the table pre-filled with integer zeros, so no all-NaN placeholder
# frame and no fillna() pass is needed.
df = pd.DataFrame(0, index=users, columns=documents)

# size() yields one count per (user, document) pair that occurs in `records`;
# pairs that never occur keep their initial 0.
grouped = records.groupby(['user', 'document'])
for (user, document), downloads in grouped.size().items():
    df.loc[user, document] = downloads
import pandas as pd

# Input log: each row pairs a user with a document. The loop at the bottom
# counts the rows per pair and stores that count as `downloads`.
records = pd.DataFrame(
    [
        ('john', 'book'),
        ('jane', 'article'),
        ('jane', 'book'),
        ('jane', 'book'),
        ('jim', 'article'),
        ('john', 'book'),
        ('jim', 'blogpost'),
        ('jane', 'blogpost'),
        ('jane', 'blogpost'),
        ('jane', 'blogpost'),
    ],
    columns=['user', 'document'],
)
print(records)

# Sorted unique labels become the row (users) and column (documents) labels
# of the count table.
users = sorted(set(records['user']))
documents = sorted(set(records['document']))
print(users)
print(documents)

# Create the table pre-filled with integer zeros, so no all-NaN placeholder
# frame and no fillna() pass is needed.
df = pd.DataFrame(0, index=users, columns=documents)
print(df)

# size() yields one count per (user, document) pair that occurs in `records`;
# pairs that never occur keep their initial 0.
grouped = records.groupby(['user', 'document'])
for (user, document), downloads in grouped.size().items():
    df.loc[user, document] = downloads
Add Comment
Please, Sign In to add comment