Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Find the year in which the most distinct articles were published, then list
# the ten authors with the most distinct articles in that year.
#
# Input: ``articles.csv`` in the working directory; the columns read here are
# ``id``, ``year`` and ``author``. An ``author`` cell may name several people
# separated by ",", "&" or " and ", and may contain <strong>/<sub> markup.
import pandas as pd

# Markup fragments stripped from the raw ``author`` field before splitting.
_AUTHOR_MARKUP = ('</strong>', '<strong>', '</sub>', '<sub>')


def _split_authors(raw):
    """Return the list of individual author names held in one ``author`` cell.

    Separators (" and ", "&", ",") are unified to commas, the known markup
    fragments are removed, and each resulting name is trimmed. Fragments that
    are empty after trimming are discarded.
    """
    text = str(raw).replace(' and ', ',').replace('&', ',')
    for fragment in _AUTHOR_MARKUP:
        text = text.replace(fragment, '')
    # Trim padding so "A, B" and "B, A" both yield "B" (not " B"), and drop
    # empty pieces such as the one produced by "A, and B".
    return [name.strip() for name in text.split(',') if name.strip()]


articles = pd.read_csv('articles.csv')

# Keep only rows with an author; copy so the column assignment below acts on
# an independent frame rather than a filtered selection of the original.
articles = articles[articles['author'].notna()].copy()

# Year with the largest number of distinct article ids.
year = articles.groupby('year')['id'].nunique().idxmax()

# One list of names per article ...
articles['authors'] = articles['author'].apply(_split_authors)

# ... then one row per (article, author) pair. The list column ``authors`` is
# kept, and the scalar ``author`` column replaces the raw one.
articles = (
    articles.drop('author', axis=1)
    .assign(author=articles['authors'])
    .explode('author')
)

a = articles.loc[articles['year'] == year]

# Ten authors with the most distinct articles in that year. Printed so the
# result is visible when this runs as a plain script, not only in a REPL.
top_authors = a.groupby('author')['id'].nunique().nlargest(n=10)
print(top_authors)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement