gregwa

FCM141 Pandas script

Jan 10th, 2019
186
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.11 KB | None | 0 0
  #!/usr/bin/python3
"""Explore the BreadBasket_DMS.csv sales data with pandas.

The script reads the CSV into a DataFrame and prints a series of summaries
built from its 'Date', 'Time', 'Transaction' and 'Item' columns:

* Question #1 - per date, how many rows there are for each item.
* Question #2 - per item, how many rows there are over the whole file,
  sorted ascending and descending.
* Question #3 - per date and hour, how many distinct transactions occurred.
"""
import pandas as pd

# -----------------------------------
# CSV file read by main()
filename = 'BreadBasket_DMS.csv'

SEPARATOR = '===================================='


def items_sold_by_date(df):
    """Return a Series with the row count for every (Date, Item) pair.

    The result is indexed by a ('Date', 'Item') MultiIndex.  Selecting
    ``[['Item']]`` instead of ``['Date']`` would give a DataFrame instead.
    """
    return df.groupby(['Date', 'Item'])['Date'].count()


def item_totals(df, ascending=True):
    """Return a DataFrame of per-item row counts sorted by that count.

    The single column is named 'Transaction' and holds the number of
    non-null 'Transaction' entries for each 'Item'.

    Args:
        df: DataFrame with 'Item' and 'Transaction' columns.
        ascending: sort smallest-first when True, largest-first when False.
    """
    counts = df.groupby('Item')[['Transaction']].count()
    return counts.sort_values('Transaction', ascending=ascending)


def transactions_per_hour(df):
    """Return a DataFrame of distinct transactions per (Date, Hour).

    Grouping on the raw 'Time' column gives one group per distinct Time
    value rather than per hour, and ``count()`` counts rows, so a
    transaction with several rows is counted several times.  Here the hour
    is taken from 'Time' and distinct 'Transaction' values are counted.

    NOTE(review): assumes 'Time' holds colon-separated clock strings such
    as 'HH:MM:SS' - confirm against the CSV.
    """
    hour = df['Time'].str.split(':').str[0].astype(int).rename('Hour')
    return df.groupby(['Date', hour])[['Transaction']].nunique()


def main():
    """Load the CSV named by ``filename`` and print every summary."""
    # Read the CSV file into a DataFrame
    df = pd.read_csv(filename)

    # get a list of the column names (headers)
    col_list = df.columns.values.tolist()
    print(col_list)
    print(df.count())

    # create a list of unique dates from the DataFrame
    datelist = df['Date'].unique().tolist()
    print(len(datelist), min(datelist), max(datelist))

    # Next, create a list of the unique items
    itemlist = df['Item'].unique().tolist()
    print(itemlist)
    print(len(itemlist))

    # Sort by 'Item' column
    print(df.sort_values('Item'))

    # Groupby: group on the scalar column name so get_group() accepts a
    # scalar key (a length-1 list grouper expects a length-1 tuple key).
    tacos = df.groupby('Item').get_group('Tacos/Fajita')
    print(tacos)

    # Groupby and count
    print(df.groupby('Item').count())

    # Question #1: by Date, show how many of each item were sold
    print(items_sold_by_date(df))

    # Question #2: by Item, total sold for the entire period (ascending)
    print(SEPARATOR)
    print('SortedItemCount #1 - Question #2')
    sorteditemcount = item_totals(df)
    print(sorteditemcount)
    print(sorteditemcount.head(10))
    print(sorteditemcount.tail(10))

    # Same totals, sorted descending
    print(SEPARATOR)
    print('SortedItemCount #2 - Question #2')
    sorteditemcount2 = item_totals(df, ascending=False)
    print(sorteditemcount2)
    print(sorteditemcount2.head(10))
    print(sorteditemcount2.tail(10))

    # Question #3: by Date and hour, transactions per hour
    print(SEPARATOR)
    print('Question #3')
    print('By Hour')
    print(transactions_per_hour(df))


if __name__ == '__main__':
    main()
Add Comment
Please, Sign In to add comment