Advertisement
makispaiktis

BASIC CODE AND PROCESS TO DECIDE CHART

Jul 2nd, 2023
934
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.65 KB | None | 0 0
  1. # 0. Imports
  2. import pandas as pd
  3. pd.plotting.register_matplotlib_converters()
  4. import matplotlib.pyplot as plt
  5. # %matplotlib inline
  6. import seaborn as sns
  7.  
  8.  
  9.  
  10.  
  11. # **********************************************************************************************
  12. # **********************************************************************************************
  13. # 1. Show trends and changes over time (lineplot) - There is usually a column named "Date"
  14. # **********************************************************************************************
  15. # **********************************************************************************************
  16.  
  17. # 1.1. Lineplot = Plot every column of museum_data (first call) and then only the 'Avila Adobe' column (second call)
  18. museum_data = pd.read_csv(museum_filepath, index_col="Date", parse_dates=True)  # museum_filepath is not defined in this snippet; set it before running
  19. sns.lineplot(data=museum_data)
  20. sns.lineplot(data=museum_data['Avila Adobe'], label="Avila Adobe")  # NOTE(review): no new figure is created in between, so this presumably draws onto the same axes as the call above - confirm, or run each call in its own notebook cell
  21.  
  22.  
  23.  
  24.  
  25. # **********************************************************************************************
  26. # **********************************************************************************************
  27. # 2. Show Distribution (histplot, kdeplot, jointplot)
  28. # **********************************************************************************************
  29. # **********************************************************************************************
  30.  
  31. # 2.1. Histplot = Histogram of one column ('hue' is optional; here it colors the bars by the "Diagnosis" column)
  32. cancer_data = pd.read_csv(cancer_filepath, index_col="Id")  # cancer_filepath is not defined in this snippet; set it before running
  33. sns.histplot(data=cancer_data, x="Area (mean)", hue="Diagnosis")
  34.  
  35. # 2.2. Kdeplot = Smoothed histogram (kernel density estimate) of one column ('hue' is optional; here it colors the curves by the "Diagnosis" column)
  36. cancer_data = pd.read_csv(cancer_filepath, index_col="Id")  # same read as in 2.1; cancer_filepath is not defined in this snippet
  37. sns.kdeplot(data=cancer_data, x="Radius (worst)", hue="Diagnosis")
  38.  
  39. # 2.3. Jointplot = Plot of 2 columns against each other; the 'kind' argument picks the style (omitted = seaborn's default, "hist" = 2D histogram, "kde" = 2D KDE)
  40. cancer_data = pd.read_csv(cancer_filepath, index_col="Id")  # same read as in 2.1; cancer_filepath is not defined in this snippet
  41. sns.jointplot(data=cancer_data, x="Radius (worst)", y="Area (mean)")  # no 'kind' given: the documented default is a scatter plot
  42. sns.jointplot(data=cancer_data, x="Radius (worst)", y="Area (mean)", kind="hist")
  43. sns.jointplot(data=cancer_data, x="Radius (worst)", y="Area (mean)", kind="kde")
  44.  
  45.  
  46.  
  47.  
  48. # **********************************************************************************************
  49. # **********************************************************************************************
  50. # 3. Show comparison (barplot, heatmap)
  51. # **********************************************************************************************
  52. # **********************************************************************************************
  53.  
  54. # 3.1. Barplot = Bar diagram (x = the "Platform" index of ign_data, y = its 'Racing' column)
  55. ign_data = pd.read_csv(ign_filepath, index_col="Platform")  # ign_filepath is not defined in this snippet; set it before running
  56. sns.barplot(x=ign_data.index, y=ign_data['Racing'])
  57.  
  58. # 3.2. Heatmap = Color-coded tiles (with 'annot=True', the numbers in the tiles are visible)
  59. # ALL THE NUMBERS IN THE DATAFRAME MUST MEASURE THE SAME THING. EX: DELAY PER AIRLINE COMPANY
  60. # EX: ROW1 = 1 (Jan), ROW2 = 2 (Feb), ....
  61. # EX: COL1 = A (AEGEAN), COL2 = B (RYANAIR), ....
  62. # IN THAT EXAMPLE ALL THE NUMBERS ARE DELAYS (the airline table is only an illustration; the code below uses ign_data)
  63. ign_data = pd.read_csv(ign_filepath, index_col="Platform")  # same read as in 3.1; ign_filepath is not defined in this snippet
  64. sns.heatmap(data=ign_data, annot=True)
  65.  
  66.  
  67.  
  68.  
  69. # **********************************************************************************************
  70. # **********************************************************************************************
  71. # 4. Show relation between 2 or 3 variables (scatterplot, regplot, swarmplot, lmplot)
  72. # **********************************************************************************************
  73. # **********************************************************************************************
  74.  
  75. # 4.1. Scatterplot - Relation between 2 variables: BOTH CONTINUOUS (here 'sugarpercent' vs 'winpercent')
  76. candy_data = pd.read_csv(candy_filepath, index_col="id")  # candy_filepath is not defined in this snippet; set it before running
  77. sns.scatterplot(x=candy_data['sugarpercent'], y=candy_data['winpercent'])
  78.  
  79. # 4.2. Regplot - Relation and regression line between 2 variables: BOTH CONTINUOUS (same columns as 4.1)
  80. candy_data = pd.read_csv(candy_filepath, index_col="id")  # same read as in 4.1; candy_filepath is not defined in this snippet
  81. sns.regplot(x=candy_data['sugarpercent'], y=candy_data['winpercent'])
  82.  
  83. # 4.3. Swarmplot - Relation between 2 variables: 1 CONTINUOUS ('winpercent', on y) + 1 CATEGORICAL ('chocolate', on x)
  84. candy_data = pd.read_csv(candy_filepath, index_col="id")  # same read as in 4.1; candy_filepath is not defined in this snippet
  85. sns.swarmplot(x=candy_data["chocolate"], y=candy_data["winpercent"])
  86.  
  87. # 4.4. Scatterplot - Relation between 3 variables: 2 CONTINUOUS (x, y) + 1 CATEGORICAL (passed in the 'hue' argument)
  88. candy_data = pd.read_csv(candy_filepath, index_col="id")  # same read as in 4.1; candy_filepath is not defined in this snippet
  89. sns.scatterplot(x=candy_data['pricepercent'], y=candy_data['winpercent'], hue=candy_data['chocolate'])
  90.  
  91. # 4.5. Lmplot - Relation and regression lines between 3 variables: 2 CONTINUOUS (x, y) + 1 CATEGORICAL (passed in the 'hue' argument)
  92. candy_data = pd.read_csv(candy_filepath, index_col="id")  # same read as in 4.1; candy_filepath is not defined in this snippet
  93. sns.lmplot(data=candy_data, x="pricepercent", y="winpercent", hue="chocolate")  # columns are given by name here, so 'data' must be passed
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement