Guest User

Untitled

a guest
Jul 18th, 2018
101
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.21 KB | None | 0 0
  1. # Import plotting modules
  2. import matplotlib.pyplot as plt
  3. import seaborn as sns
  4. # Import numpy
  5. import numpy as np
  6.  
  # Apply Seaborn's default theme to the Matplotlib figures that follow
  sns.set()

  # Square-root rule: the bin count is the integer part of sqrt(number of points)
  n_data = len(versicolor_petal_length)
  n_bins = int(np.sqrt(n_data))

  # Histogram of the I. versicolor petal lengths
  plt.hist(versicolor_petal_length, bins=n_bins)

  # Axis labels
  _ = plt.xlabel('petal length (cm)')
  _ = plt.ylabel('count')

  # Render the figure
  plt.show()
  29.  
  # Bee swarm plot of petal length per species, using Seaborn's defaults
  _ = sns.swarmplot(x='species', y='petal length (cm)', data=df)

  # Label the axes; the y label keeps the units of the plotted column so it
  # agrees with the other petal-length figures in this script
  _ = plt.xlabel('species')
  _ = plt.ylabel('petal length (cm)')

  # Show the plot
  plt.show()
  38.  
  def ecdf(data):
      """Compute ECDF for a one-dimensional array of measurements.

      Parameters
      ----------
      data : array-like
          One-dimensional sequence of measurements.

      Returns
      -------
      x : numpy.ndarray
          The measurements sorted in ascending order.
      y : numpy.ndarray
          Cumulative fractions ``1/n, 2/n, ..., n/n`` paired element-wise
          with ``x``, where ``n = len(data)``.
      """
      # Number of data points: n
      n = len(data)

      # x-data for the ECDF: the sorted measurements
      x = np.sort(data)

      # y-data for the ECDF: evenly spaced steps from 1/n up to 1
      y = np.arange(1, n + 1) / n

      return x, y
  51.  
  # ECDF of the I. versicolor petal lengths: x_vers, y_vers
  x_vers, y_vers = ecdf(versicolor_petal_length)

  # Draw the ECDF as unconnected dots
  _ = plt.plot(
      x_vers,
      y_vers,
      marker='.',
      linestyle='none',
  )

  # Axis labels
  _ = plt.xlabel('petal lengths')
  _ = plt.ylabel('ECDF')

  # Render the figure
  plt.show()
  64.  
  # ECDFs of petal length for each of the three iris species
  x_set, y_set = ecdf(setosa_petal_length)
  x_vers, y_vers = ecdf(versicolor_petal_length)
  x_virg, y_virg = ecdf(virginica_petal_length)

  # Overlay the three ECDFs on one set of axes, in the order used by the legend
  _ = plt.plot(x_set, y_set, marker='.', linestyle='none')
  _ = plt.plot(x_vers, y_vers, marker='.', linestyle='none')
  _ = plt.plot(x_virg, y_virg, marker='.', linestyle='none')

  # Legend and axis labels
  plt.legend(
      ('setosa', 'versicolor', 'virginica'),
      loc='lower right',
  )
  _ = plt.xlabel('petal length (cm)')
  _ = plt.ylabel('ECDF')

  # Render the figure
  plt.show()
  82.  
  # Arithmetic mean of the I. versicolor petal lengths: mean_length_vers
  mean_length_vers = np.mean(versicolor_petal_length)

  # Report the mean together with its unit
  print('I. versicolor:', mean_length_vers, 'cm')
  87.  
  # Percentile levels to evaluate: percentiles
  percentiles = np.array([2.5, 25, 50, 75, 97.5])

  # Petal lengths at those levels: ptiles_vers
  ptiles_vers = np.percentile(versicolor_petal_length, percentiles)

  # Report the percentile values
  print('Versicolor length percentiles:', ptiles_vers)

  # Show how the percentiles relate to the ECDF: first the ECDF itself as dots
  # (x_vers and y_vers are computed earlier in the script)
  _ = plt.plot(x_vers, y_vers, '.')
  _ = plt.xlabel('petal length (cm)')
  _ = plt.ylabel('ECDF')

  # Then the percentiles as red diamonds; dividing the levels by 100 puts
  # them on the same 0-1 scale as the ECDF's y values
  _ = plt.plot(
      ptiles_vers,
      percentiles / 100,
      marker='D',
      color='red',
      linestyle='none',
  )

  # Render the figure
  plt.show()
  110.  
  # Box plot of petal length per species, using Seaborn's defaults
  _ = sns.boxplot(
      x='species',
      y='petal length (cm)',
      data=df,
  )

  # Axis labels
  _ = plt.xlabel('species')
  _ = plt.ylabel('petal length (cm)')

  # Render the figure
  plt.show()
  120.  
  # Deviation of every petal length from the sample mean: differences
  differences = versicolor_petal_length - np.mean(versicolor_petal_length)

  # Squared deviations: diff_sq
  diff_sq = differences**2

  # Variance by hand, as the mean of the squared deviations: variance_explicit
  variance_explicit = np.mean(diff_sq)

  # The same quantity from NumPy's built-in routine: variance_np
  variance_np = np.var(versicolor_petal_length)

  # Print both values side by side for comparison
  print(variance_np, variance_explicit)
  135.  
  # Scatter plot of petal width against petal length for I. versicolor,
  # drawn as unconnected dots
  _ = plt.plot(
      versicolor_petal_length,
      versicolor_petal_width,
      marker='.',
      linestyle='none',
  )

  # Axis labels
  _ = plt.xlabel('Petal length')
  _ = plt.ylabel('Petal Width')

  # Render the figure
  plt.show()
  146.  
  # Compute the covariance matrix of petal length and width: covariance_matrix
  covariance_matrix = np.cov(versicolor_petal_length, versicolor_petal_width)

  # Print covariance matrix
  print(covariance_matrix)

  # Extract covariance of length and width of petals: petal_cov
  # Plain integer indexing [0, 1] returns the off-diagonal entry as a scalar;
  # list indexing ([[0], [1]]) would return a one-element array instead.
  petal_cov = covariance_matrix[0, 1]

  # Print the length/width covariance
  print(petal_cov)
  158.  
  def pearson_r(x, y):
      """Compute Pearson correlation coefficient between two arrays.

      Parameters
      ----------
      x, y : array-like
          The two sets of measurements, passed straight to ``np.corrcoef``.

      Returns
      -------
      The ``[0, 1]`` entry of the correlation matrix, i.e. the correlation
      between ``x`` and ``y``.
      """
      # Compute correlation matrix: corr_mat
      corr_mat = np.corrcoef(x, y)

      # Return entry [0,1], the off-diagonal x/y correlation
      return corr_mat[0, 1]
  166.  
  # Pearson correlation between petal length and petal width for I. versicolor: r
  r = pearson_r(
      versicolor_petal_length,
      versicolor_petal_width,
  )

  # Report the coefficient
  print(r)
Add Comment
Please, Sign In to add comment