Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Import plotting modules
import matplotlib.pyplot as plt
import seaborn as sns

# Import numpy
import numpy as np

# Set default Seaborn style for every figure created below
sns.set()
# Histogram of versicolor petal lengths.
# NOTE(review): versicolor_petal_length is not defined in this snippet;
# presumably a 1-D sequence of measurements loaded elsewhere -- confirm.

# Compute number of data points: n_data
n_data = len(versicolor_petal_length)

# Square-root rule: number of bins is sqrt(n_data), truncated to an integer
n_bins = int(np.sqrt(n_data))

# Plot the histogram
_ = plt.hist(versicolor_petal_length, bins=n_bins)

# Label axes
_ = plt.xlabel('petal length (cm)')
_ = plt.ylabel('count')

# Show histogram
plt.show()
# Create bee swarm plot with Seaborn's default settings.
# NOTE(review): df is not defined in this snippet; it must provide the
# 'species' and 'petal length (cm)' columns referenced here -- confirm.
_ = sns.swarmplot(x='species', y='petal length (cm)', data=df)

# Label the axes
_ = plt.xlabel('species')
_ = plt.ylabel('petal length')

# Show the plot
plt.show()
def ecdf(data):
    """Compute ECDF for a one-dimensional array of measurements.

    Parameters
    ----------
    data : sequence or numpy.ndarray
        One-dimensional collection of measurements; must support ``len``.

    Returns
    -------
    x : numpy.ndarray
        The measurements sorted in ascending order.
    y : numpy.ndarray
        Cumulative fractions ``1/n, 2/n, ..., 1`` where ``n = len(data)``;
        ``y[i]`` pairs with ``x[i]``.
    """
    # Number of data points: n
    n = len(data)

    # x-data for the ECDF: the sorted measurements
    x = np.sort(data)

    # y-data for the ECDF: evenly spaced steps ending at exactly 1
    y = np.arange(1, n + 1) / n

    return x, y
# Compute Empirical Cumulative Distribution Function for versicolor data:
# x_vers, y_vers
x_vers, y_vers = ecdf(versicolor_petal_length)

# Generate plot: points only, no connecting line
_ = plt.plot(x_vers, y_vers, marker='.', linestyle='none')

# Label the axes
_ = plt.xlabel('petal lengths')
_ = plt.ylabel('ECDF')

# Display the plot
plt.show()
# Plot ECDFs for the petal lengths of all three iris species.
# NOTE(review): the three *_petal_length inputs are defined outside this
# snippet -- confirm they are 1-D sequences.
x_set, y_set = ecdf(setosa_petal_length)
x_vers, y_vers = ecdf(versicolor_petal_length)
x_virg, y_virg = ecdf(virginica_petal_length)

# Plot all ECDFs on the same plot
_ = plt.plot(x_set, y_set, marker='.', linestyle='none')
_ = plt.plot(x_vers, y_vers, marker='.', linestyle='none')
_ = plt.plot(x_virg, y_virg, marker='.', linestyle='none')

# Annotate the plot; legend entries follow the order of the plot calls above
plt.legend(('setosa', 'versicolor', 'virginica'), loc='lower right')
_ = plt.xlabel('petal length (cm)')
_ = plt.ylabel('ECDF')

# Display the plot
plt.show()
# Compute the mean: mean_length_vers
mean_length_vers = np.mean(versicolor_petal_length)

# Print the result with some nice formatting
print('I. versicolor:', mean_length_vers, 'cm')

# Specify array of percentiles: percentiles
percentiles = np.array([2.5, 25, 50, 75, 97.5])

# Compute percentiles: ptiles_vers (one value per entry in percentiles)
ptiles_vers = np.percentile(versicolor_petal_length, percentiles)

# Print the result
print('Versicolor length percentiles:', ptiles_vers)
# To see how the percentiles relate to the ECDF, plot the ECDF first.
# Relies on x_vers, y_vers, ptiles_vers and percentiles computed earlier.
_ = plt.plot(x_vers, y_vers, '.')
_ = plt.xlabel('petal length (cm)')
_ = plt.ylabel('ECDF')

# Overlay percentiles as red diamonds; percentiles are divided by 100 so
# they share the ECDF's 0-1 vertical scale.
_ = plt.plot(
    ptiles_vers,
    percentiles / 100,
    marker='D',
    color='red',
    linestyle='none',
)

# Show the plot
plt.show()
# Create box plot with Seaborn's default settings.
# NOTE(review): df is defined outside this snippet -- confirm it has the
# 'species' and 'petal length (cm)' columns.
_ = sns.boxplot(x='species', y='petal length (cm)', data=df)

# Label the axes
_ = plt.xlabel('species')
_ = plt.ylabel('petal length (cm)')

# Show the plot
plt.show()
# Array of differences to mean: differences
# NOTE(review): the subtraction assumes versicolor_petal_length is a NumPy
# array (a plain list would raise here) -- confirm.
differences = versicolor_petal_length - np.mean(versicolor_petal_length)

# Square the differences: diff_sq
diff_sq = differences ** 2

# Compute the mean square difference: variance_explicit
variance_explicit = np.mean(diff_sq)

# Compute the variance using NumPy: variance_np
variance_np = np.var(versicolor_petal_length)

# Print both results side by side for comparison
print(variance_np, variance_explicit)
# Make a scatter plot of petal width against petal length (points only)
_ = plt.plot(
    versicolor_petal_length,
    versicolor_petal_width,
    marker='.',
    linestyle='none',
)

# Label the axes
_ = plt.xlabel('Petal length')
_ = plt.ylabel('Petal Width')

# Show the result
plt.show()
# Compute the covariance matrix: covariance_matrix
covariance_matrix = np.cov(versicolor_petal_length, versicolor_petal_width)

# Print covariance matrix
print(covariance_matrix)

# Extract covariance of length and width of petals: petal_cov
# Index with [0, 1] to get the scalar off-diagonal entry; the list form
# [[0], [1]] would return a one-element array instead.
petal_cov = covariance_matrix[0, 1]

# Print the length/width covariance
print(petal_cov)
def pearson_r(x, y):
    """Compute Pearson correlation coefficient between two arrays.

    Parameters
    ----------
    x, y : sequence or numpy.ndarray
        Paired one-dimensional measurements of equal length.

    Returns
    -------
    numpy.float64
        The off-diagonal entry ``[0, 1]`` of ``np.corrcoef(x, y)``.
    """
    # Compute correlation matrix: corr_mat
    corr_mat = np.corrcoef(x, y)

    # Return entry [0, 1], the correlation between x and y
    return corr_mat[0, 1]
# Compute Pearson correlation coefficient for I. versicolor: r
r = pearson_r(versicolor_petal_length, versicolor_petal_width)

# Print the result
print(r)
Add Comment
Please, Sign In to add comment