Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# -*- coding: utf-8 -*-
"""
Data preprocessing walkthrough (Spyder scratch script).

Loads Data.csv, fills missing numeric values with the column mean,
one-hot encodes the first (categorical) column, label-encodes the
target, splits into train/test sets, and standard-scales the features.
"""
import numpy as np
import pandas as pd

# Read the raw dataset (hard-coded local path).
dataset = pd.read_csv("C:\\Users\\acer-pc\\Desktop\\lol\\14_page_p1s2_file_2\\Data_Preprocessing\\Data.csv")
dataset

# Independent variables: every column except the last.
X = dataset.iloc[:, :-1].values
X
# Dependent variable: the last column, taken as a 1-D vector.
# FIX: was iloc[:, 3:] (a 2-D column matrix), which makes the
# LabelEncoder/estimator calls below emit shape warnings.
Y = dataset.iloc[:, 3].values
Y

# Fill missing numeric cells (columns 1-2) with the column mean.
# NOTE(review): Imputer was removed in scikit-learn >= 0.22; on newer
# versions use sklearn.impute.SimpleImputer instead.
from sklearn.preprocessing import Imputer
imputer = Imputer(missing_values="NaN", strategy='mean', axis=0)
imputer = imputer.fit(X[:, 1:3])
X[:, 1:3] = imputer.transform(X[:, 1:3])
X

# Encode the categorical column (index 0) as integers first.
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
labelencoder_x = LabelEncoder()
X[:, 0] = labelencoder_x.fit_transform(X[:, 0])
# FIX: categorical_features was computed but never passed, so the whole
# matrix (including the numeric columns) was being one-hot encoded.
onehotencoder = OneHotEncoder(categorical_features=[0])
X = onehotencoder.fit_transform(X).toarray()
# Label-encode the dependent variable.
labelencoder_y = LabelEncoder()
Y = labelencoder_y.fit_transform(Y)

# Hold out 20% of the rows for testing.
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)

# Feature scaling: fit on the training set only, then reuse the same
# scaler on the test set to avoid information leakage.
from sklearn.preprocessing import StandardScaler
sc_x = StandardScaler()
X_train = sc_x.fit_transform(X_train)
# FIX: was assigned to the typo name `X_text`, leaving X_test unscaled.
X_test = sc_x.transform(X_test)
- ----------------------------------------------------------------------------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Mon Feb 25 16:14:49 2019
@author: acer-pc

Simple linear regression: predict salary from years of experience.
"""
import numpy as np
import pandas as pd

# Read the salary dataset (hard-coded local path).
dataset1 = pd.read_csv("C:\\Users\\acer-pc\\Desktop\\lol\\14_page_p2s4_file_1 (1)\\Simple_Linear_Regression\\Salary_Data.csv")
dataset1

# Independent variable: number of years of experience.
X1 = dataset1.iloc[:, :-1].values
X1
# Dependent variable: salary.
Y1 = dataset1.iloc[:, 1].values
Y1

# 80/20 train/test split with a fixed seed.
from sklearn.model_selection import train_test_split
X1_train, X1_test, Y1_train, Y1_test = train_test_split(X1, Y1, test_size=0.2, random_state=0)

# Fit the regression line on the training data.
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X1_train, Y1_train)

# Predict the salary for 20 years of experience.
# FIX: predict() expects a 2-D array of samples; passing the bare
# scalar 20.0 raises a ValueError.
y_pred = regressor.predict([[20.0]])
y_pred
- -------------------------------------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Tue Feb 26 10:50:30 2019
@author: acer-pc

Polynomial regression on Position_Salaries: compares a straight-line
fit against a degree-4 polynomial fit of salary vs position level.
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Read the dataset.
dataset2 = pd.read_csv("C:\\Users\\acer-pc\\Desktop\\lol\\Polynomial_Regression\\Position_Salaries.csv")
dataset2

# Independent variable: position level (the 1:2 slice keeps it 2-D).
X2 = dataset2.iloc[:, 1:2].values
X2
# Dependent variable: salary.
Y2 = dataset2.iloc[:, 2:].values
Y2

# Train/test split.
# NOTE(review): the split result is never used below — both fits run on
# the full data; kept for parity with the original tutorial flow.
from sklearn.model_selection import train_test_split
X2_train, X2_test, Y2_train, Y2_test = train_test_split(X2, Y2, test_size=0.25, random_state=0)

# Plain linear-regression baseline.
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X2_train, Y2_train)

# Visualizing the linear fit.
plt.scatter(X2, Y2, color='red')
plt.plot(X2, regressor.predict(X2), color='blue')
plt.title('Truth or Bluff(LinearRegression)')
plt.xlabel('Position level')
plt.ylabel('Salary')
plt.show()

# Polynomial regression: expand level into [1, x, x^2, x^3, x^4].
from sklearn.preprocessing import PolynomialFeatures
poly_reg = PolynomialFeatures(degree=4)  # by default degree is 2
X2_poly = poly_reg.fit_transform(X2)
# FIX: dropped the redundant poly_reg.fit(X2_poly, Y2) call —
# fit_transform above has already fitted the transformer.
lin_reg_2 = LinearRegression()
lin_reg_2.fit(X2_poly, Y2)

# Visualizing the polynomial fit.
plt.scatter(X2, Y2, color='red')
plt.plot(X2, lin_reg_2.predict(poly_reg.fit_transform(X2)), color='blue')
plt.title('Truth or bluff (polynomial Regression)')
plt.xlabel('position level')
# FIX: the y-axis label was missing on this second plot.
plt.ylabel('Salary')
plt.show()
- --------------------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Tue Feb 26 14:37:14 2019
@author: acer-pc

K-Nearest-Neighbors classification of Social_Network_Ads
(Age + EstimatedSalary -> Purchased), with a decision-region plot.
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Read csv file.
dataset = pd.read_csv("C:\\Users\\acer-pc\\Desktop\\lol\\K_Nearest_Neighbors\\Social_Network_Ads.csv")
dataset

# Features: columns 2 and 3 (age, estimated salary).
X = dataset.iloc[:, [2, 3]].values
X
# Target: column 4, as a 1-D vector.
# FIX: was iloc[:, 4:] which yields a 2-D column matrix and makes
# classifier.fit() emit a DataConversionWarning.
Y = dataset.iloc[:, 4].values
Y

# 75/25 train/test split.
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25, random_state=0)

# Feature scaling: fit on the training set only, reuse on test.
from sklearn.preprocessing import StandardScaler
sc_x = StandardScaler()
X_train = sc_x.fit_transform(X_train)
X_test = sc_x.transform(X_test)

# k-NN with k=5; minkowski metric with p=2 is Euclidean distance.
from sklearn.neighbors import KNeighborsClassifier
classifier = KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2)
classifier.fit(X_train, Y_train)

# Predict the test set.
y_pred = classifier.predict(X_test)
y_pred

# Confusion matrix of test-set predictions.
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(Y_test, y_pred)

# Decision-region plot over the (scaled) training set.
from matplotlib.colors import ListedColormap
X_set, Y_set = X_train, Y_train
X1, X2 = np.meshgrid(np.arange(start=X_set[:, 0].min() - 1, stop=X_set[:, 0].max() + 1, step=0.01),
                     np.arange(start=X_set[:, 1].min() - 1, stop=X_set[:, 1].max() + 1, step=0.01))
plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha=0.75, cmap=ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(Y_set)):
    plt.scatter(X_set[Y_set == j, 0], X_set[Y_set == j, 1],
                c=ListedColormap(('red', 'green'))(i), label=j)
plt.title('K-NN(Training set)')
# FIX: the x-axis label was missing (the parallel SVM plot labels it).
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()
- ----------------------------------------------------------------------------------------------------------------------------------
"""
K-Means clustering of mall customers by annual income and spending
score: pick k via the elbow method, then fit and plot 5 clusters.
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Read csv file.
# FIX: the path mixed a lone "\K_Means" (which only works because \K is
# not a recognized escape) with "\\"; normalized to escaped backslashes
# throughout — the resulting string value is identical.
dataset = pd.read_csv("C:\\Users\\acer-pc\\Desktop\\lol\\P14-K-Means\\K_Means\\Mall_Customers.csv")
dataset

# Features: annual income (col 3) and spending score (col 4).
X = dataset.iloc[:, [3, 4]].values
X

# Elbow method: within-cluster sum of squares for k = 1..10.
from sklearn.cluster import KMeans
wcss = []
for i in range(1, 11):
    kmeans = KMeans(n_clusters=i, init='k-means++', random_state=42)
    kmeans.fit(X)
    wcss.append(kmeans.inertia_)
plt.plot(range(1, 11), wcss)
plt.title('The elbow method')
plt.xlabel('Number of cluster')
plt.ylabel('WCSS')
plt.show()

# Fit with the elbow-chosen k = 5.
kmeans = KMeans(n_clusters=5, init='k-means++', random_state=42)
y_kmeans = kmeans.fit_predict(X)

# Visualize: one scatter per cluster, plus the centroids.
plt.scatter(X[y_kmeans == 0, 0], X[y_kmeans == 0, 1], s=100, c='red', label='Cluster 1')
plt.scatter(X[y_kmeans == 1, 0], X[y_kmeans == 1, 1], s=100, c='blue', label='Cluster 2')
plt.scatter(X[y_kmeans == 2, 0], X[y_kmeans == 2, 1], s=100, c='green', label='Cluster 3')
plt.scatter(X[y_kmeans == 3, 0], X[y_kmeans == 3, 1], s=100, c='cyan', label='Cluster 4')
plt.scatter(X[y_kmeans == 4, 0], X[y_kmeans == 4, 1], s=100, c='magenta', label='Cluster 5')
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s=300, c='yellow', label='Centroids')
plt.title('Clusters of customers')
plt.xlabel('Annual Income (k$)')
plt.ylabel('Spending Score (1-100)')
plt.legend()
plt.show()
# NOTE(review): everything below appears to be a leftover copy-paste of
# the Data_Preprocessing script; it does not belong to this K-Means
# example and cannot run as written (see the Y note further down).
#taking care of missing data
# NOTE(review): Imputer was removed in scikit-learn >= 0.22; newer code
# should use sklearn.impute.SimpleImputer.
from sklearn.preprocessing import Imputer
imputer = Imputer(missing_values="NaN",strategy='mean',axis=0)
# X here holds only the two numeric Mall_Customers columns [3, 4], so
# X[:,1:3] selects just the second column.
imputer = imputer.fit(X[:,1:3])
X[:,1:3] = imputer.transform(X[:,1:3])
X
#encoding catagorial data
from sklearn.preprocessing import LabelEncoder,OneHotEncoder
labelencoder_x=LabelEncoder()
# NOTE(review): X[:, 0] is numeric (annual income); label-encoding it is
# almost certainly unintended — confirm against the source tutorial.
X[:,0] = labelencoder_x.fit_transform(X[:,0])
onehotencoder=OneHotEncoder()
# NOTE(review): categorical_features is computed but never passed to
# OneHotEncoder, so the entire matrix would be one-hot encoded.
categorical_features=[0]
X = onehotencoder.fit_transform(X).toarray()
#for dependent variable
# NOTE(review): Y is never defined anywhere in this clustering script
# (clustering has no target), so the next line raises NameError.
labelencoder_y=LabelEncoder()
Y=labelencoder_y.fit_transform(Y)
#splitting dataset testing and training
from sklearn.model_selection import train_test_split
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.2,random_state=0)
#Feature Scaling
from sklearn.preprocessing import StandardScaler
sc_x=StandardScaler()
X_train=sc_x.fit_transform(X_train)
X_test=sc_x.transform(X_test)
#linear Regression
from sklearn.linear_model import LinearRegression
regressor=LinearRegression()
regressor.fit(X_train,Y_train)
#Predicting results
y_pred = regressor.predict(X_test)
y_pred
- ------------------------------------------------------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Wed Feb 27 11:50:53 2019
@author: acer-pc

RBF-kernel SVM on Social_Network_Ads: classify Purchased from Age and
EstimatedSalary, report a confusion matrix, and draw the decision
regions over the test set.
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load the ads dataset from disk.
dataset = pd.read_csv("C:\\Users\\acer-pc\\Desktop\\lol\\P14-SVM (1)\\SVM\\Social_Network_Ads.csv")
dataset

# Feature matrix: the Age and EstimatedSalary columns (2 and 3).
X = dataset.iloc[:, [2, 3]].values
X
# Target vector: column 4.
Y = dataset.iloc[:, 4].values
Y

# 75/25 train/test split with a fixed seed for reproducibility.
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25, random_state=0)

# Standardize both feature columns; the scaler is fitted on the
# training data only and then applied unchanged to the test data.
from sklearn.preprocessing import StandardScaler
sc_x = StandardScaler()
X_train = sc_x.fit_transform(X_train)
X_test = sc_x.transform(X_test)

# Train the RBF-kernel support vector classifier.
from sklearn.svm import SVC
classifier = SVC(kernel='rbf', random_state=0)
classifier.fit(X_train, Y_train)

# Score the held-out samples.
y_pred = classifier.predict(X_test)
y_pred

# Summarize test-set performance as a confusion matrix.
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(Y_test, y_pred)

# Decision-boundary plot over the test set: evaluate the classifier on
# a fine grid spanning the data, then overlay the true points.
from matplotlib.colors import ListedColormap
X_set, Y_set = X_test, Y_test
age_axis = np.arange(start=X_set[:, 0].min() - 1, stop=X_set[:, 0].max() + 1, step=0.01)
salary_axis = np.arange(start=X_set[:, 1].min() - 1, stop=X_set[:, 1].max() + 1, step=0.01)
X1, X2 = np.meshgrid(age_axis, salary_axis)
grid_points = np.array([X1.ravel(), X2.ravel()]).T
plt.contourf(X1, X2, classifier.predict(grid_points).reshape(X1.shape),
             alpha=0.75, cmap=ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(Y_set)):
    plt.scatter(X_set[Y_set == j, 0], X_set[Y_set == j, 1],
                c=ListedColormap(('red', 'green'))(i), label=j)
plt.title('SVM (Test set)')
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()
- ------------------------------------------------------------------------------------------------------------------------------
# Natural Language Processing: bag-of-words sentiment classification of
# restaurant reviews (Restaurant_Reviews.tsv) with an RBF-kernel SVM.
# Importing the libraries.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import re
import nltk

# Importing the dataset (tab-separated; quoting=3 ignores double quotes).
dataset = pd.read_csv('C:\\Users\\acer-pc\\Desktop\\lol\\P14-Natural-Language-Processing\\Natural_Language_Processing\\Restaurant_Reviews.tsv', delimiter = '\t', quoting = 3)
dataset

# Cleaning the texts.
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
# FIX: hoist loop-invariant work — the original constructed a
# PorterStemmer per review and rebuilt the stopword set for every word
# of every review inside the comprehension.
ps = PorterStemmer()
stop_words = set(stopwords.words('english'))
corpus = []
for i in range(0, 1000):
    # Keep letters only, lowercase, drop stopwords, stem the rest.
    review = re.sub('[^a-zA-Z]', ' ', dataset['Review'][i])
    review = review.lower()
    review = review.split()
    review = [ps.stem(word) for word in review if not word in stop_words]
    review = ' '.join(review)
    corpus.append(review)

# Creating the Bag of Words model (top 1500 terms by frequency).
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer(max_features = 1500)
X = cv.fit_transform(corpus).toarray()
y = dataset.iloc[:, 1].values

# Splitting the dataset into the Training set and Test set.
# FIX: sklearn.cross_validation was removed in scikit-learn 0.20; use
# sklearn.model_selection, consistent with the rest of this file.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 0)

"""# Fitting Naive Bayes to the Training set (kept as an alternative)
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
classifier.fit(X_train, y_train)"""

# Fitting an RBF-kernel SVM to the Training set.
from sklearn.svm import SVC
classifier = SVC(kernel='rbf', random_state=0)
classifier.fit(X_train, y_train)

# Predicting the Test set results.
y_pred = classifier.predict(X_test)

# Making the Confusion Matrix.
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
- ------------------------------------------------------------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Wed Feb 27 15:11:27 2019
@author: acer-pc

Count bright blobs ("stars") in an image with the Laplacian-of-Gaussian
blob detector, then draw a circle around each detection to validate.
"""
from matplotlib import pyplot as plt
from skimage import data
from skimage.feature import blob_dog, blob_log, blob_doh
# blob_log/dog/doh do the multi-scale pixel-by-pixel blob scanning.
from math import sqrt
from skimage.color import rgb2gray
import glob
from skimage.io import imread

# Fetch the image.
# NOTE(review): the r-prefix keeps the backslashes doubled in the glob
# pattern; Windows tolerates repeated separators, but either drop the
# r-prefix or the doubling — confirm against the actual path on disk.
example_file = glob.glob(r"C:\\Users\\acer-pc\\Desktop\\lol\\star1.jpg")[0]
# NOTE(review): as_grey was renamed as_gray in newer scikit-image
# releases — confirm the installed version.
im = imread(example_file, as_grey=True)
plt.imshow(im)
plt.show()

# Detect blobs; blob_log returns one (y, x, sigma) row per blob.
blobs_log = blob_log(im, max_sigma=30, num_sigma=10, threshold=.1)
# Convert each sigma into an approximate blob radius (r = sigma * sqrt(2)).
blobs_log[:, 2] = blobs_log[:, 2] * sqrt(2)
stars = len(blobs_log)
# FIX: typo in the printed message ("start" -> "stars").
print("Number of stars counted :", stars)

# Validate whether we captured all the stars: circle every detection.
fig, ax = plt.subplots(1, 1)
plt.imshow(im)
for blob in blobs_log:
    y, x, r = blob
    c = plt.Circle((x, y), r + 5, color='lime', linewidth=2, fill=False)
    ax.add_patch(c)
# FIX: the annotated figure was never displayed — plt.show() was missing.
plt.show()
- --------------------------------------------------------------------------------------------------------------------------------
Add Comment
Please, Sign In to add comment