Machine Learning Full Courses

# -*- coding: utf-8 -*-
"""
Spyder Editor

This is a temporary script file.
"""

import numpy as np
import pandas as pd

#read csv file
dataset = pd.read_csv("C:\\Users\\acer-pc\\Desktop\\lol\\14_page_p1s2_file_2\\Data_Preprocessing\\Data.csv")
dataset

#separating independent columns
X=dataset.iloc[:,:-1].values
X

#separating the dependent column
Y=dataset.iloc[:,3].values
Y

#exercise
#z=dataset.iloc[0:4,:].values
#z

#taking care of missing data (Imputer is the old scikit-learn API;
#see the modern equivalent sketched below)
from sklearn.preprocessing import Imputer
imputer = Imputer(missing_values="NaN",strategy='mean',axis=0)
imputer = imputer.fit(X[:,1:3])
X[:,1:3] = imputer.transform(X[:,1:3])
X

#encoding categorical data
from sklearn.preprocessing import LabelEncoder,OneHotEncoder
labelencoder_x=LabelEncoder()
X[:,0] = labelencoder_x.fit_transform(X[:,0])
onehotencoder=OneHotEncoder(categorical_features=[0])   #only column 0 is categorical
X = onehotencoder.fit_transform(X).toarray()

#for the dependent variable
labelencoder_y=LabelEncoder()
Y=labelencoder_y.fit_transform(Y)

#splitting into testing and training sets
from sklearn.model_selection import train_test_split
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.2,random_state=0)

#Feature Scaling
from sklearn.preprocessing import StandardScaler
sc_x=StandardScaler()
X_train=sc_x.fit_transform(X_train)
X_test=sc_x.transform(X_test)
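
#--- Imputer and the categorical_features argument were removed in later
#--- scikit-learn releases. A minimal sketch of the same preprocessing with
#--- the modern API (an assumption, not part of the original course code;
#--- assumes scikit-learn >= 0.22 and the same Data.csv column layout):
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder as ModernOneHotEncoder

ct = ColumnTransformer(
    [("encode", ModernOneHotEncoder(), [0]),                  #one-hot the country column
     ("impute", SimpleImputer(strategy="mean"), [1, 2])],     #mean-fill Age and Salary
    sparse_threshold=0)                                       #force a dense array
X_modern = ct.fit_transform(dataset.iloc[:, :-1].values)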

------------------------------------------------------------------------------------------------------------------------

# -*- coding: utf-8 -*-
"""
Created on Mon Feb 25 16:14:49 2019

@author: acer-pc
"""

import numpy as np
import pandas as pd

#read csv file
dataset1 = pd.read_csv("C:\\Users\\acer-pc\\Desktop\\lol\\14_page_p2s4_file_1 (1)\\Simple_Linear_Regression\\Salary_Data.csv")
dataset1

#separating the independent column (number of years of experience)
X1=dataset1.iloc[:,:-1].values
X1

#separating the dependent column (salary)
Y1=dataset1.iloc[:,1].values
Y1

#splitting into testing and training sets
from sklearn.model_selection import train_test_split
X1_train,X1_test,Y1_train,Y1_test=train_test_split(X1,Y1,test_size=0.2,random_state=0)

#Linear Regression
from sklearn.linear_model import LinearRegression
regressor=LinearRegression()
regressor.fit(X1_train,Y1_train)

#Predicting results (predict expects a 2-D array: one row per sample)
y_pred = regressor.predict([[20.0]])
y_pred
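
#--- A quick visual check of the fit: a minimal sketch, not part of the
#--- original script (assumes the variables above are still in scope):
import matplotlib.pyplot as plt

plt.scatter(X1_train, Y1_train, color='red')                    #training points
plt.plot(X1_train, regressor.predict(X1_train), color='blue')   #fitted line
plt.title('Salary vs Experience (Training set)')
plt.xlabel('Years of Experience')
plt.ylabel('Salary')
plt.show()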

------------------------------------------------------------------------------------------------------------------------

# -*- coding: utf-8 -*-
"""
Created on Tue Feb 26 10:50:30 2019

@author: acer-pc
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

dataset2 = pd.read_csv("C:\\Users\\acer-pc\\Desktop\\lol\\Polynomial_Regression\\Position_Salaries.csv")
dataset2

#separating the independent column
X2=dataset2.iloc[:,1:2].values
X2

#separating the dependent column
Y2=dataset2.iloc[:,2].values
Y2

#splitting into testing and training sets
from sklearn.model_selection import train_test_split
X2_train,X2_test,Y2_train,Y2_test=train_test_split(X2,Y2,test_size=0.25,random_state=0)

#Linear Regression
from sklearn.linear_model import LinearRegression
regressor=LinearRegression()
regressor.fit(X2_train,Y2_train)

#Visualizing the Linear Regression
plt.scatter(X2,Y2,color='red')
plt.plot(X2,regressor.predict(X2),color='blue')
plt.title('Truth or Bluff (Linear Regression)')
plt.xlabel('Position level')
plt.ylabel('Salary')
plt.show()

#fitting polynomial regression
from sklearn.preprocessing import PolynomialFeatures
poly_reg = PolynomialFeatures(degree = 4)   #by default the degree is 2
X2_poly = poly_reg.fit_transform(X2)
lin_reg_2 = LinearRegression()
lin_reg_2.fit(X2_poly,Y2)

#Visualizing the polynomial regression
plt.scatter(X2,Y2,color='red')
plt.plot(X2,lin_reg_2.predict(poly_reg.transform(X2)),color='blue')
plt.title('Truth or Bluff (Polynomial Regression)')
plt.xlabel('Position level')
plt.ylabel('Salary')
plt.show()
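
#--- For a smoother curve, evaluate the degree-4 fit on a dense grid instead
#--- of the raw X2 points: a minimal sketch, not part of the original script:
X_grid = np.arange(X2.min(), X2.max(), 0.1).reshape(-1, 1)   #0.1-step grid over the levels
plt.scatter(X2, Y2, color='red')
plt.plot(X_grid, lin_reg_2.predict(poly_reg.transform(X_grid)), color='blue')
plt.title('Truth or Bluff (Polynomial Regression, high resolution)')
plt.xlabel('Position level')
plt.ylabel('Salary')
plt.show()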

------------------------------------------------------------------------------------------------------------------------

# -*- coding: utf-8 -*-
"""
Created on Tue Feb 26 14:37:14 2019

@author: acer-pc
"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

#read csv file
dataset = pd.read_csv("C:\\Users\\acer-pc\\Desktop\\lol\\K_Nearest_Neighbors\\Social_Network_Ads.csv")
dataset

#separating independent columns (Age, Estimated Salary)
X=dataset.iloc[:,[2,3]].values
X

#separating the dependent column (Purchased)
Y=dataset.iloc[:,4].values
Y

#splitting into testing and training sets
from sklearn.model_selection import train_test_split
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.25,random_state=0)

#Feature Scaling for X
from sklearn.preprocessing import StandardScaler
sc_x=StandardScaler()
X_train=sc_x.fit_transform(X_train)
X_test=sc_x.transform(X_test)

#fitting the K-NN classifier to the training set
from sklearn.neighbors import KNeighborsClassifier
classifier=KNeighborsClassifier(n_neighbors=5,metric='minkowski',p=2)
classifier.fit(X_train,Y_train)

#Predicting results
y_pred = classifier.predict(X_test)
y_pred

#making the confusion matrix
from sklearn.metrics import confusion_matrix
cm=confusion_matrix(Y_test,y_pred)

#plotting the decision boundary on the training set
from matplotlib.colors import ListedColormap
X_set,Y_set=X_train,Y_train
X1,X2=np.meshgrid(np.arange(start=X_set[:,0].min()-1,stop=X_set[:,0].max()+1,step=0.01),
                  np.arange(start=X_set[:,1].min()-1,stop=X_set[:,1].max()+1,step=0.01))
plt.contourf(X1,X2,classifier.predict(np.array([X1.ravel(),X2.ravel()]).T).reshape(X1.shape),
             alpha=0.75,cmap=ListedColormap(('red','green')))
plt.xlim(X1.min(),X1.max())
plt.ylim(X2.min(),X2.max())
for i,j in enumerate(np.unique(Y_set)):
    plt.scatter(X_set[Y_set==j,0],X_set[Y_set==j,1],
                c=ListedColormap(('red','green'))(i),label=j)
plt.title('K-NN (Training set)')
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()
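
#--- A quick sanity check on the classifier: a minimal sketch, not part of
#--- the original script, deriving accuracy from the confusion matrix above:
from sklearn.metrics import accuracy_score

print("Accuracy (from cm):", cm.trace() / cm.sum())   #correct predictions / all predictions
print("Accuracy (sklearn):", accuracy_score(Y_test, y_pred))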

------------------------------------------------------------------------------------------------------------------------

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

#read csv file
dataset = pd.read_csv("C:\\Users\\acer-pc\\Desktop\\lol\\P14-K-Means\\K_Means\\Mall_Customers.csv")
dataset

#separating independent columns (Annual Income, Spending Score)
X=dataset.iloc[:,[3,4]].values
X

#using the elbow method to find the optimal number of clusters
from sklearn.cluster import KMeans
wcss=[]
for i in range(1,11):
    kmeans = KMeans(n_clusters = i, init = 'k-means++', random_state=42)
    kmeans.fit(X)
    wcss.append(kmeans.inertia_)   #inertia_ = within-cluster sum of squares
plt.plot(range(1,11),wcss)
plt.title('The elbow method')
plt.xlabel('Number of clusters')
plt.ylabel('WCSS')
plt.show()

#Fitting K-Means to the dataset
kmeans = KMeans(n_clusters = 5, init = 'k-means++', random_state=42)
y_kmeans=kmeans.fit_predict(X)

#Visualizing the clusters
plt.scatter(X[y_kmeans == 0, 0], X[y_kmeans == 0, 1], s = 100, c = 'red', label = 'Cluster 1')
plt.scatter(X[y_kmeans == 1, 0], X[y_kmeans == 1, 1], s = 100, c = 'blue', label = 'Cluster 2')
plt.scatter(X[y_kmeans == 2, 0], X[y_kmeans == 2, 1], s = 100, c = 'green', label = 'Cluster 3')
plt.scatter(X[y_kmeans == 3, 0], X[y_kmeans == 3, 1], s = 100, c = 'cyan', label = 'Cluster 4')
plt.scatter(X[y_kmeans == 4, 0], X[y_kmeans == 4, 1], s = 100, c = 'magenta', label = 'Cluster 5')
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s = 300, c = 'yellow', label = 'Centroids')
plt.title('Clusters of customers')
plt.xlabel('Annual Income (k$)')
plt.ylabel('Spending Score (1-100)')
plt.legend()
plt.show()
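
#--- Beyond the elbow plot, cluster quality can be checked with the silhouette
#--- score: a minimal sketch, not part of the original script:
from sklearn.metrics import silhouette_score

#mean silhouette coefficient over all samples; values near 1 indicate
#tight, well-separated clusters
print("Silhouette score for 5 clusters:", silhouette_score(X, y_kmeans))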

------------------------------------------------------------------------------------------------------------------------

# -*- coding: utf-8 -*-
"""
Created on Wed Feb 27 11:50:53 2019

@author: acer-pc
"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

#read csv file
dataset = pd.read_csv("C:\\Users\\acer-pc\\Desktop\\lol\\P14-SVM (1)\\SVM\\Social_Network_Ads.csv")
dataset

#separating independent columns (Age, Estimated Salary)
X=dataset.iloc[:,[2,3]].values
X

#separating the dependent column (Purchased)
Y=dataset.iloc[:,4].values
Y

#splitting into testing and training sets
from sklearn.model_selection import train_test_split
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.25,random_state=0)

#Feature Scaling
from sklearn.preprocessing import StandardScaler
sc_x=StandardScaler()
X_train=sc_x.fit_transform(X_train)
X_test=sc_x.transform(X_test)

#Fitting SVM (RBF kernel) to the training set
from sklearn.svm import SVC
classifier = SVC(kernel='rbf',random_state=0)
classifier.fit(X_train,Y_train)

#Predicting results
y_pred = classifier.predict(X_test)
y_pred

#making the confusion matrix
from sklearn.metrics import confusion_matrix
cm=confusion_matrix(Y_test,y_pred)

#plotting the decision boundary on the test set
from matplotlib.colors import ListedColormap
X_set, Y_set = X_test, Y_test
X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01),
                     np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01))
plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha = 0.75, cmap = ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(Y_set)):
    plt.scatter(X_set[Y_set == j, 0], X_set[Y_set == j, 1],
                c = ListedColormap(('red', 'green'))(i), label = j)
plt.title('SVM (Test set)')
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()
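
#--- Numeric evaluation to go with the plot: a minimal sketch, not part of
#--- the original script:
from sklearn.metrics import accuracy_score, classification_report

print("Accuracy:", accuracy_score(Y_test, y_pred))
print(classification_report(Y_test, y_pred))   #per-class precision/recall/F1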

------------------------------------------------------------------------------------------------------------------------

# Natural Language Processing

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset (quoting = 3 ignores double quotes inside the reviews)
dataset = pd.read_csv('C:\\Users\\acer-pc\\Desktop\\lol\\P14-Natural-Language-Processing\\Natural_Language_Processing\\Restaurant_Reviews.tsv', delimiter = '\t', quoting = 3)
dataset

# Cleaning the texts
import re
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
corpus = []
ps = PorterStemmer()
for i in range(0, 1000):
    review = re.sub('[^a-zA-Z]', ' ', dataset['Review'][i])   # keep letters only
    review = review.lower()
    review = review.split()
    review = [ps.stem(word) for word in review if not word in set(stopwords.words('english'))]
    review = ' '.join(review)
    corpus.append(review)

# Creating the Bag of Words model
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer(max_features = 1500)
X = cv.fit_transform(corpus).toarray()
y = dataset.iloc[:, 1].values

# Splitting the dataset into the Training set and Test set
# (sklearn.cross_validation was removed; train_test_split now lives in model_selection)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 0)

"""# Fitting Naive Bayes to the Training set
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
classifier.fit(X_train, y_train)"""

# Fitting SVM to the training set
from sklearn.svm import SVC
classifier = SVC(kernel='rbf',random_state=0)
classifier.fit(X_train,y_train)

# Predicting the Test set results
y_pred = classifier.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
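
#--- Deriving the standard metrics from the 2x2 confusion matrix: a minimal
#--- sketch, not part of the original script (sklearn orders cm.ravel() as
#--- tn, fp, fn, tp for binary labels):
tn, fp, fn, tp = cm.ravel()
accuracy  = (tp + tn) / cm.sum()
precision = tp / (tp + fp)
recall    = tp / (tp + fn)
print("accuracy =", accuracy, "precision =", precision, "recall =", recall)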

------------------------------------------------------------------------------------------------------------------------

# -*- coding: utf-8 -*-
"""
Created on Wed Feb 27 15:11:27 2019

@author: acer-pc
"""
from matplotlib import pyplot as plt
from skimage.feature import blob_dog, blob_log, blob_doh
#blob_log/dog/doh do the mathematical computation for scanning the image pixel by pixel
from math import sqrt
import glob
from skimage.io import imread

#fetching the image
example_file = glob.glob(r"C:\Users\acer-pc\Desktop\lol\star1.jpg")[0]
im = imread(example_file, as_gray=True)   #the argument is as_grey in older scikit-image
plt.imshow(im)
plt.show()

#count the number of stars (Laplacian of Gaussian blob detection)
blobs_log = blob_log(im, max_sigma=30, num_sigma=10, threshold=.1)
blobs_log[:,2] = blobs_log[:,2] * sqrt(2)   #convert sigma to an approximate radius
stars = len(blobs_log)
print("Number of stars counted:", stars)

#validate whether we captured all the stars
fig, ax = plt.subplots(1, 1)
plt.imshow(im)
for blob in blobs_log:
    y, x, r = blob
    c = plt.Circle((x, y), r+5, color='lime', linewidth=2, fill=False)
    ax.add_patch(c)
plt.show()
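
#--- The blob_dog import above suggests comparing detectors; a minimal sketch,
#--- not part of the original script, counting the same stars with Difference
#--- of Gaussian (a faster approximation of LoG):
blobs_dog = blob_dog(im, max_sigma=30, threshold=.1)
blobs_dog[:,2] = blobs_dog[:,2] * sqrt(2)   #convert sigma to an approximate radius
print("Number of stars counted (DoG):", len(blobs_dog))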

------------------------------------------------------------------------------------------------------------------------