#%%

#%%

################################# ANECHOIC CHAMBER ############################################

import pandas as pd  # Pandas dataframe
from scapy.layers.dot11 import Dot11ProbeReq, Dot11ProbeResp, Dot11Beacon, Dot11AssoResp, RadioTap, Dot11Elt
from scapy.packet import ls
from scapy.utils import rdpcap
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import seaborn as sns
import numpy as np
from raw_data.oui import mac_lookup
import os
from datetime import datetime
from sklearn.cluster import DBSCAN

#%%

MAX_SN = 4096  # Max value for the 802.11 sequence number

def extractSN(sc):
    # "Normalize" to a four-digit hexadecimal number, then drop the last hex
    # digit (the 4-bit fragment number): the remaining 12 bits are the
    # 802.11 sequence number.
    hexSC = '0' * (4 - len(hex(sc)[2:])) + hex(sc)[2:]
    sn = int(hexSC[:-1], 16)
    return sn
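
# The 16-bit Sequence Control field packs a 4-bit fragment number (low bits)
# and a 12-bit sequence number (high bits); dropping the last hex digit is
# equivalent to a 4-bit right shift. A quick sanity check:
assert extractSN(0x00A5) == 0x00A5 >> 4  # seq 10, fragment 5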


'''
addr1   Destination MAC address
addr2   Source MAC address of the sender
addr3   MAC address of the Access Point
'''

#%%

def pcapng_to_dataframe(pcap):
    packet = []
    ap = []
    mac_in_chiaro = []
    access_point = []
    time_list = [p.time for p in pcap]
    time_list = np.array(time_list)
    time_threshold = time_list.min() + (15 * 60)
    for pkt in pcap:
        ##### skip the first 15 minutes of the capture ############
        if pkt.time > time_threshold:
            # if pkt.haslayer(Dot11Beacon) or pkt.haslayer(Dot11ProbeResp) or pkt.haslayer(Dot11AssoResp):
            if pkt.haslayer(Dot11Beacon):
                if hasattr(pkt, 'addr2'):
                    ap.append(pkt.addr2)
                    # print("source address :", pkt.addr2)
                if hasattr(pkt, 'addr1') and pkt.addr1 != 'ff:ff:ff:ff:ff:ff':
                    mac_in_chiaro.append(pkt.addr1)
                    # print("mac in chiaro: ", pkt.addr1)
                if hasattr(pkt, 'addr3'):
                    access_point.append(pkt.addr3)
                    # print("Access Point address :", pkt.addr3)

            if pkt.haslayer(Dot11ProbeReq):
                mac = pkt.addr2
                seq = extractSN(pkt.SC)
                power = pkt.dBm_AntSignal
                # print("pkt", mac)
                # Walk the chain of Dot11Elt layers: one row per information element
                while pkt:
                    if all(k in pkt.fields for k in ('ID', 'len', 'info')):
                        packet.append([mac, seq, pkt.fields['ID'], pkt.fields['len'], power])
                    pkt = pkt.payload

    pkts_df = pd.DataFrame(packet)
    pkts_df = pkts_df.drop_duplicates()
    pkts_df = pkts_df.sort_values(by=[0, 1])
    pkts_df.columns = ['mac', 'seq', 'id', 'len', 'power']

    ap = set(ap)

    # print(len(pkts_df.mac.unique()))

    # Discard frames whose source MAC belongs to a known access point
    pkts_df = pkts_df[~pkts_df.mac.isin(ap)]

    # print(len(pkts_df.mac.unique()))

    # pkts_df = pkts_df[pkts_df.id != 221]  # Wi-Fi Protected Access
    # pkts_df = pkts_df[pkts_df.id != 0]
    # pkts_df = pkts_df[pkts_df.id != 238]
    # prova = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 42, 48, 50]
    # pkts_df = pkts_df[pkts_df.id.isin(prova)]

    return ap, mac_in_chiaro, pkts_df


#%%

# SKIP! SKIP! SKIP! SKIP! SKIP! SKIP! SKIP! SKIP! SKIP! SKIP! SKIP! SKIP!
###################### DATASET DESCRIPTION ###########################

my_dir = "raw_data/anecoica_20_01/"  # capture directory (assumed; the original pointed at a single file)

for filename in os.listdir(my_dir):
    print("File capture : ", filename)
    filename = os.path.join(my_dir, filename)

    pcap = rdpcap(filename)

    ap, mac_in_chiaro, df = pcapng_to_dataframe(pcap)

    # FILE SIZE
    file_dimension = os.path.getsize(filename)
    print('File dimension :', round(file_dimension / 1024, 2), "KB")

    # CAPTURE DURATION  # 3600 s added to compensate for the one-hour clock offset between Linux/Windows
    first = round(pcap[0].time + 3600, 2)
    last = round(pcap[len(pcap) - 1].time + 3600, 2)
    first_dateTime = datetime.fromtimestamp(first)
    last_dateTime = datetime.fromtimestamp(last)
    print("Start capture =", first_dateTime)
    print("Stop capture =", last_dateTime)
    print("Capture Length :", round(last - first, 2), "sec -", round((last - first) / 60, 2), "min")

    print("Number of packets : ", len(df))
    print("Number of AP mac : ", len(ap))
    print("Number of mac_in_chiaro : ", len(mac_in_chiaro))
    print("Number of mac : ", len(df.mac.unique()))

    ## Randomized or not (test on the locally administered bit)
    df['random'] = df['mac'].apply(lambda x: (bin(int(x[:2], 16))[2:].zfill(8)[6:] in ['10', '11']))

    print("Non-randomized MACs: ", len(df[df.random == False].mac.unique()))
    print("Randomized MACs: ", len(df[df.random == True].mac.unique()))
    print("\n")
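
#%%

# The 'random' flag above tests the two low bits of the first MAC octet:
# bit 1 is the IEEE "locally administered" (U/L) bit, which randomized MACs
# set, so '10' (local, unicast) or '11' (local, multicast) means randomized.
# A small illustration on a made-up address:
example_mac = 'da:a1:19:00:00:01'  # hypothetical, locally administered
print(bin(int(example_mac[:2], 16))[2:].zfill(8)[6:])  # -> '10'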

#%%

filename = "raw_data/anecoica_20_01/01_prova.pcapng"

print("File capture : ", filename)

pcap = rdpcap(filename)

#%%

ap, mac_in_chiaro, df = pcapng_to_dataframe(pcap)

#%%

############################ TEST WITH TIME AND SEQ ################################

def derandomization_with_seq(pcap):
    # Same extraction as pcapng_to_dataframe, but each row also keeps the
    # packet timestamp so sequence numbers can be followed over time.
    packet = []
    ap = []
    mac_in_chiaro = []
    access_point = []
    time_list = [p.time for p in pcap]
    time_list = np.array(time_list)
    time_threshold = time_list.min() + (15 * 60)
    for pkt in pcap:
        ##### skip the first 15 minutes of the capture ############
        if pkt.time > time_threshold:
            # if pkt.haslayer(Dot11Beacon) or pkt.haslayer(Dot11ProbeResp) or pkt.haslayer(Dot11AssoResp):
            if pkt.haslayer(Dot11Beacon):
                if hasattr(pkt, 'addr2'):
                    ap.append(pkt.addr2)
                    # print("source address :", pkt.addr2)
                if hasattr(pkt, 'addr1') and pkt.addr1 != 'ff:ff:ff:ff:ff:ff':
                    mac_in_chiaro.append(pkt.addr1)
                    # print("mac in chiaro: ", pkt.addr1)
                if hasattr(pkt, 'addr3'):
                    access_point.append(pkt.addr3)
                    # print("Access Point address :", pkt.addr3)

            if pkt.haslayer(Dot11ProbeReq):
                mac = pkt.addr2
                seq = extractSN(pkt.SC)
                power = pkt.dBm_AntSignal
                # print("pkt", mac)
                while pkt:
                    if all(k in pkt.fields for k in ('ID', 'len', 'info')):
                        packet.append([mac, seq, pkt.fields['ID'], pkt.fields['len'], power, pkt.time])
                    pkt = pkt.payload

    pkts_df = pd.DataFrame(packet)
    pkts_df = pkts_df.drop_duplicates()
    pkts_df = pkts_df.sort_values(by=[0, 1])
    pkts_df.columns = ['mac', 'seq', 'id', 'len', 'power', 'time']

    ap = set(ap)

    # print(len(pkts_df.mac.unique()))

    pkts_df = pkts_df[~pkts_df.mac.isin(ap)]

    # print(len(pkts_df.mac.unique()))

    # pkts_df = pkts_df[pkts_df.id != 221]  # Wi-Fi Protected Access
    # pkts_df = pkts_df[pkts_df.id != 0]
    # pkts_df = pkts_df[pkts_df.id != 238]
    # prova = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 42, 48, 50]
    # pkts_df = pkts_df[pkts_df.id.isin(prova)]

    return ap, mac_in_chiaro, pkts_df

ap, mac_in_chiaro, df = derandomization_with_seq(pcap)

#%%

import matplotlib.pyplot as plt

df_prova = df.drop(columns=['id', 'len', 'power']).iloc[0:50, :].drop_duplicates()

sns.catplot(x='time', y='seq', hue='mac', data=df_prova)
plt.show()

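# Reading the plot: the 802.11 sequence counter typically keeps incrementing
# across MAC rotations, so probe requests from different randomized addresses
# that continue one seq-vs-time trajectory likely come from the same device.
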
#%%

############################ ID ELEMENT SELECTION ################################

# pkts_df = pkts_df[pkts_df.id != 221]  # Wi-Fi Protected Access
# pkts_df = pkts_df[pkts_df.id != 0]
# pkts_df = pkts_df[pkts_df.id != 238]
# prova = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 42, 48, 50]
# pkts_df = pkts_df[pkts_df.id.isin(prova)]


#%%


# FILE SIZE
file_dimension = os.path.getsize(filename)
print('File dimension :', round(file_dimension / 1024, 2), "KB")

# CAPTURE DURATION  # 3600 s added to compensate for the one-hour clock offset between Linux/Windows
first = round(pcap[0].time + 3600, 2)
last = round(pcap[len(pcap) - 1].time + 3600, 2)
first_dateTime = datetime.fromtimestamp(first)
last_dateTime = datetime.fromtimestamp(last)
print("Start capture =", first_dateTime)
print("Stop capture =", last_dateTime)
print("Capture Length :", round(last - first, 2), "sec -", round((last - first) / 60, 2), "min")

print("Number of packets : ", len(df))
print("Number of AP mac : ", len(ap))
print("Number of mac_in_chiaro : ", len(mac_in_chiaro))
print("Number of mac : ", len(df.mac.unique()))

## Randomized or not (test on the locally administered bit)
df['random'] = df['mac'].apply(lambda x: (bin(int(x[:2], 16))[2:].zfill(8)[6:] in ['10', '11']))

print("Non-randomized MACs: ", len(df[df.random == False].mac.unique()))
print("Randomized MACs: ", len(df[df.random == True].mac.unique()))


#%%

df_norand = df[df.random == False]
df_rand = df[df.random == True]

print("Non-randomized: \n", df_norand.sample(10))


#%%

######################### NON-RANDOMIZED DATASET OVERVIEW ##################################

df_norand_vendor = pd.DataFrame(df_norand)
df_norand_vendor['vendor'] = [str(mac_lookup(x)) for x in df_norand_vendor.mac]

print(df_norand_vendor.head())

sns.catplot(y='vendor', kind="count", data=df_norand_vendor)

#%%

######################### RANDOMIZED DATASET OVERVIEW ##################################


df_rand_vendor = pd.DataFrame(df_rand)
df_rand_vendor['vendor'] = [str(mac_lookup(x)) for x in df_rand_vendor.mac]

print(df_rand_vendor.head())

sns.catplot(y='vendor', kind="count", data=df_rand_vendor)

#%%

print(df_rand.drop(columns=['seq', 'power', 'time', 'random']).iloc[0:50, :].drop_duplicates())


#%%

# print(df_test_seq[(df_test_seq.id == 50 & df_test_seq.len == 0) | (df_test_seq.id == 0 & df_test_seq.len == 0)].drop(columns=['seq', 'power', 'time']).iloc[0:50, :].drop_duplicates())

print(df_rand.loc[(df_rand['id'] == 50) & (df_rand['len'] == 4)].mac.unique())

mac_list = df_rand.loc[(df_rand['id'] == 50) & (df_rand['len'] == 4)].mac.unique()

#%%

df_mac_list = df_rand.loc[(df_rand['id'] == 50) & (df_rand['len'] == 4)]

#%%

sns.catplot(x='time', y='seq', hue='mac', data=df_mac_list)


#%%
df = df_rand

for mac in df.mac.unique():
    # print(mac, df.sort_values(by='id')[df.mac == mac][['id', 'len']])
    df_temp = df.sort_values(by='id')[df.mac == mac][['seq', 'id', 'len', 'power']]
    df_temp = df_temp.drop_duplicates()
    # print(mac, "\n", df_temp.values)

# Count how many rows there are for each information-element id
df_temp = df.groupby(df['id']).count()

# Discard the ids that occur only once in the whole capture
df = df[~df['id'].isin(df_temp[df_temp.mac.isin([1])].index)]

df_temp = df[['mac', 'id', 'len', 'power']].sort_values(by=['mac', 'id', 'len', 'power']).drop_duplicates(keep='first')
print(df_temp.groupby('mac').count().sort_values(by='id'))

print(df_temp)

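#%%

# Toy check of the id-frequency filter above (values made up): an id seen
# only once across the capture cannot help group MACs, so it gets dropped.
demo_ids = pd.DataFrame({'mac': ['a', 'b', 'c'], 'id': [1, 1, 7]})
counts = demo_ids.groupby('id').count()
print(counts[counts.mac == 1].index.tolist())  # -> [7]
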
#%%

total = []
for mac in df_temp.mac.unique():
    lista = []
    for id in df_temp.id.unique():
        # print(df_temp[(df_temp['id'] == id) & (df_temp['mac'] == mac)].len)
        if df_temp[(df_temp['id'] == id) & (df_temp['mac'] == mac)].len.empty:
            lista.append(0)
        else:
            lista.append(df_temp.loc[(df_temp['id'] == id) & (df_temp['mac'] == mac), 'len'].values)
    seq_mean = df_temp[df_temp['mac'] == mac].mean(numeric_only=True)  # computed but currently unused
    total.append(lista)

#%%

df_new = pd.DataFrame(total)
df_new.index = df_temp.mac.unique()
df_new.columns = ["id_" + str(x) for x in df_temp.id.unique()]

print(df_new.head())

#%%

# view the dataset
print(df_new.head(3))

df_new_expanded = df_new

for column in df_new.columns:
    # expand each df_new column (cells are a scalar 0 or an array of IE lengths) into its own dataframe
    tags = df_new[column].apply(pd.Series)
    if tags.shape[1] > 1:
        # rename each variable in tags after its parent column
        tags = tags.rename(columns=lambda x: column + "_" + str(x)).fillna(0)
        # print("\n TAGS: \n", tags.head(3))
        # join the tags dataframe back to the original dataframe
        # (the concat leaves out the original, unexpanded column)
        df_new_expanded = pd.concat([df_new_expanded.loc[:, df_new_expanded.columns != column], tags[:]], axis=1)
        # print("\n df: \n", df_new_expanded.head(3))

df_new_expanded = df_new_expanded.astype(int)

print(df_new_expanded.head(3))

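#%%

# A toy illustration of the expansion above (names and values made up): a
# cell holding an array of IE lengths becomes one scalar column per entry.
demo = pd.DataFrame({'id_50': [np.array([4, 6]), 0]}, index=['mac_a', 'mac_b'])
print(demo['id_50'].apply(pd.Series).rename(columns=lambda x: 'id_50_' + str(x)).fillna(0))
# -> columns id_50_0 and id_50_1, one row per MAC
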
#%%

################### DROPPING COLUMNS WHERE VALUES ARE ALL EQUAL ####################

df_ichis = df_new_expanded

print(df_ichis.head(3))
print("\n\nN_FEATURES = ", len(df_ichis.columns))

nunique = df_ichis.apply(pd.Series.nunique)
cols_to_drop = nunique[nunique == 1].index
df_ichis = df_ichis.drop(cols_to_drop, axis=1)

print("\n\nN_FEATURES = ", len(df_ichis.columns))


print(df_ichis)



#%%

############### KEEPING ONLY COLUMNS WITH MORE THAN 23 NON-ZERO VALUES ##################

indici = np.argwhere(np.count_nonzero(df_ichis, axis=0) > 23)
idxcolumn = np.hstack(indici)

print(idxcolumn)
print(df_ichis.iloc[:, idxcolumn])
df_tantesperanze = df_ichis.iloc[:, idxcolumn]

#%%

f = df_tantesperanze.iloc[:, 0]

#%%

sns.distplot(f[f != 0], bins=10)


#%%

for column in df_tantesperanze.columns:
    f = df_tantesperanze.loc[:, column]
    sns.distplot(f)
    f.plot.hist(bins=12, alpha=0.5)


#%%

################################## DBSCAN PARAMETER TEST ####################################
df_ichis = df_tantesperanze

X = StandardScaler().fit_transform(df_ichis)

print("\n\nN_FEATURES = ", len(df_ichis.columns))
print("\n\nN_SAMPLES = ", len(df_ichis))

for min_samples in range(2, 6, 1):
    for eps in range(1, 100, 1):
        eps = eps / 100
        # Compute DBSCAN
        db = DBSCAN(eps=eps, min_samples=min_samples).fit(X)
        core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
        core_samples_mask[db.core_sample_indices_] = True
        labels = db.labels_
        # Number of clusters in labels, ignoring noise if present.
        n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
        # print("\nPARAMETERS: eps = {:f} min_samples = {:f}".format(eps, min_samples))
        if n_clusters_ > 12:
            print("N_CLUSTERS = ", n_clusters_)
            print("Parameters: eps = {eps} min_samples = {min_samples}".format(eps=eps, min_samples=min_samples))

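#%%

# A common heuristic for choosing eps without a brute-force sweep (a sketch,
# not part of the original analysis): plot each point's distance to its k-th
# neighbour (self included), sorted; the "knee" of the curve suggests eps.
from sklearn.neighbors import NearestNeighbors

k = 2  # tie this to min_samples
nn = NearestNeighbors(n_neighbors=k).fit(X)
distances, _ = nn.kneighbors(X)
plt.plot(np.sort(distances[:, k - 1]))
plt.ylabel("distance to {}-th neighbour".format(k))
plt.show()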

#%%

############################ OPTICS ###########################################

from sklearn.cluster import OPTICS, cluster_optics_dbscan
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt


df_ichis = df_tantesperanze

X = StandardScaler().fit_transform(df_ichis)


clust = OPTICS(min_samples=2, xi=0.1, min_cluster_size=0.001)

# Run the fit
clust.fit(X)
# labels for two DBSCAN cuts of the reachability plot (eps_1 and eps_2)
eps_1 = 2
labels_050 = cluster_optics_dbscan(reachability=clust.reachability_,
                                   core_distances=clust.core_distances_,
                                   ordering=clust.ordering_, eps=eps_1)
eps_2 = 0.1
labels_200 = cluster_optics_dbscan(reachability=clust.reachability_,
                                   core_distances=clust.core_distances_,
                                   ordering=clust.ordering_, eps=eps_2)

space = np.arange(len(X))
reachability = clust.reachability_[clust.ordering_]
labels = clust.labels_[clust.ordering_]

plt.figure(figsize=(20, 14))
G = gridspec.GridSpec(2, 3)
ax1 = plt.subplot(G[0, :])
ax2 = plt.subplot(G[1, 0])
ax3 = plt.subplot(G[1, 1])
ax4 = plt.subplot(G[1, 2])


import random

n_clusters = len(set(clust.labels_)) - (1 if -1 in clust.labels_ else 0)
# Reachability plot, one random colour per cluster
for klass in range(0, n_clusters):
    r = random.randint(0, 255)
    g = random.randint(0, 255)
    b = random.randint(0, 255)
    rgb = (r / 255, g / 255, b / 255)  # matplotlib expects an RGB tuple in [0, 1]
    Xk = space[labels == klass]
    Rk = reachability[labels == klass]
    ax1.plot(Xk, Rk, '.', color=rgb, alpha=0.3)
ax1.plot(space[labels == -1], reachability[labels == -1], 'k.', alpha=0.3)
ax1.plot(space, np.full_like(space, float(eps_1), dtype=float), 'k-', alpha=0.5)
ax1.plot(space, np.full_like(space, float(eps_2), dtype=float), 'k-.', alpha=0.5)
ax1.set_ylabel('Reachability (epsilon distance)')
ax1.set_title('Reachability Plot')
ax1.set_ylim(-0.1, 2)


# OPTICS
colors = ['g.', 'r.', 'b.', 'y.', 'c.']
for klass, color in zip(range(0, 5), colors):
    Xk = X[clust.labels_ == klass]
    ax2.plot(Xk[:, 0], Xk[:, 1], color, alpha=0.3)
ax2.plot(X[clust.labels_ == -1, 0], X[clust.labels_ == -1, 1], 'k+', alpha=0.1)
n_clusters = len(set(clust.labels_)) - (1 if -1 in clust.labels_ else 0)
ax2.set_title('cluster {} Automatic Clustering\nOPTICS xi'.format(n_clusters))

# DBSCAN cut at eps_1
colors = ['g', 'greenyellow', 'olive', 'r', 'b', 'c']
for klass, color in zip(range(0, 6), colors):
    Xk = X[labels_050 == klass]
    ax3.plot(Xk[:, 0], Xk[:, 1], color, alpha=0.3, marker='.')
ax3.plot(X[labels_050 == -1, 0], X[labels_050 == -1, 1], 'k+', alpha=0.1)
n_clusters = len(set(labels_050)) - (1 if -1 in labels_050 else 0)
ax3.set_title('cluster {} Clustering at {} epsilon cut\nDBSCAN'.format(n_clusters, eps_1))

# DBSCAN cut at eps_2
colors = ['g.', 'm.', 'y.', 'c.']
for klass, color in zip(range(0, 4), colors):
    Xk = X[labels_200 == klass]
    ax4.plot(Xk[:, 0], Xk[:, 1], color, alpha=0.3)
ax4.plot(X[labels_200 == -1, 0], X[labels_200 == -1, 1], 'k+', alpha=0.1)
n_clusters = len(set(labels_200)) - (1 if -1 in labels_200 else 0)
ax4.set_title('cluster {} Clustering at {} epsilon cut\nDBSCAN'.format(n_clusters, eps_2))
plt.tight_layout()
plt.savefig('f2')  # save after tight_layout so the file uses the adjusted layout
plt.show()

#%%

from sklearn.cluster import OPTICS, cluster_optics_dbscan

df_ichis = df_tantesperanze

X = StandardScaler().fit_transform(df_ichis)

np.seterr(divide='ignore')

for min_cluster_size in range(1, 10, 1):
    min_cluster_size = min_cluster_size / 10
    for xi in range(1, 100, 1):
        xi = xi / 100
        clust = OPTICS(min_samples=2, xi=xi, min_cluster_size=min_cluster_size)

        # Run the fit
        clust.fit(X)
        eps_1 = 2
        labels_050 = cluster_optics_dbscan(reachability=clust.reachability_,
                                           core_distances=clust.core_distances_,
                                           ordering=clust.ordering_, eps=eps_1)
        eps_2 = 0.1
        labels_200 = cluster_optics_dbscan(reachability=clust.reachability_,
                                           core_distances=clust.core_distances_,
                                           ordering=clust.ordering_, eps=eps_2)

        n_clusters = len(set(clust.labels_)) - (1 if -1 in clust.labels_ else 0)

        if n_clusters == 14:
            # print("N_CLUSTERS = ", n_clusters)
            print("Parameters: xi = {xi} min_cluster_size = {min_cluster_size}".format(xi=xi, min_cluster_size=min_cluster_size))

#%%


########################### PCA ######################################
df_ichis = df_tantesperanze

pca = PCA(n_components=2)
print(df_ichis.head(3))

X = StandardScaler().fit_transform(df_ichis)
principal_components = pca.fit_transform(X)
df_pca = pd.DataFrame(data=principal_components, columns=['component1', 'component2'])

print(df_pca.head(5))

etichette_optics = clust.labels_
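
# Worth checking how much variance the two components actually retain
# (explained_variance_ratio_ is sklearn's standard PCA attribute):
print("Explained variance ratio:", pca.explained_variance_ratio_)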

#%%

df_pca.plot()

sns.scatterplot(x='component1', y='component2', data=df_pca)

#%%

plt.scatter(df_pca['component1'], df_pca['component2'], c=etichette_optics, s=50, alpha=0.5)
# plt.scatter(centroids[:, 0], centroids[:, 1], c='red', s=50)
# plt.ylim(-5, 25)
# plt.xlim(-5, 40)

#%%

############################# K-MEANS ####################################################

from pandas import DataFrame
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

# Data = {'x': [25,34,22,27,33,33,31,22,35,34,67,54,57,43,50,57,59,52,65,47,49,48,35,33,44,45,38,43,51,46],
#         'y': [79,51,53,78,59,74,73,57,69,75,51,32,40,47,53,36,35,58,59,50,25,20,14,12,20,5,29,27,8,7]
#        }
#
# df_kmeans = DataFrame(Data, columns=['x', 'y'])


print(df_pca.head(5))

df_kmeans = df_pca

# Fit in the PCA plane so the centroids share the axes of the scatter plot below
kmeans = KMeans(n_clusters=23).fit(df_kmeans)
centroids = kmeans.cluster_centers_
print(centroids)
print(kmeans.labels_)


plt.scatter(df_kmeans['component1'], df_kmeans['component2'], c=kmeans.labels_.astype(float), s=50, alpha=0.5)
plt.scatter(centroids[:, 0], centroids[:, 1], c='red', s=50)
plt.ylim(-5, 25)

plt.xlim(-5, 40)

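#%%

# n_clusters=23 above is hand-picked. A quick elbow check (a sketch, not part
# of the original analysis) can sanity-check that choice in the PCA plane:
inertias = []
ks = range(2, 30)
for k in ks:
    inertias.append(KMeans(n_clusters=k, n_init=10).fit(df_kmeans).inertia_)
plt.plot(list(ks), inertias, marker='o')
plt.xlabel('k')
plt.ylabel('inertia')
plt.show()
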
#%%

time_list = [p.time for p in pcap]

time_list = np.array(time_list)

min_time = time_list.min()
time_threshold = time_list.min() + (5 * 60)


#%%

[str(mac_lookup(x)) for x in ap]