Advertisement
Guest User

Untitled

a guest
Mar 20th, 2019
65
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 35.47 KB | None | 0 0
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 88,
  6. "metadata": {},
  7. "outputs": [],
  8. "source": [
  9. "import numpy as np\n",
  10. "import pandas as pd\n",
  11. "import matplotlib.pyplot as plt\n",
  12. "%matplotlib inline"
  13. ]
  14. },
  15. {
  16. "cell_type": "code",
  17. "execution_count": 89,
  18. "metadata": {},
  19. "outputs": [],
  20. "source": [
  21. "df = pd.read_csv('a1 (1).csv')"
  22. ]
  23. },
  24. {
  25. "cell_type": "code",
  26. "execution_count": 90,
  27. "metadata": {},
  28. "outputs": [
  29. {
  30. "data": {
  31. "text/html": [
  32. "<div>\n",
  33. "<style scoped>\n",
  34. " .dataframe tbody tr th:only-of-type {\n",
  35. " vertical-align: middle;\n",
  36. " }\n",
  37. "\n",
  38. " .dataframe tbody tr th {\n",
  39. " vertical-align: top;\n",
  40. " }\n",
  41. "\n",
  42. " .dataframe thead th {\n",
  43. " text-align: right;\n",
  44. " }\n",
  45. "</style>\n",
  46. "<table border=\"1\" class=\"dataframe\">\n",
  47. " <thead>\n",
  48. " <tr style=\"text-align: right;\">\n",
  49. " <th></th>\n",
  50. " <th>Unnamed: 0</th>\n",
  51. " <th>No.</th>\n",
  52. " <th>Time</th>\n",
  53. " <th>Source</th>\n",
  54. " <th>Destination</th>\n",
  55. " <th>Protocol</th>\n",
  56. " <th>Length</th>\n",
  57. " <th>Info</th>\n",
  58. " </tr>\n",
  59. " </thead>\n",
  60. " <tbody>\n",
  61. " <tr>\n",
  62. " <th>0</th>\n",
  63. " <td>0</td>\n",
  64. " <td>1</td>\n",
  65. " <td>0.000000</td>\n",
  66. " <td>1</td>\n",
  67. " <td>8</td>\n",
  68. " <td>1</td>\n",
  69. " <td>0.733333</td>\n",
  70. " <td>11</td>\n",
  71. " </tr>\n",
  72. " <tr>\n",
  73. " <th>1</th>\n",
  74. " <td>1</td>\n",
  75. " <td>2</td>\n",
  76. " <td>0.000322</td>\n",
  77. " <td>2</td>\n",
  78. " <td>8</td>\n",
  79. " <td>1</td>\n",
  80. " <td>0.733333</td>\n",
  81. " <td>11</td>\n",
  82. " </tr>\n",
  83. " <tr>\n",
  84. " <th>2</th>\n",
  85. " <td>2</td>\n",
  86. " <td>3</td>\n",
  87. " <td>0.007518</td>\n",
  88. " <td>1</td>\n",
  89. " <td>8</td>\n",
  90. " <td>1</td>\n",
  91. " <td>0.700000</td>\n",
  92. " <td>8</td>\n",
  93. " </tr>\n",
  94. " <tr>\n",
  95. " <th>3</th>\n",
  96. " <td>3</td>\n",
  97. " <td>4</td>\n",
  98. " <td>0.011652</td>\n",
  99. " <td>3</td>\n",
  100. " <td>11</td>\n",
  101. " <td>2</td>\n",
  102. " <td>0.166667</td>\n",
  103. " <td>1</td>\n",
  104. " </tr>\n",
  105. " <tr>\n",
  106. " <th>4</th>\n",
  107. " <td>4</td>\n",
  108. " <td>5</td>\n",
  109. " <td>0.000189</td>\n",
  110. " <td>6</td>\n",
  111. " <td>10</td>\n",
  112. " <td>4</td>\n",
  113. " <td>0.533333</td>\n",
  114. " <td>3</td>\n",
  115. " </tr>\n",
  116. " </tbody>\n",
  117. "</table>\n",
  118. "</div>"
  119. ],
  120. "text/plain": [
  121. " Unnamed: 0 No. Time Source Destination Protocol Length Info\n",
  122. "0 0 1 0.000000 1 8 1 0.733333 11\n",
  123. "1 1 2 0.000322 2 8 1 0.733333 11\n",
  124. "2 2 3 0.007518 1 8 1 0.700000 8\n",
  125. "3 3 4 0.011652 3 11 2 0.166667 1\n",
  126. "4 4 5 0.000189 6 10 4 0.533333 3"
  127. ]
  128. },
  129. "execution_count": 90,
  130. "metadata": {},
  131. "output_type": "execute_result"
  132. }
  133. ],
  134. "source": [
  135. "df.head()"
  136. ]
  137. },
  138. {
  139. "cell_type": "code",
  140. "execution_count": 91,
  141. "metadata": {},
  142. "outputs": [
  143. {
  144. "data": {
  145. "text/html": [
  146. "<div>\n",
  147. "<style scoped>\n",
  148. " .dataframe tbody tr th:only-of-type {\n",
  149. " vertical-align: middle;\n",
  150. " }\n",
  151. "\n",
  152. " .dataframe tbody tr th {\n",
  153. " vertical-align: top;\n",
  154. " }\n",
  155. "\n",
  156. " .dataframe thead th {\n",
  157. " text-align: right;\n",
  158. " }\n",
  159. "</style>\n",
  160. "<table border=\"1\" class=\"dataframe\">\n",
  161. " <thead>\n",
  162. " <tr style=\"text-align: right;\">\n",
  163. " <th></th>\n",
  164. " <th>Unnamed: 0</th>\n",
  165. " <th>No.</th>\n",
  166. " <th>Time</th>\n",
  167. " <th>Source</th>\n",
  168. " <th>Destination</th>\n",
  169. " <th>Protocol</th>\n",
  170. " <th>Length</th>\n",
  171. " <th>Info</th>\n",
  172. " </tr>\n",
  173. " </thead>\n",
  174. " <tbody>\n",
  175. " <tr>\n",
  176. " <th>62261</th>\n",
  177. " <td>91322</td>\n",
  178. " <td>91323</td>\n",
  179. " <td>0.052006</td>\n",
  180. " <td>2</td>\n",
  181. " <td>8</td>\n",
  182. " <td>1</td>\n",
  183. " <td>0.7</td>\n",
  184. " <td>2</td>\n",
  185. " </tr>\n",
  186. " <tr>\n",
  187. " <th>62262</th>\n",
  188. " <td>91323</td>\n",
  189. " <td>91324</td>\n",
  190. " <td>0.947014</td>\n",
  191. " <td>1</td>\n",
  192. " <td>8</td>\n",
  193. " <td>1</td>\n",
  194. " <td>0.7</td>\n",
  195. " <td>8</td>\n",
  196. " </tr>\n",
  197. " <tr>\n",
  198. " <th>62263</th>\n",
  199. " <td>91324</td>\n",
  200. " <td>91325</td>\n",
  201. " <td>0.052536</td>\n",
  202. " <td>2</td>\n",
  203. " <td>8</td>\n",
  204. " <td>1</td>\n",
  205. " <td>0.7</td>\n",
  206. " <td>2</td>\n",
  207. " </tr>\n",
  208. " <tr>\n",
  209. " <th>62264</th>\n",
  210. " <td>91325</td>\n",
  211. " <td>91326</td>\n",
  212. " <td>0.946974</td>\n",
  213. " <td>1</td>\n",
  214. " <td>8</td>\n",
  215. " <td>1</td>\n",
  216. " <td>0.7</td>\n",
  217. " <td>8</td>\n",
  218. " </tr>\n",
  219. " <tr>\n",
  220. " <th>62265</th>\n",
  221. " <td>91326</td>\n",
  222. " <td>91327</td>\n",
  223. " <td>0.052726</td>\n",
  224. " <td>2</td>\n",
  225. " <td>8</td>\n",
  226. " <td>1</td>\n",
  227. " <td>0.7</td>\n",
  228. " <td>2</td>\n",
  229. " </tr>\n",
  230. " </tbody>\n",
  231. "</table>\n",
  232. "</div>"
  233. ],
  234. "text/plain": [
  235. " Unnamed: 0 No. Time Source Destination Protocol Length \\\n",
  236. "62261 91322 91323 0.052006 2 8 1 0.7 \n",
  237. "62262 91323 91324 0.947014 1 8 1 0.7 \n",
  238. "62263 91324 91325 0.052536 2 8 1 0.7 \n",
  239. "62264 91325 91326 0.946974 1 8 1 0.7 \n",
  240. "62265 91326 91327 0.052726 2 8 1 0.7 \n",
  241. "\n",
  242. " Info \n",
  243. "62261 2 \n",
  244. "62262 8 \n",
  245. "62263 2 \n",
  246. "62264 8 \n",
  247. "62265 2 "
  248. ]
  249. },
  250. "execution_count": 91,
  251. "metadata": {},
  252. "output_type": "execute_result"
  253. }
  254. ],
  255. "source": [
  256. "df.tail()"
  257. ]
  258. },
  259. {
  260. "cell_type": "code",
  261. "execution_count": 92,
  262. "metadata": {},
  263. "outputs": [],
  264. "source": [
  265. "df.drop('No.',axis=1,inplace=True)\n"
  266. ]
  267. },
  268. {
  269. "cell_type": "code",
  270. "execution_count": 93,
  271. "metadata": {},
  272. "outputs": [
  273. {
  274. "data": {
  275. "text/html": [
  276. "<div>\n",
  277. "<style scoped>\n",
  278. " .dataframe tbody tr th:only-of-type {\n",
  279. " vertical-align: middle;\n",
  280. " }\n",
  281. "\n",
  282. " .dataframe tbody tr th {\n",
  283. " vertical-align: top;\n",
  284. " }\n",
  285. "\n",
  286. " .dataframe thead th {\n",
  287. " text-align: right;\n",
  288. " }\n",
  289. "</style>\n",
  290. "<table border=\"1\" class=\"dataframe\">\n",
  291. " <thead>\n",
  292. " <tr style=\"text-align: right;\">\n",
  293. " <th></th>\n",
  294. " <th>Unnamed: 0</th>\n",
  295. " <th>Time</th>\n",
  296. " <th>Source</th>\n",
  297. " <th>Destination</th>\n",
  298. " <th>Protocol</th>\n",
  299. " <th>Length</th>\n",
  300. " <th>Info</th>\n",
  301. " </tr>\n",
  302. " </thead>\n",
  303. " <tbody>\n",
  304. " <tr>\n",
  305. " <th>0</th>\n",
  306. " <td>0</td>\n",
  307. " <td>0.000000</td>\n",
  308. " <td>1</td>\n",
  309. " <td>8</td>\n",
  310. " <td>1</td>\n",
  311. " <td>0.733333</td>\n",
  312. " <td>11</td>\n",
  313. " </tr>\n",
  314. " <tr>\n",
  315. " <th>1</th>\n",
  316. " <td>1</td>\n",
  317. " <td>0.000322</td>\n",
  318. " <td>2</td>\n",
  319. " <td>8</td>\n",
  320. " <td>1</td>\n",
  321. " <td>0.733333</td>\n",
  322. " <td>11</td>\n",
  323. " </tr>\n",
  324. " <tr>\n",
  325. " <th>2</th>\n",
  326. " <td>2</td>\n",
  327. " <td>0.007518</td>\n",
  328. " <td>1</td>\n",
  329. " <td>8</td>\n",
  330. " <td>1</td>\n",
  331. " <td>0.700000</td>\n",
  332. " <td>8</td>\n",
  333. " </tr>\n",
  334. " <tr>\n",
  335. " <th>3</th>\n",
  336. " <td>3</td>\n",
  337. " <td>0.011652</td>\n",
  338. " <td>3</td>\n",
  339. " <td>11</td>\n",
  340. " <td>2</td>\n",
  341. " <td>0.166667</td>\n",
  342. " <td>1</td>\n",
  343. " </tr>\n",
  344. " <tr>\n",
  345. " <th>4</th>\n",
  346. " <td>4</td>\n",
  347. " <td>0.000189</td>\n",
  348. " <td>6</td>\n",
  349. " <td>10</td>\n",
  350. " <td>4</td>\n",
  351. " <td>0.533333</td>\n",
  352. " <td>3</td>\n",
  353. " </tr>\n",
  354. " </tbody>\n",
  355. "</table>\n",
  356. "</div>"
  357. ],
  358. "text/plain": [
  359. " Unnamed: 0 Time Source Destination Protocol Length Info\n",
  360. "0 0 0.000000 1 8 1 0.733333 11\n",
  361. "1 1 0.000322 2 8 1 0.733333 11\n",
  362. "2 2 0.007518 1 8 1 0.700000 8\n",
  363. "3 3 0.011652 3 11 2 0.166667 1\n",
  364. "4 4 0.000189 6 10 4 0.533333 3"
  365. ]
  366. },
  367. "execution_count": 93,
  368. "metadata": {},
  369. "output_type": "execute_result"
  370. }
  371. ],
  372. "source": [
  373. "df.head()"
  374. ]
  375. },
  376. {
  377. "cell_type": "code",
  378. "execution_count": 94,
  379. "metadata": {},
  380. "outputs": [],
  381. "source": [
  382. "df.drop('Unnamed: 0',axis=1,inplace=True)\n"
  383. ]
  384. },
  385. {
  386. "cell_type": "code",
  387. "execution_count": 95,
  388. "metadata": {},
  389. "outputs": [
  390. {
  391. "data": {
  392. "text/html": [
  393. "<div>\n",
  394. "<style scoped>\n",
  395. " .dataframe tbody tr th:only-of-type {\n",
  396. " vertical-align: middle;\n",
  397. " }\n",
  398. "\n",
  399. " .dataframe tbody tr th {\n",
  400. " vertical-align: top;\n",
  401. " }\n",
  402. "\n",
  403. " .dataframe thead th {\n",
  404. " text-align: right;\n",
  405. " }\n",
  406. "</style>\n",
  407. "<table border=\"1\" class=\"dataframe\">\n",
  408. " <thead>\n",
  409. " <tr style=\"text-align: right;\">\n",
  410. " <th></th>\n",
  411. " <th>Time</th>\n",
  412. " <th>Source</th>\n",
  413. " <th>Destination</th>\n",
  414. " <th>Protocol</th>\n",
  415. " <th>Length</th>\n",
  416. " <th>Info</th>\n",
  417. " </tr>\n",
  418. " </thead>\n",
  419. " <tbody>\n",
  420. " <tr>\n",
  421. " <th>0</th>\n",
  422. " <td>0.000000</td>\n",
  423. " <td>1</td>\n",
  424. " <td>8</td>\n",
  425. " <td>1</td>\n",
  426. " <td>0.733333</td>\n",
  427. " <td>11</td>\n",
  428. " </tr>\n",
  429. " <tr>\n",
  430. " <th>1</th>\n",
  431. " <td>0.000322</td>\n",
  432. " <td>2</td>\n",
  433. " <td>8</td>\n",
  434. " <td>1</td>\n",
  435. " <td>0.733333</td>\n",
  436. " <td>11</td>\n",
  437. " </tr>\n",
  438. " <tr>\n",
  439. " <th>2</th>\n",
  440. " <td>0.007518</td>\n",
  441. " <td>1</td>\n",
  442. " <td>8</td>\n",
  443. " <td>1</td>\n",
  444. " <td>0.700000</td>\n",
  445. " <td>8</td>\n",
  446. " </tr>\n",
  447. " <tr>\n",
  448. " <th>3</th>\n",
  449. " <td>0.011652</td>\n",
  450. " <td>3</td>\n",
  451. " <td>11</td>\n",
  452. " <td>2</td>\n",
  453. " <td>0.166667</td>\n",
  454. " <td>1</td>\n",
  455. " </tr>\n",
  456. " <tr>\n",
  457. " <th>4</th>\n",
  458. " <td>0.000189</td>\n",
  459. " <td>6</td>\n",
  460. " <td>10</td>\n",
  461. " <td>4</td>\n",
  462. " <td>0.533333</td>\n",
  463. " <td>3</td>\n",
  464. " </tr>\n",
  465. " </tbody>\n",
  466. "</table>\n",
  467. "</div>"
  468. ],
  469. "text/plain": [
  470. " Time Source Destination Protocol Length Info\n",
  471. "0 0.000000 1 8 1 0.733333 11\n",
  472. "1 0.000322 2 8 1 0.733333 11\n",
  473. "2 0.007518 1 8 1 0.700000 8\n",
  474. "3 0.011652 3 11 2 0.166667 1\n",
  475. "4 0.000189 6 10 4 0.533333 3"
  476. ]
  477. },
  478. "execution_count": 95,
  479. "metadata": {},
  480. "output_type": "execute_result"
  481. }
  482. ],
  483. "source": [
  484. "df.head()"
  485. ]
  486. },
  487. {
  488. "cell_type": "code",
  489. "execution_count": 96,
  490. "metadata": {},
  491. "outputs": [
  492. {
  493. "name": "stdout",
  494. "output_type": "stream",
  495. "text": [
  496. "<class 'pandas.core.frame.DataFrame'>\n",
  497. "RangeIndex: 62266 entries, 0 to 62265\n",
  498. "Data columns (total 6 columns):\n",
  499. "Time 62266 non-null float64\n",
  500. "Source 62266 non-null int64\n",
  501. "Destination 62266 non-null int64\n",
  502. "Protocol 62266 non-null int64\n",
  503. "Length 62266 non-null float64\n",
  504. "Info 62266 non-null int64\n",
  505. "dtypes: float64(2), int64(4)\n",
  506. "memory usage: 2.9 MB\n"
  507. ]
  508. }
  509. ],
  510. "source": [
  511. "df.info()"
  512. ]
  513. },
  514. {
  515. "cell_type": "code",
  516. "execution_count": 97,
  517. "metadata": {},
  518. "outputs": [],
  519. "source": [
  520. "from kmodes.kmodes import KModes"
  521. ]
  522. },
  523. {
  524. "cell_type": "code",
  525. "execution_count": 98,
  526. "metadata": {},
  527. "outputs": [],
  528. "source": [
  529. "km = KModes(n_clusters=2, init='Huang', n_init=5, verbose=1)\n"
  530. ]
  531. },
  532. {
  533. "cell_type": "code",
  534. "execution_count": 99,
  535. "metadata": {},
  536. "outputs": [
  537. {
  538. "name": "stderr",
  539. "output_type": "stream",
  540. "text": [
  541. "/home/terminatorash2199/anaconda3/lib/python3.6/site-packages/kmodes/util/__init__.py:70: FutureWarning: arrays to stack must be passed as a \"sequence\" type such as list or tuple. Support for non-sequence iterables such as generators is deprecated as of NumPy 1.16 and will raise an error in the future.\n",
  542. " return np.vstack({tuple(row) for row in a})\n"
  543. ]
  544. },
  545. {
  546. "name": "stdout",
  547. "output_type": "stream",
  548. "text": [
  549. "Init: initializing centroids\n",
  550. "Init: initializing clusters\n",
  551. "Starting iterations...\n",
  552. "Run 1, iteration: 1/100, moves: 16541, cost: 202102.0\n",
  553. "Run 1, iteration: 2/100, moves: 0, cost: 202102.0\n",
  554. "Init: initializing centroids\n",
  555. "Init: initializing clusters\n",
  556. "Starting iterations...\n",
  557. "Run 2, iteration: 1/100, moves: 0, cost: 180015.0\n",
  558. "Init: initializing centroids\n",
  559. "Init: initializing clusters\n",
  560. "Starting iterations...\n",
  561. "Run 3, iteration: 1/100, moves: 15474, cost: 205554.0\n",
  562. "Run 3, iteration: 2/100, moves: 8367, cost: 202477.0\n",
  563. "Run 3, iteration: 3/100, moves: 80, cost: 202477.0\n",
  564. "Init: initializing centroids\n",
  565. "Init: initializing clusters\n",
  566. "Starting iterations...\n",
  567. "Run 4, iteration: 1/100, moves: 0, cost: 180015.0\n",
  568. "Init: initializing centroids\n",
  569. "Init: initializing clusters\n",
  570. "Starting iterations...\n",
  571. "Run 5, iteration: 1/100, moves: 0, cost: 202102.0\n",
  572. "Best run was number 2\n"
  573. ]
  574. }
  575. ],
  576. "source": [
  577. "clusters = km.fit_predict(df)\n"
  578. ]
  579. },
  580. {
  581. "cell_type": "code",
  582. "execution_count": 100,
  583. "metadata": {},
  584. "outputs": [
  585. {
  586. "name": "stdout",
  587. "output_type": "stream",
  588. "text": [
  589. "[[2.60000000e-04 3.00000000e+00 5.00000000e+00 2.00000000e+00\n",
  590. " 1.66666667e-01 1.00000000e+00]\n",
  591. " [3.22000000e-04 2.00000000e+00 8.00000000e+00 1.00000000e+00\n",
  592. " 7.00000000e-01 8.00000000e+00]]\n"
  593. ]
  594. }
  595. ],
  596. "source": [
  597. "print(km.cluster_centroids_)"
  598. ]
  599. },
  600. {
  601. "cell_type": "code",
  602. "execution_count": 101,
  603. "metadata": {},
  604. "outputs": [
  605. {
  606. "data": {
  607. "text/plain": [
  608. "array([1, 1, 1, ..., 1, 1, 1], dtype=uint8)"
  609. ]
  610. },
  611. "execution_count": 101,
  612. "metadata": {},
  613. "output_type": "execute_result"
  614. }
  615. ],
  616. "source": [
  617. "clusters"
  618. ]
  619. },
  620. {
  621. "cell_type": "code",
  622. "execution_count": 102,
  623. "metadata": {},
  624. "outputs": [
  625. {
  626. "data": {
  627. "text/plain": [
  628. "numpy.ndarray"
  629. ]
  630. },
  631. "execution_count": 102,
  632. "metadata": {},
  633. "output_type": "execute_result"
  634. }
  635. ],
  636. "source": [
  637. "type(clusters)"
  638. ]
  639. },
  640. {
  641. "cell_type": "code",
  642. "execution_count": 103,
  643. "metadata": {},
  644. "outputs": [],
  645. "source": [
  646. "df2 = pd.DataFrame(clusters)"
  647. ]
  648. },
  649. {
  650. "cell_type": "code",
  651. "execution_count": 104,
  652. "metadata": {},
  653. "outputs": [
  654. {
  655. "data": {
  656. "text/plain": [
  657. "pandas.core.frame.DataFrame"
  658. ]
  659. },
  660. "execution_count": 104,
  661. "metadata": {},
  662. "output_type": "execute_result"
  663. }
  664. ],
  665. "source": [
  666. "type(df2)"
  667. ]
  668. },
  669. {
  670. "cell_type": "code",
  671. "execution_count": 105,
  672. "metadata": {},
  673. "outputs": [],
  674. "source": [
  675. "df5 = df.join(pd.DataFrame(clusters))"
  676. ]
  677. },
  678. {
  679. "cell_type": "code",
  680. "execution_count": 106,
  681. "metadata": {},
  682. "outputs": [
  683. {
  684. "data": {
  685. "text/html": [
  686. "<div>\n",
  687. "<style scoped>\n",
  688. " .dataframe tbody tr th:only-of-type {\n",
  689. " vertical-align: middle;\n",
  690. " }\n",
  691. "\n",
  692. " .dataframe tbody tr th {\n",
  693. " vertical-align: top;\n",
  694. " }\n",
  695. "\n",
  696. " .dataframe thead th {\n",
  697. " text-align: right;\n",
  698. " }\n",
  699. "</style>\n",
  700. "<table border=\"1\" class=\"dataframe\">\n",
  701. " <thead>\n",
  702. " <tr style=\"text-align: right;\">\n",
  703. " <th></th>\n",
  704. " <th>Time</th>\n",
  705. " <th>Source</th>\n",
  706. " <th>Destination</th>\n",
  707. " <th>Protocol</th>\n",
  708. " <th>Length</th>\n",
  709. " <th>Info</th>\n",
  710. " <th>0</th>\n",
  711. " </tr>\n",
  712. " </thead>\n",
  713. " <tbody>\n",
  714. " <tr>\n",
  715. " <th>0</th>\n",
  716. " <td>0.000000</td>\n",
  717. " <td>1</td>\n",
  718. " <td>8</td>\n",
  719. " <td>1</td>\n",
  720. " <td>0.733333</td>\n",
  721. " <td>11</td>\n",
  722. " <td>1</td>\n",
  723. " </tr>\n",
  724. " <tr>\n",
  725. " <th>1</th>\n",
  726. " <td>0.000322</td>\n",
  727. " <td>2</td>\n",
  728. " <td>8</td>\n",
  729. " <td>1</td>\n",
  730. " <td>0.733333</td>\n",
  731. " <td>11</td>\n",
  732. " <td>1</td>\n",
  733. " </tr>\n",
  734. " <tr>\n",
  735. " <th>2</th>\n",
  736. " <td>0.007518</td>\n",
  737. " <td>1</td>\n",
  738. " <td>8</td>\n",
  739. " <td>1</td>\n",
  740. " <td>0.700000</td>\n",
  741. " <td>8</td>\n",
  742. " <td>1</td>\n",
  743. " </tr>\n",
  744. " <tr>\n",
  745. " <th>3</th>\n",
  746. " <td>0.011652</td>\n",
  747. " <td>3</td>\n",
  748. " <td>11</td>\n",
  749. " <td>2</td>\n",
  750. " <td>0.166667</td>\n",
  751. " <td>1</td>\n",
  752. " <td>0</td>\n",
  753. " </tr>\n",
  754. " <tr>\n",
  755. " <th>4</th>\n",
  756. " <td>0.000189</td>\n",
  757. " <td>6</td>\n",
  758. " <td>10</td>\n",
  759. " <td>4</td>\n",
  760. " <td>0.533333</td>\n",
  761. " <td>3</td>\n",
  762. " <td>0</td>\n",
  763. " </tr>\n",
  764. " </tbody>\n",
  765. "</table>\n",
  766. "</div>"
  767. ],
  768. "text/plain": [
  769. " Time Source Destination Protocol Length Info 0\n",
  770. "0 0.000000 1 8 1 0.733333 11 1\n",
  771. "1 0.000322 2 8 1 0.733333 11 1\n",
  772. "2 0.007518 1 8 1 0.700000 8 1\n",
  773. "3 0.011652 3 11 2 0.166667 1 0\n",
  774. "4 0.000189 6 10 4 0.533333 3 0"
  775. ]
  776. },
  777. "execution_count": 106,
  778. "metadata": {},
  779. "output_type": "execute_result"
  780. }
  781. ],
  782. "source": [
  783. "df5.head()"
  784. ]
  785. },
  786. {
  787. "cell_type": "code",
  788. "execution_count": 107,
  789. "metadata": {},
  790. "outputs": [
  791. {
  792. "name": "stdout",
  793. "output_type": "stream",
  794. "text": [
  795. "<class 'pandas.core.frame.DataFrame'>\n",
  796. "RangeIndex: 62266 entries, 0 to 62265\n",
  797. "Data columns (total 7 columns):\n",
  798. "Time 62266 non-null float64\n",
  799. "Source 62266 non-null int64\n",
  800. "Destination 62266 non-null int64\n",
  801. "Protocol 62266 non-null int64\n",
  802. "Length 62266 non-null float64\n",
  803. "Info 62266 non-null int64\n",
  804. "0 62266 non-null uint8\n",
  805. "dtypes: float64(2), int64(4), uint8(1)\n",
  806. "memory usage: 2.9 MB\n"
  807. ]
  808. }
  809. ],
  810. "source": [
  811. "df5.info()"
  812. ]
  813. },
  814. {
  815. "cell_type": "code",
  816. "execution_count": 108,
  817. "metadata": {},
  818. "outputs": [
  819. {
  820. "data": {
  821. "text/html": [
  822. "<div>\n",
  823. "<style scoped>\n",
  824. " .dataframe tbody tr th:only-of-type {\n",
  825. " vertical-align: middle;\n",
  826. " }\n",
  827. "\n",
  828. " .dataframe tbody tr th {\n",
  829. " vertical-align: top;\n",
  830. " }\n",
  831. "\n",
  832. " .dataframe thead th {\n",
  833. " text-align: right;\n",
  834. " }\n",
  835. "</style>\n",
  836. "<table border=\"1\" class=\"dataframe\">\n",
  837. " <thead>\n",
  838. " <tr style=\"text-align: right;\">\n",
  839. " <th></th>\n",
  840. " <th>Time</th>\n",
  841. " <th>Source</th>\n",
  842. " <th>Destination</th>\n",
  843. " <th>Protocol</th>\n",
  844. " <th>Length</th>\n",
  845. " <th>Info</th>\n",
  846. " <th>0</th>\n",
  847. " </tr>\n",
  848. " </thead>\n",
  849. " <tbody>\n",
  850. " <tr>\n",
  851. " <th>62261</th>\n",
  852. " <td>0.052006</td>\n",
  853. " <td>2</td>\n",
  854. " <td>8</td>\n",
  855. " <td>1</td>\n",
  856. " <td>0.7</td>\n",
  857. " <td>2</td>\n",
  858. " <td>1</td>\n",
  859. " </tr>\n",
  860. " <tr>\n",
  861. " <th>62262</th>\n",
  862. " <td>0.947014</td>\n",
  863. " <td>1</td>\n",
  864. " <td>8</td>\n",
  865. " <td>1</td>\n",
  866. " <td>0.7</td>\n",
  867. " <td>8</td>\n",
  868. " <td>1</td>\n",
  869. " </tr>\n",
  870. " <tr>\n",
  871. " <th>62263</th>\n",
  872. " <td>0.052536</td>\n",
  873. " <td>2</td>\n",
  874. " <td>8</td>\n",
  875. " <td>1</td>\n",
  876. " <td>0.7</td>\n",
  877. " <td>2</td>\n",
  878. " <td>1</td>\n",
  879. " </tr>\n",
  880. " <tr>\n",
  881. " <th>62264</th>\n",
  882. " <td>0.946974</td>\n",
  883. " <td>1</td>\n",
  884. " <td>8</td>\n",
  885. " <td>1</td>\n",
  886. " <td>0.7</td>\n",
  887. " <td>8</td>\n",
  888. " <td>1</td>\n",
  889. " </tr>\n",
  890. " <tr>\n",
  891. " <th>62265</th>\n",
  892. " <td>0.052726</td>\n",
  893. " <td>2</td>\n",
  894. " <td>8</td>\n",
  895. " <td>1</td>\n",
  896. " <td>0.7</td>\n",
  897. " <td>2</td>\n",
  898. " <td>1</td>\n",
  899. " </tr>\n",
  900. " </tbody>\n",
  901. "</table>\n",
  902. "</div>"
  903. ],
  904. "text/plain": [
  905. " Time Source Destination Protocol Length Info 0\n",
  906. "62261 0.052006 2 8 1 0.7 2 1\n",
  907. "62262 0.947014 1 8 1 0.7 8 1\n",
  908. "62263 0.052536 2 8 1 0.7 2 1\n",
  909. "62264 0.946974 1 8 1 0.7 8 1\n",
  910. "62265 0.052726 2 8 1 0.7 2 1"
  911. ]
  912. },
  913. "execution_count": 108,
  914. "metadata": {},
  915. "output_type": "execute_result"
  916. }
  917. ],
  918. "source": [
  919. "df5.tail()"
  920. ]
  921. },
  922. {
  923. "cell_type": "code",
  924. "execution_count": 109,
  925. "metadata": {},
  926. "outputs": [],
  927. "source": [
  928. "df5.to_csv('kmode.csv')"
  929. ]
  930. },
  931. {
  932. "cell_type": "code",
  933. "execution_count": 110,
  934. "metadata": {},
  935. "outputs": [],
  936. "source": [
  937. "from keras import layers, optimizers, regularizers\n",
  938. "from keras.layers import Dense, Dropout, BatchNormalization, Activation\n",
  939. "from keras.models import Sequential\n",
  940. "\n",
  941. "from keras.utils import plot_model\n",
  942. "#from kt_utils import *\n",
  943. "import keras.backend as K\n",
  944. "\n",
  945. "import seaborn as sns\n",
  946. "\n",
  947. "from sklearn import preprocessing, model_selection \n",
  948. "\n",
  949. "import matplotlib.pyplot as plt\n",
  950. "from matplotlib.pyplot import imshow"
  951. ]
  952. },
  953. {
  954. "cell_type": "code",
  955. "execution_count": 111,
  956. "metadata": {},
  957. "outputs": [
  958. {
  959. "name": "stdout",
  960. "output_type": "stream",
  961. "text": [
  962. "(49812, 6) (49812, 0) (12454, 6) (12454, 0)\n"
  963. ]
  964. }
  965. ],
  966. "source": [
  967. "X = df5.iloc[:,0:6].values # first six columns: features\n",
  968. "Y = df5.iloc[:,6:].values # last column (index 6): KModes cluster label\n",
  969. "\n",
  970. "X = preprocessing.normalize(X, axis = 0)\n",
  971. "\n",
  972. "X_train,X_test,Y_train,Y_test = model_selection.train_test_split(X,Y,test_size=0.2)\n",
  973. "\n",
  974. "print(X_train.shape,Y_train.shape,X_test.shape,Y_test.shape)\n"
  975. ]
  976. },
  977. {
  978. "cell_type": "code",
  979. "execution_count": 112,
  980. "metadata": {},
  981. "outputs": [],
  982. "source": [
  983. "df5[0] = df5[0].astype(int)"
  984. ]
  985. },
  986. {
  987. "cell_type": "code",
  988. "execution_count": 113,
  989. "metadata": {},
  990. "outputs": [],
  991. "source": [
  992. "# df5[0] =df5[0].astype(int)\n",
  993. "# df5 = pd.get_dummies(df5, columns=[0])\n",
  994. "# df5.head(5)"
  995. ]
  996. },
  997. {
  998. "cell_type": "code",
  999. "execution_count": 114,
  1000. "metadata": {},
  1001. "outputs": [
  1002. {
  1003. "name": "stdout",
  1004. "output_type": "stream",
  1005. "text": [
  1006. "(49812, 6) (49812, 0) (12454, 6) (12454, 0)\n"
  1007. ]
  1008. }
  1009. ],
  1010. "source": [
  1011. "X = df5.iloc[:,0:6].values # first six columns: features\n",
  1012. "Y = df5.iloc[:,6:].values # last column (index 6): KModes cluster label\n",
  1013. "\n",
  1014. "X = preprocessing.normalize(X, axis = 0)\n",
  1015. "\n",
  1016. "X_train,X_test,Y_train,Y_test = model_selection.train_test_split(X,Y,test_size=0.2)\n",
  1017. "\n",
  1018. "print(X_train.shape,Y_train.shape,X_test.shape,Y_test.shape)\n"
  1019. ]
  1020. },
  1021. {
  1022. "cell_type": "code",
  1023. "execution_count": 115,
  1024. "metadata": {},
  1025. "outputs": [],
  1026. "source": [
  1027. "winemod1 = Sequential()\n",
  1028. "# layer 1\n",
  1029. "winemod1.add(Dense(30, input_dim=6, activation='relu', name='fc0',kernel_regularizer=regularizers.l2(0.01)))\n",
  1030. "winemod1.add(BatchNormalization(momentum=0.99, epsilon=0.001))\n",
  1031. "winemod1.add(Activation('relu'))\n",
  1032. "winemod1.add(Dense(2, name='fc3',bias_initializer='zeros'))\n",
  1033. "winemod1.add(Activation('softmax'))\n",
  1034. "\n",
  1035. "\n",
  1036. "# #layer 2\n",
  1037. "# winemod1.add(Dense(50, name='fc1',bias_initializer='zeros'))\n",
  1038. "# winemod1.add(BatchNormalization(momentum=0.99, epsilon=0.001))\n",
  1039. "# winemod1.add(Activation('tanh'))\n",
  1040. "# winemod1.add(Dropout(0.5))\n",
  1041. "# #layer 3\n",
  1042. "# winemod1.add(Dense(100, name='fc2',bias_initializer='zeros'))\n",
  1043. "# winemod1.add(BatchNormalization(momentum=0.99, epsilon=0.001))\n",
  1044. "# winemod1.add(Activation('relu'))\n",
  1045. "# winemod1.add(Dropout(0.5))\n",
  1046. "# #layer 4\n",
  1047. "# winemod1.add(Dense(2, name='fc3',bias_initializer='zeros'))\n",
  1048. "# winemod1.add(BatchNormalization(momentum=0.99, epsilon=0.001))\n",
  1049. "# winemod1.add(Activation('softmax'))"
  1050. ]
  1051. },
  1052. {
  1053. "cell_type": "code",
  1054. "execution_count": 116,
  1055. "metadata": {},
  1056. "outputs": [
  1057. {
  1058. "name": "stdout",
  1059. "output_type": "stream",
  1060. "text": [
  1061. "_________________________________________________________________\n",
  1062. "Layer (type) Output Shape Param # \n",
  1063. "=================================================================\n",
  1064. "fc0 (Dense) (None, 30) 210 \n",
  1065. "_________________________________________________________________\n",
  1066. "batch_normalization_22 (Batc (None, 30) 120 \n",
  1067. "_________________________________________________________________\n",
  1068. "activation_18 (Activation) (None, 30) 0 \n",
  1069. "_________________________________________________________________\n",
  1070. "fc3 (Dense) (None, 2) 62 \n",
  1071. "_________________________________________________________________\n",
  1072. "activation_19 (Activation) (None, 2) 0 \n",
  1073. "=================================================================\n",
  1074. "Total params: 392\n",
  1075. "Trainable params: 332\n",
  1076. "Non-trainable params: 60\n",
  1077. "_________________________________________________________________\n"
  1078. ]
  1079. }
  1080. ],
  1081. "source": [
  1082. "winemod1.summary()\n"
  1083. ]
  1084. },
  1085. {
  1086. "cell_type": "code",
  1087. "execution_count": 117,
  1088. "metadata": {},
  1089. "outputs": [],
  1090. "source": [
  1091. "Adam = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)\n",
  1092. "winemod1.compile(optimizer = Adam, loss = \"sparse_categorical_crossentropy\", metrics = [\"sparse_categorical_accuracy\"])"
  1093. ]
  1094. },
  1095. {
  1096. "cell_type": "code",
  1097. "execution_count": 118,
  1098. "metadata": {},
  1099. "outputs": [
  1100. {
  1101. "ename": "ValueError",
  1102. "evalue": "Error when checking target: expected activation_19 to have shape (1,) but got array with shape (0,)",
  1103. "output_type": "error",
  1104. "traceback": [
  1105. "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
  1106. "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
  1107. "\u001b[0;32m<ipython-input-118-0c2ba3d9d5cd>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mwinemod1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mY_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mepochs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m200\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mverbose\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch_size\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m64\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mvalidation_data\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mY_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
  1108. "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/keras/engine/training.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, **kwargs)\u001b[0m\n\u001b[1;32m 950\u001b[0m \u001b[0msample_weight\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msample_weight\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 951\u001b[0m \u001b[0mclass_weight\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mclass_weight\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 952\u001b[0;31m batch_size=batch_size)\n\u001b[0m\u001b[1;32m 953\u001b[0m \u001b[0;31m# Prepare validation data.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 954\u001b[0m \u001b[0mdo_validation\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
  1109. "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/keras/engine/training.py\u001b[0m in \u001b[0;36m_standardize_user_data\u001b[0;34m(self, x, y, sample_weight, class_weight, check_array_lengths, batch_size)\u001b[0m\n\u001b[1;32m 787\u001b[0m \u001b[0mfeed_output_shapes\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 788\u001b[0m \u001b[0mcheck_batch_axis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;31m# Don't enforce the batch size.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 789\u001b[0;31m exception_prefix='target')\n\u001b[0m\u001b[1;32m 790\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 791\u001b[0m \u001b[0;31m# Generate sample-wise weight values given the `sample_weight` and\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
  1110. "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/keras/engine/training_utils.py\u001b[0m in \u001b[0;36mstandardize_input_data\u001b[0;34m(data, names, shapes, check_batch_axis, exception_prefix)\u001b[0m\n\u001b[1;32m 136\u001b[0m \u001b[0;34m': expected '\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mnames\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m' to have shape '\u001b[0m \u001b[0;34m+\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 137\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m' but got array with shape '\u001b[0m \u001b[0;34m+\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 138\u001b[0;31m str(data_shape))\n\u001b[0m\u001b[1;32m 139\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 140\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
  1111. "\u001b[0;31mValueError\u001b[0m: Error when checking target: expected activation_19 to have shape (1,) but got array with shape (0,)"
  1112. ]
  1113. }
  1114. ],
  1115. "source": [
  1116. "winemod1.fit(x = X_train, y = Y_train, epochs = 200,verbose=1, batch_size = 64,validation_data=(X_test, Y_test))"
  1117. ]
  1118. },
  1119. {
  1120. "cell_type": "code",
  1121. "execution_count": null,
  1122. "metadata": {},
  1123. "outputs": [],
  1124. "source": []
  1125. },
  1126. {
  1127. "cell_type": "code",
  1128. "execution_count": null,
  1129. "metadata": {},
  1130. "outputs": [],
  1131. "source": []
  1132. }
  1133. ],
  1134. "metadata": {
  1135. "kernelspec": {
  1136. "display_name": "Python 3",
  1137. "language": "python",
  1138. "name": "python3"
  1139. },
  1140. "language_info": {
  1141. "codemirror_mode": {
  1142. "name": "ipython",
  1143. "version": 3
  1144. },
  1145. "file_extension": ".py",
  1146. "mimetype": "text/x-python",
  1147. "name": "python",
  1148. "nbconvert_exporter": "python",
  1149. "pygments_lexer": "ipython3",
  1150. "version": "3.6.5"
  1151. }
  1152. },
  1153. "nbformat": 4,
  1154. "nbformat_minor": 2
  1155. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement