Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 88,
- "metadata": {},
- "outputs": [],
- "source": [
- "import numpy as np\n",
- "import pandas as pd\n",
- "import matplotlib.pyplot as plt\n",
- "%matplotlib inline"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 89,
- "metadata": {},
- "outputs": [],
- "source": [
- "df = pd.read_csv('a1 (1).csv')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 90,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>Unnamed: 0</th>\n",
- " <th>No.</th>\n",
- " <th>Time</th>\n",
- " <th>Source</th>\n",
- " <th>Destination</th>\n",
- " <th>Protocol</th>\n",
- " <th>Length</th>\n",
- " <th>Info</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>0</td>\n",
- " <td>1</td>\n",
- " <td>0.000000</td>\n",
- " <td>1</td>\n",
- " <td>8</td>\n",
- " <td>1</td>\n",
- " <td>0.733333</td>\n",
- " <td>11</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>1</td>\n",
- " <td>2</td>\n",
- " <td>0.000322</td>\n",
- " <td>2</td>\n",
- " <td>8</td>\n",
- " <td>1</td>\n",
- " <td>0.733333</td>\n",
- " <td>11</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2</th>\n",
- " <td>2</td>\n",
- " <td>3</td>\n",
- " <td>0.007518</td>\n",
- " <td>1</td>\n",
- " <td>8</td>\n",
- " <td>1</td>\n",
- " <td>0.700000</td>\n",
- " <td>8</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3</th>\n",
- " <td>3</td>\n",
- " <td>4</td>\n",
- " <td>0.011652</td>\n",
- " <td>3</td>\n",
- " <td>11</td>\n",
- " <td>2</td>\n",
- " <td>0.166667</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4</th>\n",
- " <td>4</td>\n",
- " <td>5</td>\n",
- " <td>0.000189</td>\n",
- " <td>6</td>\n",
- " <td>10</td>\n",
- " <td>4</td>\n",
- " <td>0.533333</td>\n",
- " <td>3</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " Unnamed: 0 No. Time Source Destination Protocol Length Info\n",
- "0 0 1 0.000000 1 8 1 0.733333 11\n",
- "1 1 2 0.000322 2 8 1 0.733333 11\n",
- "2 2 3 0.007518 1 8 1 0.700000 8\n",
- "3 3 4 0.011652 3 11 2 0.166667 1\n",
- "4 4 5 0.000189 6 10 4 0.533333 3"
- ]
- },
- "execution_count": 90,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 91,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>Unnamed: 0</th>\n",
- " <th>No.</th>\n",
- " <th>Time</th>\n",
- " <th>Source</th>\n",
- " <th>Destination</th>\n",
- " <th>Protocol</th>\n",
- " <th>Length</th>\n",
- " <th>Info</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>62261</th>\n",
- " <td>91322</td>\n",
- " <td>91323</td>\n",
- " <td>0.052006</td>\n",
- " <td>2</td>\n",
- " <td>8</td>\n",
- " <td>1</td>\n",
- " <td>0.7</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>62262</th>\n",
- " <td>91323</td>\n",
- " <td>91324</td>\n",
- " <td>0.947014</td>\n",
- " <td>1</td>\n",
- " <td>8</td>\n",
- " <td>1</td>\n",
- " <td>0.7</td>\n",
- " <td>8</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>62263</th>\n",
- " <td>91324</td>\n",
- " <td>91325</td>\n",
- " <td>0.052536</td>\n",
- " <td>2</td>\n",
- " <td>8</td>\n",
- " <td>1</td>\n",
- " <td>0.7</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>62264</th>\n",
- " <td>91325</td>\n",
- " <td>91326</td>\n",
- " <td>0.946974</td>\n",
- " <td>1</td>\n",
- " <td>8</td>\n",
- " <td>1</td>\n",
- " <td>0.7</td>\n",
- " <td>8</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>62265</th>\n",
- " <td>91326</td>\n",
- " <td>91327</td>\n",
- " <td>0.052726</td>\n",
- " <td>2</td>\n",
- " <td>8</td>\n",
- " <td>1</td>\n",
- " <td>0.7</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " Unnamed: 0 No. Time Source Destination Protocol Length \\\n",
- "62261 91322 91323 0.052006 2 8 1 0.7 \n",
- "62262 91323 91324 0.947014 1 8 1 0.7 \n",
- "62263 91324 91325 0.052536 2 8 1 0.7 \n",
- "62264 91325 91326 0.946974 1 8 1 0.7 \n",
- "62265 91326 91327 0.052726 2 8 1 0.7 \n",
- "\n",
- " Info \n",
- "62261 2 \n",
- "62262 8 \n",
- "62263 2 \n",
- "62264 8 \n",
- "62265 2 "
- ]
- },
- "execution_count": 91,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df.tail()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 92,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Rebind instead of mutating in place, and tolerate the column already being gone,\n",
- "# so re-running this cell does not raise KeyError.\n",
- "df = df.drop(columns='No.', errors='ignore')\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 93,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>Unnamed: 0</th>\n",
- " <th>Time</th>\n",
- " <th>Source</th>\n",
- " <th>Destination</th>\n",
- " <th>Protocol</th>\n",
- " <th>Length</th>\n",
- " <th>Info</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>0</td>\n",
- " <td>0.000000</td>\n",
- " <td>1</td>\n",
- " <td>8</td>\n",
- " <td>1</td>\n",
- " <td>0.733333</td>\n",
- " <td>11</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>1</td>\n",
- " <td>0.000322</td>\n",
- " <td>2</td>\n",
- " <td>8</td>\n",
- " <td>1</td>\n",
- " <td>0.733333</td>\n",
- " <td>11</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2</th>\n",
- " <td>2</td>\n",
- " <td>0.007518</td>\n",
- " <td>1</td>\n",
- " <td>8</td>\n",
- " <td>1</td>\n",
- " <td>0.700000</td>\n",
- " <td>8</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3</th>\n",
- " <td>3</td>\n",
- " <td>0.011652</td>\n",
- " <td>3</td>\n",
- " <td>11</td>\n",
- " <td>2</td>\n",
- " <td>0.166667</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4</th>\n",
- " <td>4</td>\n",
- " <td>0.000189</td>\n",
- " <td>6</td>\n",
- " <td>10</td>\n",
- " <td>4</td>\n",
- " <td>0.533333</td>\n",
- " <td>3</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " Unnamed: 0 Time Source Destination Protocol Length Info\n",
- "0 0 0.000000 1 8 1 0.733333 11\n",
- "1 1 0.000322 2 8 1 0.733333 11\n",
- "2 2 0.007518 1 8 1 0.700000 8\n",
- "3 3 0.011652 3 11 2 0.166667 1\n",
- "4 4 0.000189 6 10 4 0.533333 3"
- ]
- },
- "execution_count": 93,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 94,
- "metadata": {},
- "outputs": [],
- "source": [
- "# 'Unnamed: 0' is the index column that was written into the CSV; drop it.\n",
- "# errors='ignore' keeps the cell re-runnable once the column has been removed.\n",
- "df = df.drop(columns='Unnamed: 0', errors='ignore')\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 95,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>Time</th>\n",
- " <th>Source</th>\n",
- " <th>Destination</th>\n",
- " <th>Protocol</th>\n",
- " <th>Length</th>\n",
- " <th>Info</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>0.000000</td>\n",
- " <td>1</td>\n",
- " <td>8</td>\n",
- " <td>1</td>\n",
- " <td>0.733333</td>\n",
- " <td>11</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>0.000322</td>\n",
- " <td>2</td>\n",
- " <td>8</td>\n",
- " <td>1</td>\n",
- " <td>0.733333</td>\n",
- " <td>11</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2</th>\n",
- " <td>0.007518</td>\n",
- " <td>1</td>\n",
- " <td>8</td>\n",
- " <td>1</td>\n",
- " <td>0.700000</td>\n",
- " <td>8</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3</th>\n",
- " <td>0.011652</td>\n",
- " <td>3</td>\n",
- " <td>11</td>\n",
- " <td>2</td>\n",
- " <td>0.166667</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4</th>\n",
- " <td>0.000189</td>\n",
- " <td>6</td>\n",
- " <td>10</td>\n",
- " <td>4</td>\n",
- " <td>0.533333</td>\n",
- " <td>3</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " Time Source Destination Protocol Length Info\n",
- "0 0.000000 1 8 1 0.733333 11\n",
- "1 0.000322 2 8 1 0.733333 11\n",
- "2 0.007518 1 8 1 0.700000 8\n",
- "3 0.011652 3 11 2 0.166667 1\n",
- "4 0.000189 6 10 4 0.533333 3"
- ]
- },
- "execution_count": 95,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 96,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "<class 'pandas.core.frame.DataFrame'>\n",
- "RangeIndex: 62266 entries, 0 to 62265\n",
- "Data columns (total 6 columns):\n",
- "Time 62266 non-null float64\n",
- "Source 62266 non-null int64\n",
- "Destination 62266 non-null int64\n",
- "Protocol 62266 non-null int64\n",
- "Length 62266 non-null float64\n",
- "Info 62266 non-null int64\n",
- "dtypes: float64(2), int64(4)\n",
- "memory usage: 2.9 MB\n"
- ]
- }
- ],
- "source": [
- "df.info()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 97,
- "metadata": {},
- "outputs": [],
- "source": [
- "from kmodes.kmodes import KModes"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 98,
- "metadata": {},
- "outputs": [],
- "source": [
- "km = KModes(n_clusters=2, init='Huang', n_init=5, verbose=1)\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 99,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/home/terminatorash2199/anaconda3/lib/python3.6/site-packages/kmodes/util/__init__.py:70: FutureWarning: arrays to stack must be passed as a \"sequence\" type such as list or tuple. Support for non-sequence iterables such as generators is deprecated as of NumPy 1.16 and will raise an error in the future.\n",
- " return np.vstack({tuple(row) for row in a})\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Init: initializing centroids\n",
- "Init: initializing clusters\n",
- "Starting iterations...\n",
- "Run 1, iteration: 1/100, moves: 16541, cost: 202102.0\n",
- "Run 1, iteration: 2/100, moves: 0, cost: 202102.0\n",
- "Init: initializing centroids\n",
- "Init: initializing clusters\n",
- "Starting iterations...\n",
- "Run 2, iteration: 1/100, moves: 0, cost: 180015.0\n",
- "Init: initializing centroids\n",
- "Init: initializing clusters\n",
- "Starting iterations...\n",
- "Run 3, iteration: 1/100, moves: 15474, cost: 205554.0\n",
- "Run 3, iteration: 2/100, moves: 8367, cost: 202477.0\n",
- "Run 3, iteration: 3/100, moves: 80, cost: 202477.0\n",
- "Init: initializing centroids\n",
- "Init: initializing clusters\n",
- "Starting iterations...\n",
- "Run 4, iteration: 1/100, moves: 0, cost: 180015.0\n",
- "Init: initializing centroids\n",
- "Init: initializing clusters\n",
- "Starting iterations...\n",
- "Run 5, iteration: 1/100, moves: 0, cost: 202102.0\n",
- "Best run was number 2\n"
- ]
- }
- ],
- "source": [
- "clusters = km.fit_predict(df)\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 100,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "[[2.60000000e-04 3.00000000e+00 5.00000000e+00 2.00000000e+00\n",
- " 1.66666667e-01 1.00000000e+00]\n",
- " [3.22000000e-04 2.00000000e+00 8.00000000e+00 1.00000000e+00\n",
- " 7.00000000e-01 8.00000000e+00]]\n"
- ]
- }
- ],
- "source": [
- "print(km.cluster_centroids_)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 101,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "array([1, 1, 1, ..., 1, 1, 1], dtype=uint8)"
- ]
- },
- "execution_count": 101,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "clusters"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 102,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "numpy.ndarray"
- ]
- },
- "execution_count": 102,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "type(clusters)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 103,
- "metadata": {},
- "outputs": [],
- "source": [
- "df2 = pd.DataFrame(clusters)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 104,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "pandas.core.frame.DataFrame"
- ]
- },
- "execution_count": 104,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "type(df2)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 105,
- "metadata": {},
- "outputs": [],
- "source": [
- "df5 = df.join(pd.DataFrame(clusters))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 106,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>Time</th>\n",
- " <th>Source</th>\n",
- " <th>Destination</th>\n",
- " <th>Protocol</th>\n",
- " <th>Length</th>\n",
- " <th>Info</th>\n",
- " <th>0</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>0.000000</td>\n",
- " <td>1</td>\n",
- " <td>8</td>\n",
- " <td>1</td>\n",
- " <td>0.733333</td>\n",
- " <td>11</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>0.000322</td>\n",
- " <td>2</td>\n",
- " <td>8</td>\n",
- " <td>1</td>\n",
- " <td>0.733333</td>\n",
- " <td>11</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2</th>\n",
- " <td>0.007518</td>\n",
- " <td>1</td>\n",
- " <td>8</td>\n",
- " <td>1</td>\n",
- " <td>0.700000</td>\n",
- " <td>8</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3</th>\n",
- " <td>0.011652</td>\n",
- " <td>3</td>\n",
- " <td>11</td>\n",
- " <td>2</td>\n",
- " <td>0.166667</td>\n",
- " <td>1</td>\n",
- " <td>0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4</th>\n",
- " <td>0.000189</td>\n",
- " <td>6</td>\n",
- " <td>10</td>\n",
- " <td>4</td>\n",
- " <td>0.533333</td>\n",
- " <td>3</td>\n",
- " <td>0</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " Time Source Destination Protocol Length Info 0\n",
- "0 0.000000 1 8 1 0.733333 11 1\n",
- "1 0.000322 2 8 1 0.733333 11 1\n",
- "2 0.007518 1 8 1 0.700000 8 1\n",
- "3 0.011652 3 11 2 0.166667 1 0\n",
- "4 0.000189 6 10 4 0.533333 3 0"
- ]
- },
- "execution_count": 106,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df5.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 107,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "<class 'pandas.core.frame.DataFrame'>\n",
- "RangeIndex: 62266 entries, 0 to 62265\n",
- "Data columns (total 7 columns):\n",
- "Time 62266 non-null float64\n",
- "Source 62266 non-null int64\n",
- "Destination 62266 non-null int64\n",
- "Protocol 62266 non-null int64\n",
- "Length 62266 non-null float64\n",
- "Info 62266 non-null int64\n",
- "0 62266 non-null uint8\n",
- "dtypes: float64(2), int64(4), uint8(1)\n",
- "memory usage: 2.9 MB\n"
- ]
- }
- ],
- "source": [
- "df5.info()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 108,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>Time</th>\n",
- " <th>Source</th>\n",
- " <th>Destination</th>\n",
- " <th>Protocol</th>\n",
- " <th>Length</th>\n",
- " <th>Info</th>\n",
- " <th>0</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>62261</th>\n",
- " <td>0.052006</td>\n",
- " <td>2</td>\n",
- " <td>8</td>\n",
- " <td>1</td>\n",
- " <td>0.7</td>\n",
- " <td>2</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>62262</th>\n",
- " <td>0.947014</td>\n",
- " <td>1</td>\n",
- " <td>8</td>\n",
- " <td>1</td>\n",
- " <td>0.7</td>\n",
- " <td>8</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>62263</th>\n",
- " <td>0.052536</td>\n",
- " <td>2</td>\n",
- " <td>8</td>\n",
- " <td>1</td>\n",
- " <td>0.7</td>\n",
- " <td>2</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>62264</th>\n",
- " <td>0.946974</td>\n",
- " <td>1</td>\n",
- " <td>8</td>\n",
- " <td>1</td>\n",
- " <td>0.7</td>\n",
- " <td>8</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>62265</th>\n",
- " <td>0.052726</td>\n",
- " <td>2</td>\n",
- " <td>8</td>\n",
- " <td>1</td>\n",
- " <td>0.7</td>\n",
- " <td>2</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " Time Source Destination Protocol Length Info 0\n",
- "62261 0.052006 2 8 1 0.7 2 1\n",
- "62262 0.947014 1 8 1 0.7 8 1\n",
- "62263 0.052536 2 8 1 0.7 2 1\n",
- "62264 0.946974 1 8 1 0.7 8 1\n",
- "62265 0.052726 2 8 1 0.7 2 1"
- ]
- },
- "execution_count": 108,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df5.tail()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 109,
- "metadata": {},
- "outputs": [],
- "source": [
- "df5.to_csv('kmode.csv')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 110,
- "metadata": {},
- "outputs": [],
- "source": [
- "from keras import layers, optimizers, regularizers\n",
- "from keras.layers import Dense, Dropout, BatchNormalization, Activation\n",
- "from keras.models import Sequential\n",
- "\n",
- "from keras.utils import plot_model\n",
- "#from kt_utils import *\n",
- "import keras.backend as K\n",
- "\n",
- "import seaborn as sns\n",
- "\n",
- "from sklearn import preprocessing, model_selection \n",
- "\n",
- "import matplotlib.pyplot as plt\n",
- "from matplotlib.pyplot import imshow"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 111,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "(49812, 6) (49812, 0) (12454, 6) (12454, 0)\n"
- ]
- }
- ],
- "source": [
- "# df5 has 7 columns (positions 0-6): six features followed by the KModes cluster label.\n",
- "X = df5.iloc[:,0:6].values # feature columns Time .. Info\n",
- "# The label sits at position 6. Slicing from 7 selected no columns and produced\n",
- "# targets of shape (n, 0), which Keras rejects when fitting.\n",
- "Y = df5.iloc[:,6:].values # cluster label, shape (n, 1)\n",
- "\n",
- "X = preprocessing.normalize(X, axis = 0)\n",
- "\n",
- "X_train,X_test,Y_train,Y_test = model_selection.train_test_split(X,Y,test_size=0.2)\n",
- "\n",
- "print(X_train.shape,Y_train.shape,X_test.shape,Y_test.shape)\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 112,
- "metadata": {},
- "outputs": [],
- "source": [
- "df5[0] = df5[0].astype(int)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 113,
- "metadata": {},
- "outputs": [],
- "source": [
- "# df5[0] =df5[0].astype(int)\n",
- "# df5 = pd.get_dummies(df5, columns=[0])\n",
- "# df5.head(5)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 114,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "(49812, 6) (49812, 0) (12454, 6) (12454, 0)\n"
- ]
- }
- ],
- "source": [
- "# Rebuild the arrays after casting the label column to int.\n",
- "# df5 has 7 columns (positions 0-6): six features followed by the cluster label.\n",
- "X = df5.iloc[:,0:6].values # feature columns Time .. Info\n",
- "# The label is at position 6; a slice starting at 7 is empty and yields shape (n, 0).\n",
- "Y = df5.iloc[:,6:].values # cluster label, shape (n, 1)\n",
- "\n",
- "X = preprocessing.normalize(X, axis = 0)\n",
- "\n",
- "X_train,X_test,Y_train,Y_test = model_selection.train_test_split(X,Y,test_size=0.2)\n",
- "\n",
- "print(X_train.shape,Y_train.shape,X_test.shape,Y_test.shape)\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 115,
- "metadata": {},
- "outputs": [],
- "source": [
- "winemod1 = Sequential()\n",
- "# layer 1\n",
- "winemod1.add(Dense(30, input_dim=6, activation='relu', name='fc0',kernel_regularizer=regularizers.l2(0.01)))\n",
- "winemod1.add(BatchNormalization(momentum=0.99, epsilon=0.001))\n",
- "winemod1.add(Activation('relu'))\n",
- "winemod1.add(Dense(2, name='fc3',bias_initializer='zeros'))\n",
- "winemod1.add(Activation('softmax'))\n",
- "\n",
- "\n",
- "# #layer 2\n",
- "# winemod1.add(Dense(50, name='fc1',bias_initializer='zeros'))\n",
- "# winemod1.add(BatchNormalization(momentum=0.99, epsilon=0.001))\n",
- "# winemod1.add(Activation('tanh'))\n",
- "# winemod1.add(Dropout(0.5))\n",
- "# #layer 3\n",
- "# winemod1.add(Dense(100, name='fc2',bias_initializer='zeros'))\n",
- "# winemod1.add(BatchNormalization(momentum=0.99, epsilon=0.001))\n",
- "# winemod1.add(Activation('relu'))\n",
- "# winemod1.add(Dropout(0.5))\n",
- "# #layer 4\n",
- "# winemod1.add(Dense(2, name='fc3',bias_initializer='zeros'))\n",
- "# winemod1.add(BatchNormalization(momentum=0.99, epsilon=0.001))\n",
- "# winemod1.add(Activation('softmax'))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 116,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "_________________________________________________________________\n",
- "Layer (type) Output Shape Param # \n",
- "=================================================================\n",
- "fc0 (Dense) (None, 30) 210 \n",
- "_________________________________________________________________\n",
- "batch_normalization_22 (Batc (None, 30) 120 \n",
- "_________________________________________________________________\n",
- "activation_18 (Activation) (None, 30) 0 \n",
- "_________________________________________________________________\n",
- "fc3 (Dense) (None, 2) 62 \n",
- "_________________________________________________________________\n",
- "activation_19 (Activation) (None, 2) 0 \n",
- "=================================================================\n",
- "Total params: 392\n",
- "Trainable params: 332\n",
- "Non-trainable params: 60\n",
- "_________________________________________________________________\n"
- ]
- }
- ],
- "source": [
- "winemod1.summary()\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 117,
- "metadata": {},
- "outputs": [],
- "source": [
- "Adam = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)\n",
- "# The loss takes integer class ids (sparse labels), so the metric must be the sparse variant.\n",
- "# categorical_accuracy would argmax over the single label column and always compare against class 0.\n",
- "winemod1.compile(optimizer = Adam, loss = \"sparse_categorical_crossentropy\", metrics = [\"sparse_categorical_accuracy\"])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 118,
- "metadata": {},
- "outputs": [
- {
- "ename": "ValueError",
- "evalue": "Error when checking target: expected activation_19 to have shape (1,) but got array with shape (0,)",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
- "\u001b[0;32m<ipython-input-118-0c2ba3d9d5cd>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mwinemod1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mY_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mepochs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m200\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mverbose\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch_size\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m64\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mvalidation_data\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mY_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
- "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/keras/engine/training.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, **kwargs)\u001b[0m\n\u001b[1;32m 950\u001b[0m \u001b[0msample_weight\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msample_weight\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 951\u001b[0m \u001b[0mclass_weight\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mclass_weight\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 952\u001b[0;31m batch_size=batch_size)\n\u001b[0m\u001b[1;32m 953\u001b[0m \u001b[0;31m# Prepare validation data.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 954\u001b[0m \u001b[0mdo_validation\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/keras/engine/training.py\u001b[0m in \u001b[0;36m_standardize_user_data\u001b[0;34m(self, x, y, sample_weight, class_weight, check_array_lengths, batch_size)\u001b[0m\n\u001b[1;32m 787\u001b[0m \u001b[0mfeed_output_shapes\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 788\u001b[0m \u001b[0mcheck_batch_axis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;31m# Don't enforce the batch size.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 789\u001b[0;31m exception_prefix='target')\n\u001b[0m\u001b[1;32m 790\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 791\u001b[0m \u001b[0;31m# Generate sample-wise weight values given the `sample_weight` and\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/keras/engine/training_utils.py\u001b[0m in \u001b[0;36mstandardize_input_data\u001b[0;34m(data, names, shapes, check_batch_axis, exception_prefix)\u001b[0m\n\u001b[1;32m 136\u001b[0m \u001b[0;34m': expected '\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mnames\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m' to have shape '\u001b[0m \u001b[0;34m+\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 137\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m' but got array with shape '\u001b[0m \u001b[0;34m+\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 138\u001b[0;31m str(data_shape))\n\u001b[0m\u001b[1;32m 139\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 140\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;31mValueError\u001b[0m: Error when checking target: expected activation_19 to have shape (1,) but got array with shape (0,)"
- ]
- }
- ],
- "source": [
- "winemod1.fit(x = X_train, y = Y_train, epochs = 200,verbose=1, batch_size = 64,validation_data=(X_test, Y_test))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.6.5"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement