Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {
- "button": false,
- "collapsed": false,
- "deletable": true,
- "new_sheet": false,
- "run_control": {
- "read_only": false
- }
- },
- "outputs": [],
- "source": [
- "from collections import Counter, defaultdict\n",
- "\n",
- "import numpy as np\n",
- "import pandas as pd\n",
- "from tqdm import tqdm_notebook"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {
- "button": false,
- "collapsed": false,
- "deletable": true,
- "new_sheet": false,
- "run_control": {
- "read_only": false
- }
- },
- "outputs": [],
- "source": [
- "# Directory that holds the competition CSV files.\n",
- "location = 'kaggle/'\n",
- "\n",
- "# pd.read_csv replaces DataFrame.from_csv, which was deprecated and later\n",
- "# removed from pandas. The default integer index is kept, as before.\n",
- "events = pd.read_csv(location + 'user_activity.csv')\n",
- "structure = pd.read_csv(location + 'structure.csv')\n",
- "targets = pd.read_csv(location + 'targets.csv')\n",
- "events_test = pd.read_csv(location + 'user_activity_test.csv')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 497,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "# Users whose target row has passed == 1.\n",
- "passed_user_ids = set(targets[targets.passed == 1].user_id.values)\n",
- "# One event row per such user: the first one in the current row order.\n",
- "first_events_of_passed = events[events.user_id.isin(passed_user_ids)].drop_duplicates('user_id')\n",
- "# Number of those users per step_id of that first row.\n",
- "counter = Counter(first_events_of_passed.step_id)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 498,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "# Per-step count from `counter`, scaled by a constant; unseen steps default to 0.\n",
- "# NOTE(review): 659 is presumably the number of passed users - confirm, and\n",
- "# derive it from the data instead of hard-coding it.\n",
- "cool_feature = defaultdict(int)\n",
- "\n",
- "for step_id, hits in counter.items():\n",
- "    cool_feature[step_id] = hits / 659"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 499,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "#\n",
- "# SORT THE EVENTS BY TIME\n",
- "#\n",
- "\n",
- "events.sort_values(by='time', inplace=True)\n",
- "\n",
- "#\n",
- "# SORT THE STRUCTURE BY POSITION, THEN KEEP THE ORDERED step_id VECTOR\n",
- "#\n",
- "structure.sort_values(['module_position','lesson_position', 'step_position'], inplace=True)\n",
- "vec = structure.step_id.values.tolist()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 500,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "# Encode the categorical columns of the train events as integers.\n",
- "# The labels are sorted before numbering so the codes are reproducible across\n",
- "# kernel restarts; numbering a bare set of strings is not.\n",
- "# NOTE(review): the test frame is encoded in a separate cell and must give the\n",
- "# same code to the same label. Later cells also compare `action` with the\n",
- "# literal codes 1 and 2 - confirm which labels those codes stand for.\n",
- "desc = {label: code for code, label in enumerate(sorted(set(events.action), key=str))}\n",
- "events.action = [desc[label] for label in events.action]\n",
- "\n",
- "\n",
- "desc = {label: code for code, label in enumerate(sorted(set(events.step_type), key=str))}\n",
- "events.step_type = [desc[label] for label in events.step_type]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 501,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "# Encode the categorical columns of the test events as integers.\n",
- "# The labels are sorted before numbering so the codes are reproducible across\n",
- "# kernel restarts; numbering a bare set of strings is not.\n",
- "# NOTE(review): the train frame is encoded in a separate cell and must give the\n",
- "# same code to the same label - verify both frames contain the same labels.\n",
- "desc = {label: code for code, label in enumerate(sorted(set(events_test.action), key=str))}\n",
- "events_test.action = [desc[label] for label in events_test.action]\n",
- "\n",
- "\n",
- "desc = {label: code for code, label in enumerate(sorted(set(events_test.step_type), key=str))}\n",
- "events_test.step_type = [desc[label] for label in events_test.step_type]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 502,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "# NOTE(review): DataFrame.size is rows * columns, not the number of rows -\n",
- "# confirm this is the intended count (len(structure) gives the row count).\n",
- "ALL_CURSES = structure.size\n",
- "# Test users first, then train users; a user present in both frames is listed twice.\n",
- "ALL_USERS = list(set(events_test.user_id)) + list(set(events.user_id))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 503,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "\n"
- ]
- }
- ],
- "source": [
- "# Per-user slice of the event log, keyed by user_id.\n",
- "# A single groupby pass per frame replaces one full-frame boolean filter per user.\n",
- "# Test users are stored first so that, as before, the train slice wins for a\n",
- "# user_id present in both logs.\n",
- "user_event_pd_frame = {}\n",
- "\n",
- "for user, user_events in tqdm_notebook(events_test.groupby('user_id')):\n",
- "    user_event_pd_frame[user] = user_events\n",
- "\n",
- "for user, user_events in tqdm_notebook(events.groupby('user_id')):\n",
- "    user_event_pd_frame[user] = user_events\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "# Counter of action codes for every user.\n",
- "user_actions_count = {}\n",
- "\n",
- "for user in ALL_USERS:\n",
- "    user_actions = user_event_pd_frame[user].action\n",
- "    user_actions_count[user] = Counter(user_actions)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "# step_id -> step_cost lookup taken from the structure table.\n",
- "action_costs = dict(zip(structure.step_id, structure.step_cost))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Widget Javascript not detected. It may not be installed properly. Did you enable the widgetsnbextension? If not, then run \"jupyter nbextension enable --py --sys-prefix widgetsnbextension\"\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n"
- ]
- }
- ],
- "source": [
- "# Rows of `structure` restricted to the steps that occur in each user's events.\n",
- "fast_struct = {}\n",
- "for user in tqdm_notebook(ALL_USERS):\n",
- "    touched_steps = set(user_event_pd_frame[user].step_id)\n",
- "    fast_struct[user] = structure[structure.step_id.isin(touched_steps)]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 14,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Widget Javascript not detected. It may not be installed properly. Did you enable the widgetsnbextension? If not, then run \"jupyter nbextension enable --py --sys-prefix widgetsnbextension\"\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n"
- ]
- }
- ],
- "source": [
- "from collections import defaultdict\n",
- "\n",
- "# step_id of every event whose action code is 2, per user.\n",
- "# NOTE(review): code 2 is presumably the 'passed' action - confirm against the encoding cell.\n",
- "user_passed_actions = defaultdict(list)\n",
- "\n",
- "for user in tqdm_notebook(ALL_USERS):\n",
- "    user_events = user_event_pd_frame[user]\n",
- "    user_passed_actions[user] = user_events[user_events.action == 2].step_id"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 15,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "# Largest per-user total of step costs over the entries of user_passed_actions.\n",
- "MAX_STEP_COST_SUM = max(\n",
- "    sum(action_costs[step] for step in user_passed_actions[user])\n",
- "    for user in ALL_USERS\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 16,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "# Per user: (number of passed steps) / (number of passed steps with cost > 0),\n",
- "# or 0 when no passed step has a positive cost.\n",
- "costed_passed_user_actions = {}\n",
- "\n",
- "for user in ALL_USERS:\n",
- "    passed_steps = list(user_passed_actions[user])\n",
- "    costed_count = sum(1 for step in passed_steps if action_costs[step] > 0)\n",
- "\n",
- "    if costed_count == 0:\n",
- "        costed_passed_user_actions[user] = 0\n",
- "    else:\n",
- "        costed_passed_user_actions[user] = len(passed_steps) / costed_count"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 17,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Widget Javascript not detected. It may not be installed properly. Did you enable the widgetsnbextension? If not, then run \"jupyter nbextension enable --py --sys-prefix widgetsnbextension\"\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n"
- ]
- }
- ],
- "source": [
- "# Idle-time feature per user, computed from the user's event timestamps.\n",
- "FuckUpTimePart = {}\n",
- "\n",
- "for user in tqdm_notebook(ALL_USERS):\n",
- "    times = user_event_pd_frame[user].time\n",
- "    if len(times) == 0:\n",
- "        FuckUpTimePart[user] = 0\n",
- "        continue\n",
- "\n",
- "    # Length of the inclusive range [first event, last event]. It is computed\n",
- "    # arithmetically instead of materialising list(range(...)) for every user.\n",
- "    span = int(times.max() - times.min()) + 1\n",
- "    # Number of distinct timestamps that carry at least one event.\n",
- "    active = len(set(times))\n",
- "    # The brackets are deliberate: without them only `active` was divided, which\n",
- "    # left the value equal to the span minus a negligible term.\n",
- "    FuckUpTimePart[user] = (span - active) / span / 100"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 18,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Widget Javascript not detected. It may not be installed properly. Did you enable the widgetsnbextension? If not, then run \"jupyter nbextension enable --py --sys-prefix widgetsnbextension\"\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Widget Javascript not detected. It may not be installed properly. Did you enable the widgetsnbextension? If not, then run \"jupyter nbextension enable --py --sys-prefix widgetsnbextension\"\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n"
- ]
- }
- ],
- "source": [
- "# Sum of step_cost over each user's events with action code 1 (viewed_sum)\n",
- "# and with action code 2 (passed_sum).\n",
- "user_viewed_sum = {}\n",
- "user_passed_sum = {}\n",
- "\n",
- "for action_code, totals in ((1, user_viewed_sum), (2, user_passed_sum)):\n",
- "    for user in tqdm_notebook(ALL_USERS):\n",
- "        user_events = user_event_pd_frame[user]\n",
- "        totals[user] = sum(user_events[user_events.action == action_code].step_cost)\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 19,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "# Order the structure table by module, lesson and step position (in place).\n",
- "structure.sort_values(['module_position','lesson_position', 'step_position'], inplace=True)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 20,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "# step_id values as a plain list, in the current row order of `structure`.\n",
- "step_id_position = structure.step_id.values.tolist()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 729,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "# Every action code seen in either event log, in a fixed order, so that the\n",
- "# per-action counts occupy the same feature columns for every user.\n",
- "ACTION_CODES = sorted(set(events.action) | set(events_test.action))\n",
- "\n",
- "\n",
- "def gen_features(us_id, test = False):\n",
- "    \"\"\"Build the feature vector of one user.\n",
- "\n",
- "    Parameters\n",
- "    ----------\n",
- "    us_id : key of ``user_event_pd_frame`` identifying the user.\n",
- "    test : unused; kept so existing calls keep working.\n",
- "\n",
- "    Returns\n",
- "    -------\n",
- "    tuple ``(us_id, features)`` where ``features`` is a flat list.\n",
- "\n",
- "    Raises\n",
- "    ------\n",
- "    ValueError if the user has no events (``max`` of an empty sequence).\n",
- "    \"\"\"\n",
- "    now_ev = user_event_pd_frame[us_id]\n",
- "\n",
- "    # describe() is computed once and reused below.\n",
- "    summary = now_ev.describe().values\n",
- "\n",
- "    now_x = []\n",
- "    for stat_row in summary:\n",
- "        now_x.extend(stat_row)\n",
- "\n",
- "    now_x.append(max(cool_feature[step] for step in now_ev.step_id))\n",
- "    now_x.append(FuckUpTimePart[us_id])\n",
- "    now_x.append(user_passed_sum[us_id])\n",
- "    now_x.append(user_viewed_sum[us_id])\n",
- "    now_x.append(costed_passed_user_actions[us_id])\n",
- "    now_x.append(now_ev.step_cost.sum())\n",
- "\n",
- "    # One count per known action code (0 when the user never did it). Walking\n",
- "    # the Counter itself gave rows of different length with misaligned columns.\n",
- "    action_counts = user_actions_count[us_id]\n",
- "    now_x.extend(action_counts[code] for code in ACTION_CODES)\n",
- "\n",
- "    # NOTE(review): the describe() block and the step_cost sum are added a second\n",
- "    # time here, as in the original layout - confirm the duplication is wanted.\n",
- "    for stat_row in summary:\n",
- "        now_x.extend(stat_row)\n",
- "\n",
- "    now_x.append(now_ev.step_type.max())\n",
- "    now_x.append(now_ev.step_type.min())\n",
- "    now_x.append(now_ev.step_cost.sum())\n",
- "    now_x.append(now_ev.step_cost.mean())\n",
- "\n",
- "    return (us_id, now_x)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 708,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n"
- ]
- }
- ],
- "source": [
- "# Feature rows (X) and pass labels (Y) for every user of the train log.\n",
- "X = []\n",
- "Y = []\n",
- "for us_id in tqdm_notebook(set(events.user_id.tolist())):\n",
- "    user_id, feature_row = gen_features(us_id)\n",
- "    X.append(feature_row)\n",
- "    user_targets = targets[targets.user_id == user_id]\n",
- "    Y.append(user_targets.passed.values[0])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 709,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n"
- ]
- }
- ],
- "source": [
- "# Feature rows (X_test) and the matching user ids (ind) for the test log.\n",
- "X_test = []\n",
- "ind = []\n",
- "for us_id in tqdm_notebook(set(events_test.user_id.tolist())):\n",
- "    user_id, feature_row = gen_features(us_id,True)\n",
- "    ind.append(user_id)\n",
- "    X_test.append(feature_row)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 710,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "#\n",
- "# X contains NaN features that need to be filled;\n",
- "# pandas is used for that.\n",
- "#\n",
- "X = pd.DataFrame(X)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 711,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "# Backward-fill NaNs down each column. DataFrame.bfill() is the non-deprecated\n",
- "# spelling of fillna(method='bfill').\n",
- "# NOTE(review): each row is one user, so a gap is filled with the value from\n",
- "# the next user's row - confirm this is the intended imputation.\n",
- "X = X.bfill()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 712,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "# Replace every NaN that is still present with 0.\n",
- "X = X.fillna(0)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 713,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "# Same NaN handling as for X: backward-fill down each column, then 0 for what is left.\n",
- "# DataFrame.bfill() is the non-deprecated spelling of fillna(method='bfill').\n",
- "X_test = pd.DataFrame(X_test).bfill()\n",
- "X_test = X_test.fillna(0)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 714,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "# Plain float32 array for the network input.\n",
- "X = X.values.astype(np.float32)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 715,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "# DataFrame.as_matrix() was removed from pandas; .values is the equivalent\n",
- "# already used for X.\n",
- "X_test = X_test.values\n",
- "\n",
- "# Labels as a flat int32 vector.\n",
- "Y = np.ravel(Y)\n",
- "Y = Y.astype(np.int32)\n",
- "\n",
- "\n",
- "X_test = X_test.astype(np.float32)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 716,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "def feature_normalize(feature_index, features_array):\n",
- "    \"\"\"Min-max scale one column of ``features_array`` in place.\n",
- "\n",
- "    Each row's value at ``feature_index`` becomes ``(value - min) / (max - min)``.\n",
- "    A constant column (max == min) is left untouched.\n",
- "\n",
- "    Returns the same ``features_array`` object that was passed in.\n",
- "    \"\"\"\n",
- "    column = [row[feature_index] for row in features_array]\n",
- "    highest = max(column)\n",
- "    lowest = min(column)\n",
- "\n",
- "    if (highest - lowest) != 0:\n",
- "        for row in features_array:\n",
- "            row[feature_index] = (row[feature_index] - lowest) / (highest - lowest)\n",
- "\n",
- "    return features_array"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 717,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "# Min-max scale every feature column of the training matrix.\n",
- "for column in range(len(X[0])):\n",
- "    X = feature_normalize(column, X)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 718,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "# Min-max scale every feature column of the test matrix.\n",
- "# NOTE(review): the test matrix is scaled with its own min/max, not with the\n",
- "# training ones - confirm this is intended.\n",
- "for column in range(len(X_test[0])):\n",
- "    X_test = feature_normalize(column, X_test)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 719,
- "metadata": {
- "button": false,
- "collapsed": false,
- "deletable": true,
- "new_sheet": false,
- "run_control": {
- "read_only": false
- }
- },
- "outputs": [],
- "source": [
- "# sklearn.cross_validation was removed from scikit-learn; model_selection is\n",
- "# its replacement. The old module is kept as a fallback for old installs.\n",
- "try:\n",
- "    from sklearn.model_selection import train_test_split\n",
- "except ImportError:\n",
- "    from sklearn.cross_validation import train_test_split\n",
- "from sklearn.metrics import f1_score\n",
- "\n",
- "# Hold out 10% of the rows for validation, with a fixed seed.\n",
- "Xtr,Xval,Ytr,Yval = train_test_split(X,Y,test_size=0.1,random_state=128)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 720,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "import theano\n",
- "import theano.tensor as T\n",
- "import lasagne"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 721,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "# Symbolic placeholders: a vector named X and an int32 scalar.\n",
- "# NOTE(review): neither name is referenced by the other cells visible here -\n",
- "# confirm they are still needed.\n",
- "input_X = T.vector(\"X\")\n",
- "target_y = T.scalar(dtype='int32')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 722,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "from lasagne.updates import sgd\n",
- "from lasagne.nonlinearities import leaky_rectify, softmax, tanh, elu\n",
- "from lasagne.layers import InputLayer, DenseLayer\n",
- "\n",
- "# Dense 100 -> 60 -> 20 -> softmax(2); every dense layer reads the batch-normalised\n",
- "# output of the previous one. The 60-unit layer used to read the raw 100-unit\n",
- "# layer, which left the first batch_norm out of the graph.\n",
- "l_in = InputLayer(shape=(None,len(X[0])))\n",
- "hl = DenseLayer(incoming=l_in, num_units=100)\n",
- "bb = lasagne.layers.batch_norm(hl)\n",
- "hl = DenseLayer(incoming=bb, num_units=60)\n",
- "bb = lasagne.layers.batch_norm(hl)\n",
- "hl = DenseLayer(incoming=bb, num_units=20)\n",
- "bb = lasagne.layers.batch_norm(hl)\n",
- "l_out = DenseLayer(incoming=bb, num_units=2, nonlinearity=softmax, name='outputlayer')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 723,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "# Symbolic inputs fed to the network and to the loss.\n",
- "sym_x = T.matrix('X')\n",
- "sym_t = T.ivector('target')\n",
- "\n",
- "# Network output built twice: with deterministic=False for training and with\n",
- "# deterministic=True for evaluation and prediction.\n",
- "train_out = lasagne.layers.get_output(l_out, {l_in: sym_x}, deterministic=False)\n",
- "eval_out = lasagne.layers.get_output(l_out, {l_in: sym_x}, deterministic=True)\n",
- "\n",
- "# Trainable parameters of all layers up to l_out.\n",
- "all_params = lasagne.layers.get_all_params(l_out, trainable=True)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 724,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "# Mean categorical cross-entropy against sym_t, for the training and the\n",
- "# evaluation output respectively.\n",
- "cost_train = T.nnet.categorical_crossentropy(train_out, sym_t).mean()\n",
- "cost_eval = T.nnet.categorical_crossentropy(eval_out, sym_t).mean()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 725,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "# Gradients of the training cost and the Adagrad update rules built from them.\n",
- "all_grads = T.grad(cost_train, all_params)\n",
- "updates = lasagne.updates.adagrad(all_grads, all_params, learning_rate=0.1)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 726,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "# Evaluation: returns the evaluation cost and the evaluation output; no updates.\n",
- "f_eval = theano.function(inputs=[sym_x, sym_t],\n",
- " outputs=[cost_eval, eval_out])\n",
- "\n",
- "# Training step: applies `updates`; returns the training cost and the evaluation output.\n",
- "f_train = theano.function(inputs=[sym_x, sym_t],\n",
- " outputs=[cost_train, eval_out],\n",
- " updates=updates)\n",
- "\n",
- "# Prediction: evaluation output for a batch of inputs.\n",
- "f_pred = theano.function(inputs=[sym_x],\n",
- " outputs=eval_out)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 727,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "def iterate_minibatches(inputs, targets, batchsize, inputs_new=None):\n",
- "    \"\"\"Yield shuffled mini-batches of exactly ``batchsize`` rows.\n",
- "\n",
- "    Parameters\n",
- "    ----------\n",
- "    inputs, targets : arrays of equal length that accept index-array indexing.\n",
- "    batchsize : number of rows per batch.\n",
- "    inputs_new : optional second input array of the same length as ``inputs``.\n",
- "\n",
- "    Yields\n",
- "    ------\n",
- "    ``(inputs, targets)`` batches, or ``(inputs, inputs_new, targets)`` when\n",
- "    ``inputs_new`` is given. Rows are drawn in a fresh random order on each\n",
- "    call; trailing rows that do not fill a whole batch are skipped.\n",
- "\n",
- "    Raises\n",
- "    ------\n",
- "    AssertionError if the array lengths differ.\n",
- "    \"\"\"\n",
- "    assert len(inputs) == len(targets)\n",
- "    # `is not None` instead of `!= None`: comparing an array with None is\n",
- "    # element-wise and cannot be used as a condition.\n",
- "    has_extra = inputs_new is not None\n",
- "    if has_extra:\n",
- "        # The old check compared inputs_new with itself and could never fail.\n",
- "        assert len(inputs_new) == len(inputs)\n",
- "    indices = np.arange(len(inputs))\n",
- "    np.random.shuffle(indices)\n",
- "    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):\n",
- "        excerpt = indices[start_idx:start_idx + batchsize]\n",
- "        if has_extra:\n",
- "            yield inputs[excerpt], inputs_new[excerpt], targets[excerpt]\n",
- "        else:\n",
- "            yield inputs[excerpt], targets[excerpt]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 739,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "ename": "KeyboardInterrupt",
- "evalue": "",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
- "\u001b[0;32m<ipython-input-739-4cf751260849>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mepoch\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1000\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0my\u001b[0m \u001b[0;32min\u001b[0m \u001b[0miterate_minibatches\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mY\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mint32\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m10\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mf_train\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mold_score\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0mnow_score\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0mold_score\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnow_score\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/home/a.tvorozhkov/anaconda3/lib/python3.5/site-packages/theano/compile/function_module.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 871\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 872\u001b[0m \u001b[0moutputs\u001b[0m \u001b[0;34m=\u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 873\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0moutput_subset\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 874\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput_subset\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0moutput_subset\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 875\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
- ]
- }
- ],
- "source": [
- "N_EPOCHS = 1000\n",
- "BATCH_SIZE = 10\n",
- "\n",
- "# Start below any attainable F1 so the first epoch always records predictions.\n",
- "old_score = -1.0\n",
- "answer = []\n",
- "predicted = []\n",
- "for epoch in range(N_EPOCHS):\n",
- "    # Train on the training split only, so the held-out rows stay unseen and\n",
- "    # the validation score below means something.\n",
- "    for x, y in iterate_minibatches(Xtr, Ytr, BATCH_SIZE):\n",
- "        f_train(x, y)\n",
- "\n",
- "    # Validation F1 of the current weights. `now_score` was read here before\n",
- "    # but never assigned, which raised NameError after the first epoch.\n",
- "    now_score = f1_score(Yval, f_pred(Xval).argmax(axis=1))\n",
- "    if old_score < now_score:\n",
- "        old_score = now_score\n",
- "        # Keep the test predictions of the best epoch so far.\n",
- "        predicted = f_pred(X_test)\n",
- "        answer = predicted.argmax(axis=1).tolist()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 732,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "# Empty frame that the following cells fill with the submission columns.\n",
- "a = pd.DataFrame()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 733,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "# Predicted class per test user, as produced by the training loop.\n",
- "a['passed'] = answer"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 734,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "# User ids collected in the same order as the rows of X_test.\n",
- "a['user_id'] = ind"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 735,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "# Use user_id as the index so it is written as the first CSV column.\n",
- "a = a.set_index('user_id')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 736,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>passed</th>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>user_id</th>\n",
- " <th></th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>8193</th>\n",
- " <td>0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>16387</th>\n",
- " <td>0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>8196</th>\n",
- " <td>0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>5</th>\n",
- " <td>0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>9</th>\n",
- " <td>0</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " passed\n",
- "user_id \n",
- "8193 0\n",
- "16387 0\n",
- "8196 0\n",
- "5 0\n",
- "9 0"
- ]
- },
- "execution_count": 736,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Preview the first rows of the submission.\n",
- "a.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 738,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "# Write the submission (index user_id, column passed) to red.csv.\n",
- "a.to_csv('red.csv')"
- ]
- }
- ],
- "metadata": {
- "anaconda-cloud": {},
- "kernelspec": {
- "display_name": "Python [default]",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.5.2"
- },
- "widgets": {
- "state": {
- "00cc5f7a46ca4be0bba00e6ceb579bb5": {
- "views": [
- {
- "cell_index": 8
- }
- ]
- },
- "0c40a99c801d49128662aad82934a1aa": {
- "views": [
- {
- "cell_index": 8
- }
- ]
- },
- "1c8753f3411d463580879b08e2ddedb2": {
- "views": [
- {
- "cell_index": 6
- }
- ]
- },
- "2924a75eb0994a4ea19e4a69da2c9369": {
- "views": [
- {
- "cell_index": 9
- }
- ]
- },
- "29f9da92b18a453f933638b9c2405c2d": {
- "views": [
- {
- "cell_index": 20
- }
- ]
- },
- "400480f182d74426902a7a22d237df49": {
- "views": [
- {
- "cell_index": 8
- }
- ]
- },
- "5042d02270ee4d70a11abc437292a005": {
- "views": [
- {
- "cell_index": 10
- }
- ]
- },
- "9b2eaa75f9cb4ed69f45edcc18a1bcfa": {
- "views": [
- {
- "cell_index": 8
- }
- ]
- },
- "ad42efd44c2c4869b5165cb9b28fed70": {
- "views": [
- {
- "cell_index": 8
- }
- ]
- },
- "c5ab72890efb4c47b71cfe4efffc746f": {
- "views": [
- {
- "cell_index": 7
- }
- ]
- },
- "cb56ba5222384625a58b9fcd5a4edfd6": {
- "views": [
- {
- "cell_index": 21
- }
- ]
- },
- "dff634abe09c40bb8e328e7bf43c47df": {
- "views": [
- {
- "cell_index": 6
- }
- ]
- },
- "e53fe8aabd744a11b7d3ce29f3d89cd8": {
- "views": [
- {
- "cell_index": 14
- }
- ]
- },
- "f199d361089443e0beec5c02c35f5394": {
- "views": [
- {
- "cell_index": 8
- }
- ]
- },
- "f59ec1ad186d410aadf1e71bf09e6804": {
- "views": [
- {
- "cell_index": 13
- }
- ]
- },
- "f715a71e5cad4bf89dae0a3b1f0fe3d5": {
- "views": [
- {
- "cell_index": 8
- }
- ]
- },
- "f73d00d1f55f48c8bd04e41b2f4100ac": {
- "views": [
- {
- "cell_index": 14
- }
- ]
- },
- "fde5d46ad89d410585f882b641c279b9": {
- "views": [
- {
- "cell_index": 13
- }
- ]
- }
- },
- "version": "1.2.0"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 1
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement