Advertisement
Guest User

Untitled

a guest
Dec 4th, 2016
68
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 26.67 KB | None | 0 0
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 1,
  6. "metadata": {
  7. "button": false,
  8. "collapsed": false,
  9. "deletable": true,
  10. "new_sheet": false,
  11. "run_control": {
  12. "read_only": false
  13. }
  14. },
  15. "outputs": [],
  16. "source": [
  17. "from collections import Counter\n",
  18. "from collections import defaultdict\n",
  19. "\n",
  20. "import numpy as np\n",
  21. "import pandas as pd\n",
  22. "from tqdm import tqdm_notebook"
  23. ]
  24. },
  25. {
  26. "cell_type": "code",
  27. "execution_count": 2,
  28. "metadata": {
  29. "button": false,
  30. "collapsed": false,
  31. "deletable": true,
  32. "new_sheet": false,
  33. "run_control": {
  34. "read_only": false
  35. }
  36. },
  37. "outputs": [],
  38. "source": [
  39. "location = 'kaggle/'  # folder with the competition CSVs; read_csv replaces the removed DataFrame.from_csv\n",
  40. "events = pd.read_csv(location + \"user_activity.csv\")\n",
  41. "structure = pd.read_csv(location + \"structure.csv\")\n",
  42. "targets = pd.read_csv(location + \"targets.csv\")\n",
  43. "events_test = pd.read_csv(location + \"user_activity_test.csv\")"
  44. ]
  45. },
  46. {
  47. "cell_type": "code",
  48. "execution_count": 497,
  49. "metadata": {
  50. "collapsed": false
  51. },
  52. "outputs": [],
  53. "source": [
  54. "counter = Counter(events[events.user_id.isin(set(targets[targets.passed == 1].user_id.values))].drop_duplicates('user_id').step_id)"
  55. ]
  56. },
  57. {
  58. "cell_type": "code",
  59. "execution_count": 498,
  60. "metadata": {
  61. "collapsed": false
  62. },
  63. "outputs": [],
  64. "source": [
  65. "N_PASSED_USERS = 659  # NOTE(review): hard-coded divisor, presumably the number of passed users - confirm\n",
  66. "\n",
  67. "# counter[step] / 659 for every step in counter; any other step reads as 0 (defaultdict comes from the import cell).\n",
  68. "cool_feature = defaultdict(int, {step: hits / N_PASSED_USERS for step, hits in counter.items()})"
  69. ]
  70. },
  71. {
  72. "cell_type": "code",
  73. "execution_count": 499,
  74. "metadata": {
  75. "collapsed": false
  76. },
  77. "outputs": [],
  78. "source": [
  79. "#\n",
  80. "# SORT BY TIME\n",
  81. "#\n",
  82. "\n",
  83. "events.sort_values(by='time', inplace=True)\n",
  84. "\n",
  85. "#\n",
  86. "# SORT, THEN SAVE THE VECTOR\n",
  87. "#\n",
  88. "structure.sort_values(['module_position','lesson_position', 'step_position'], inplace=True)\n",
  89. "vec = structure.step_id.values.tolist()"
  90. ]
  91. },
  92. {
  93. "cell_type": "code",
  94. "execution_count": 500,
  95. "metadata": {
  96. "collapsed": false
  97. },
  98. "outputs": [],
  99. "source": [
  100. "# sorted() pins the label -> code mapping (raw set order is not stable for string labels); key=str keeps mixed types sortable.\n",
  101. "desc = {label: code for code, label in enumerate(sorted(set(events.action), key=str))}\n",
  102. "events.action = [desc[label] for label in events.action]\n",
  103. "# NOTE(review): later cells compare action with the literals 1 and 2 - confirm which labels those codes stand for.\n",
  104. "desc = {label: code for code, label in enumerate(sorted(set(events.step_type), key=str))}\n",
  105. "events.step_type = [desc[label] for label in events.step_type]"
  106. ]
  107. },
  108. {
  109. "cell_type": "code",
  110. "execution_count": 501,
  111. "metadata": {
  112. "collapsed": true
  113. },
  114. "outputs": [],
  115. "source": [
  116. "# Deterministic encoding of the test table: codes follow the sorted labels, not raw set order.\n",
  117. "desc = {label: code for code, label in enumerate(sorted(set(events_test.action), key=str))}\n",
  118. "events_test.action = [desc[label] for label in events_test.action]\n",
  119. "# NOTE(review): codes match another table's encoding only if both contain the same label sets - confirm.\n",
  120. "desc = {label: code for code, label in enumerate(sorted(set(events_test.step_type), key=str))}\n",
  121. "events_test.step_type = [desc[label] for label in events_test.step_type]"
  122. ]
  123. },
  124. {
  125. "cell_type": "code",
  126. "execution_count": 502,
  127. "metadata": {
  128. "collapsed": true
  129. },
  130. "outputs": [],
  131. "source": [
  132. "ALL_CURSES = structure.size  # NOTE(review): DataFrame.size is rows * columns, not the row count - confirm intent\n",
  133. "ALL_USERS = list(set(events_test.user_id)) + list(set(events.user_id))  # test ids first, then train ids; an id in both tables appears twice"
  134. ]
  135. },
  136. {
  137. "cell_type": "code",
  138. "execution_count": 503,
  139. "metadata": {
  140. "collapsed": false
  141. },
  142. "outputs": [
  143. {
  144. "name": "stdout",
  145. "output_type": "stream",
  146. "text": [
  147. "\n",
  148. "\n"
  149. ]
  150. }
  151. ],
  152. "source": [
  153. "# Per-user slices of the event tables. Every lookup below is a full boolean scan of the table.\n",
  154. "user_event_pd_frame = {}\n",
  155. "\n",
  156. "# Test table first, train table second: a user id found in both keeps its train rows.\n",
  157. "for table in (events_test, events):\n",
  158. "    for user in tqdm_notebook(set(table.user_id)):\n",
  159. "        user_event_pd_frame[user] = table[table.user_id == user]\n"
  160. ]
  161. },
  162. {
  163. "cell_type": "code",
  164. "execution_count": 9,
  165. "metadata": {
  166. "collapsed": false
  167. },
  168. "outputs": [],
  169. "source": [
  170. "# Per-user tally of action codes.\n",
  171. "user_actions_count = {\n",
  172. "    user: Counter(user_event_pd_frame[user].action) for user in ALL_USERS\n",
  173. "}"
  174. ]
  175. },
  176. {
  177. "cell_type": "code",
  178. "execution_count": 10,
  179. "metadata": {
  180. "collapsed": true
  181. },
  182. "outputs": [],
  183. "source": [
  184. "action_costs = dict(zip(structure.step_id, structure.step_cost))  # step_id -> step_cost"
  185. ]
  186. },
  187. {
  188. "cell_type": "code",
  189. "execution_count": 13,
  190. "metadata": {
  191. "collapsed": false
  192. },
  193. "outputs": [
  194. {
  195. "name": "stderr",
  196. "output_type": "stream",
  197. "text": [
  198. "Widget Javascript not detected. It may not be installed properly. Did you enable the widgetsnbextension? If not, then run \"jupyter nbextension enable --py --sys-prefix widgetsnbextension\"\n"
  199. ]
  200. },
  201. {
  202. "name": "stdout",
  203. "output_type": "stream",
  204. "text": [
  205. "\n"
  206. ]
  207. }
  208. ],
  209. "source": [
  210. "# user -> rows of `structure` for the steps that user has events on\n",
  211. "fast_struct = {user: structure[structure.step_id.isin(set(user_event_pd_frame[user].step_id))]\n",
  212. "               for user in tqdm_notebook(ALL_USERS)}"
  213. ]
  214. },
  215. {
  216. "cell_type": "code",
  217. "execution_count": 14,
  218. "metadata": {
  219. "collapsed": false
  220. },
  221. "outputs": [
  222. {
  223. "name": "stderr",
  224. "output_type": "stream",
  225. "text": [
  226. "Widget Javascript not detected. It may not be installed properly. Did you enable the widgetsnbextension? If not, then run \"jupyter nbextension enable --py --sys-prefix widgetsnbextension\"\n"
  227. ]
  228. },
  229. {
  230. "name": "stdout",
  231. "output_type": "stream",
  232. "text": [
  233. "\n"
  234. ]
  235. }
  236. ],
  237. "source": [
  238. "from collections import defaultdict\n",
  239. "user_passed_actions = defaultdict(list)  # user -> step_id Series of that user's events with action code 2\n",
  240. "# NOTE(review): 2 is presumably the 'passed' code - confirm against the action encoding.\n",
  241. "for user in tqdm_notebook(ALL_USERS):\n",
  242. "    is_code_2 = user_event_pd_frame[user].action == 2\n",
  243. "    user_passed_actions[user] = user_event_pd_frame[user][is_code_2].step_id"
  244. ]
  245. },
  246. {
  247. "cell_type": "code",
  248. "execution_count": 15,
  249. "metadata": {
  250. "collapsed": false
  251. },
  252. "outputs": [],
  253. "source": [
  254. "MAX_STEP_COST_SUM = max(sum(action_costs[step] for step in user_passed_actions[user]) for user in ALL_USERS)  # largest per-user cost total over user_passed_actions"
  255. ]
  256. },
  257. {
  258. "cell_type": "code",
  259. "execution_count": 16,
  260. "metadata": {
  261. "collapsed": true
  262. },
  263. "outputs": [],
  264. "source": [
  265. "# Per user: (entries in user_passed_actions[user]) / (entries whose step has a positive cost);\n",
  266. "# 0 when none of the entries has a positive cost.\n",
  267. "costed_passed_user_actions = {}\n",
  268. "\n",
  269. "for user in ALL_USERS:\n",
  270. "    passed_steps = list(user_passed_actions[user])\n",
  271. "    n_costed = sum(1 for step in passed_steps if action_costs[step] > 0)\n",
  272. "    if n_costed == 0:\n",
  273. "        costed_passed_user_actions[user] = 0\n",
  274. "    else:\n",
  275. "        costed_passed_user_actions[user] = len(passed_steps) / n_costed"
  280. ]
  281. },
  282. {
  283. "cell_type": "code",
  284. "execution_count": 17,
  285. "metadata": {
  286. "collapsed": false
  287. },
  288. "outputs": [
  289. {
  290. "name": "stderr",
  291. "output_type": "stream",
  292. "text": [
  293. "Widget Javascript not detected. It may not be installed properly. Did you enable the widgetsnbextension? If not, then run \"jupyter nbextension enable --py --sys-prefix widgetsnbextension\"\n"
  294. ]
  295. },
  296. {
  297. "name": "stdout",
  298. "output_type": "stream",
  299. "text": [
  300. "\n"
  301. ]
  302. }
  303. ],
  304. "source": [
  305. "# Per-user gap feature from event timestamps. The list with one element per time unit of the span, which the\n",
  306. "# original built only to take len(), is replaced by arithmetic; the resulting value is unchanged.\n",
  307. "FuckUpTimePart = {}\n",
  308. "for user in tqdm_notebook(ALL_USERS):\n",
  309. "    times = sorted(user_event_pd_frame[user].time)\n",
  310. "    if times:\n",
  311. "        span = times[-1] - times[0] + 1  # == len(range(0, last_offset + 1))\n",
  312. "        # NOTE(review): precedence makes this span - (distinct / span / 100); if the share of idle\n",
  313. "        # time units was intended it should be (span - distinct) / span - confirm before changing.\n",
  314. "        FuckUpTimePart[user] = span - len(set(times)) / span / 100\n",
  315. "    else:\n",
  316. "        FuckUpTimePart[user] = 0"
  317. ]
  318. },
  319. {
  320. "cell_type": "code",
  321. "execution_count": 18,
  322. "metadata": {
  323. "collapsed": false
  324. },
  325. "outputs": [
  326. {
  327. "name": "stderr",
  328. "output_type": "stream",
  329. "text": [
  330. "Widget Javascript not detected. It may not be installed properly. Did you enable the widgetsnbextension? If not, then run \"jupyter nbextension enable --py --sys-prefix widgetsnbextension\"\n"
  331. ]
  332. },
  333. {
  334. "name": "stdout",
  335. "output_type": "stream",
  336. "text": [
  337. "\n"
  338. ]
  339. },
  340. {
  341. "name": "stderr",
  342. "output_type": "stream",
  343. "text": [
  344. "Widget Javascript not detected. It may not be installed properly. Did you enable the widgetsnbextension? If not, then run \"jupyter nbextension enable --py --sys-prefix widgetsnbextension\"\n"
  345. ]
  346. },
  347. {
  348. "name": "stdout",
  349. "output_type": "stream",
  350. "text": [
  351. "\n"
  352. ]
  353. }
  354. ],
  355. "source": [
  356. "def _cost_sum_for_action(action_code):\n",
  357. "    \"\"\"Map every user to the built-in sum of step_cost over their events with this action code.\"\"\"\n",
  358. "    totals = {}\n",
  359. "    for user in tqdm_notebook(ALL_USERS):\n",
  360. "        ev = user_event_pd_frame[user]\n",
  361. "        totals[user] = sum(ev[ev.action == action_code].step_cost)\n",
  362. "    return totals\n",
  363. "\n",
  364. "user_viewed_sum, user_passed_sum = _cost_sum_for_action(1), _cost_sum_for_action(2)\n"
  365. ]
  366. },
  367. {
  368. "cell_type": "code",
  369. "execution_count": 19,
  370. "metadata": {
  371. "collapsed": true
  372. },
  373. "outputs": [],
  374. "source": [
  375. "structure.sort_values(['module_position','lesson_position', 'step_position'], inplace=True)"
  376. ]
  377. },
  378. {
  379. "cell_type": "code",
  380. "execution_count": 20,
  381. "metadata": {
  382. "collapsed": true
  383. },
  384. "outputs": [],
  385. "source": [
  386. "step_id_position = structure.step_id.values.tolist()"
  387. ]
  388. },
  389. {
  390. "cell_type": "code",
  391. "execution_count": 729,
  392. "metadata": {
  393. "collapsed": false
  394. },
  395. "outputs": [],
  396. "source": [
  397. "# Fixed, sorted list of every action code seen for any user, so the count columns line up across users.\n",
  398. "ALL_ACTION_CODES = sorted(set().union(*user_actions_count.values()))\n",
  399. "\n",
  400. "def gen_features(us_id, test=False):\n",
  401. "    \"\"\"Build the feature vector of one user.\n",
  402. "    us_id: key into the per-user lookup tables built in earlier cells.\n",
  403. "    test: unused; kept so calls that pass it keep working.\n",
  404. "    Returns (us_id, features) with features a flat list.\"\"\"\n",
  405. "\n",
  406. "    now_ev = user_event_pd_frame[us_id]\n",
  407. "    stats = now_ev.describe().values  # computed once; appended twice below to keep the original layout\n",
  408. "    now_x = []\n",
  409. "    for row in stats:\n",
  410. "        now_x.extend(row)\n",
  411. "    now_x.append(max(cool_feature[step] for step in now_ev.step_id))\n",
  412. "    now_x.append(FuckUpTimePart[us_id])\n",
  413. "    now_x.append(user_passed_sum[us_id])\n",
  414. "    now_x.append(user_viewed_sum[us_id])\n",
  415. "    now_x.append(costed_passed_user_actions[us_id])\n",
  416. "    now_x.append(now_ev.step_cost.sum())\n",
  417. "\n",
  418. "    # The original appended counts in each Counter's own key order and only for the codes that user had,\n",
  419. "    # so rows differed in length and one column could hold different actions. Missing codes now count as 0.\n",
  420. "    counts = user_actions_count[us_id]\n",
  421. "    now_x.extend(counts[code] for code in ALL_ACTION_CODES)\n",
  422. "    for row in stats:\n",
  423. "        now_x.extend(row)\n",
  424. "    now_x.append(now_ev.step_type.max())\n",
  425. "    now_x.append(now_ev.step_type.min())\n",
  426. "    now_x.append(now_ev.step_cost.sum())\n",
  427. "    now_x.append(now_ev.step_cost.mean())\n",
  428. "    return (us_id, now_x)"
  429. ]
  430. },
  431. {
  432. "cell_type": "code",
  433. "execution_count": 708,
  434. "metadata": {
  435. "collapsed": false
  436. },
  437. "outputs": [
  438. {
  439. "name": "stdout",
  440. "output_type": "stream",
  441. "text": [
  442. "\n"
  443. ]
  444. }
  445. ],
  446. "source": [
  447. "# Training matrix: one feature row and one `passed` label per user in `events`.\n",
  448. "X, Y = [], []\n",
  449. "for us_id in tqdm_notebook(set(events.user_id.tolist())):\n",
  450. "    user_key, features = gen_features(us_id)\n",
  451. "    X.append(features)\n",
  452. "    Y.append(targets[targets.user_id == user_key].passed.values[0])"
  453. ]
  454. },
  455. {
  456. "cell_type": "code",
  457. "execution_count": 709,
  458. "metadata": {
  459. "collapsed": false
  460. },
  461. "outputs": [
  462. {
  463. "name": "stdout",
  464. "output_type": "stream",
  465. "text": [
  466. "\n"
  467. ]
  468. }
  469. ],
  470. "source": [
  471. "# Test matrix; `ind` keeps the user ids in the same order as the rows of X_test.\n",
  472. "X_test, ind = [], []\n",
  473. "for us_id in tqdm_notebook(set(events_test.user_id.tolist())):\n",
  474. "    user_key, features = gen_features(us_id, True)\n",
  475. "    ind.append(user_key)\n",
  476. "    X_test.append(features)"
  477. ]
  478. },
  479. {
  480. "cell_type": "code",
  481. "execution_count": 710,
  482. "metadata": {
  483. "collapsed": false
  484. },
  485. "outputs": [],
  486. "source": [
  487. "#\n",
  488. "# X contains NaN features that need to be filled;\n",
  489. "# let's do it with pandas\n",
  490. "# \n",
  491. "X = pd.DataFrame(X)"
  492. ]
  493. },
  494. {
  495. "cell_type": "code",
  496. "execution_count": 711,
  497. "metadata": {
  498. "collapsed": true
  499. },
  500. "outputs": [],
  501. "source": [
  502. "X = X.fillna(method='bfill')"
  503. ]
  504. },
  505. {
  506. "cell_type": "code",
  507. "execution_count": 712,
  508. "metadata": {
  509. "collapsed": false
  510. },
  511. "outputs": [],
  512. "source": [
  513. "X = X.fillna(0)"
  514. ]
  515. },
  516. {
  517. "cell_type": "code",
  518. "execution_count": 713,
  519. "metadata": {
  520. "collapsed": true
  521. },
  522. "outputs": [],
  523. "source": [
  524. "X_test = pd.DataFrame(X_test).fillna(method='bfill')\n",
  525. "X_test = X_test.fillna(0)"
  526. ]
  527. },
  528. {
  529. "cell_type": "code",
  530. "execution_count": 714,
  531. "metadata": {
  532. "collapsed": false
  533. },
  534. "outputs": [],
  535. "source": [
  536. "X = X.values\n",
  537. "X = X.astype(np.float32)"
  538. ]
  539. },
  540. {
  541. "cell_type": "code",
  542. "execution_count": 715,
  543. "metadata": {
  544. "collapsed": false
  545. },
  546. "outputs": [],
  547. "source": [
  548. "# DataFrame.as_matrix() was removed in pandas 1.0; .values is what the X cell already uses.\n",
  549. "X_test = X_test.values.astype(np.float32)\n",
  550. "\n",
  551. "# Labels as a flat int32 vector.\n",
  552. "Y = np.ravel(Y).astype(np.int32)"
  555. ]
  556. },
  557. {
  558. "cell_type": "code",
  559. "execution_count": 716,
  560. "metadata": {
  561. "collapsed": true
  562. },
  563. "outputs": [],
  564. "source": [
  565. "def feature_normalize(feature_index, features_array):\n",
  566. "    \"\"\"Min-max scale column feature_index of features_array in place and return the same object.\n",
  567. "    A constant column (max == min) is left untouched.\"\"\"\n",
  568. "    column = [row[feature_index] for row in features_array]\n",
  569. "    high, low = max(column), min(column)\n",
  570. "    span = high - low\n",
  571. "    if span != 0:\n",
  572. "        for row in features_array:\n",
  573. "            row[feature_index] = (row[feature_index] - low) / span\n",
  574. "    return features_array"
  576. ]
  577. },
  578. {
  579. "cell_type": "code",
  580. "execution_count": 717,
  581. "metadata": {
  582. "collapsed": false
  583. },
  584. "outputs": [],
  585. "source": [
  586. "for column_index in range(len(X[0])):  # min-max scale every feature column of X in place\n",
  587. "    X = feature_normalize(column_index, X)"
  588. ]
  589. },
  590. {
  591. "cell_type": "code",
  592. "execution_count": 718,
  593. "metadata": {
  594. "collapsed": false
  595. },
  596. "outputs": [],
  597. "source": [
  598. "for column_index in range(len(X_test[0])):  # NOTE(review): scales with X_test's own min/max, not X's - confirm\n",
  599. "    X_test = feature_normalize(column_index, X_test)"
  600. ]
  601. },
  602. {
  603. "cell_type": "code",
  604. "execution_count": 719,
  605. "metadata": {
  606. "button": false,
  607. "collapsed": false,
  608. "deletable": true,
  609. "new_sheet": false,
  610. "run_control": {
  611. "read_only": false
  612. }
  613. },
  614. "outputs": [],
  615. "source": [
  616. "from sklearn.model_selection import train_test_split  # sklearn.cross_validation was removed in scikit-learn 0.20\n",
  617. "from sklearn.metrics import f1_score\n",
  618. "\n",
  619. "Xtr,Xval,Ytr,Yval = train_test_split(X,Y,test_size=0.1,random_state=128)  # 10% hold-out, fixed seed"
  620. ]
  621. },
  622. {
  623. "cell_type": "code",
  624. "execution_count": 720,
  625. "metadata": {
  626. "collapsed": true
  627. },
  628. "outputs": [],
  629. "source": [
  630. "import theano\n",
  631. "import theano.tensor as T\n",
  632. "import lasagne"
  633. ]
  634. },
  635. {
  636. "cell_type": "code",
  637. "execution_count": 721,
  638. "metadata": {
  639. "collapsed": true
  640. },
  641. "outputs": [],
  642. "source": [
  643. "input_X = T.vector(\"X\")\n",
  644. "target_y = T.scalar(dtype='int32')"
  645. ]
  646. },
  647. {
  648. "cell_type": "code",
  649. "execution_count": 722,
  650. "metadata": {
  651. "collapsed": true
  652. },
  653. "outputs": [],
  654. "source": [
  655. "from lasagne.updates import sgd\n",
  656. "from lasagne.nonlinearities import leaky_rectify, softmax, tanh, elu\n",
  657. "from lasagne.layers import InputLayer, DenseLayer\n",
  658. "# Three batch-normalised dense layers (100 -> 60 -> 20 units) followed by a 2-unit softmax output.\n",
  659. "l_in = InputLayer(shape=(None,len(X[0])))\n",
  660. "hl = DenseLayer(incoming=l_in, num_units=100)\n",
  661. "bb = lasagne.layers.batch_norm(hl)\n",
  662. "hl = DenseLayer(incoming=bb, num_units=60)  # was incoming=hl, which bypassed the first batch_norm wrapper\n",
  663. "bb = lasagne.layers.batch_norm(hl)\n",
  664. "hl = DenseLayer(incoming=bb, num_units=20)\n",
  665. "bb = lasagne.layers.batch_norm(hl)\n",
  666. "l_out = DenseLayer(incoming=bb, num_units=2, nonlinearity=softmax, name='outputlayer')"
  667. ]
  668. },
  669. {
  670. "cell_type": "code",
  671. "execution_count": 723,
  672. "metadata": {
  673. "collapsed": true
  674. },
  675. "outputs": [],
  676. "source": [
  677. "sym_x = T.matrix('X')\n",
  678. "sym_t = T.ivector('target')\n",
  679. "\n",
  680. "train_out = lasagne.layers.get_output(l_out, {l_in: sym_x}, deterministic=False)\n",
  681. "eval_out = lasagne.layers.get_output(l_out, {l_in: sym_x}, deterministic=True)\n",
  682. "\n",
  683. "all_params = lasagne.layers.get_all_params(l_out, trainable=True)"
  684. ]
  685. },
  686. {
  687. "cell_type": "code",
  688. "execution_count": 724,
  689. "metadata": {
  690. "collapsed": true
  691. },
  692. "outputs": [],
  693. "source": [
  694. "cost_train = T.nnet.categorical_crossentropy(train_out, sym_t).mean()\n",
  695. "cost_eval = T.nnet.categorical_crossentropy(eval_out, sym_t).mean()"
  696. ]
  697. },
  698. {
  699. "cell_type": "code",
  700. "execution_count": 725,
  701. "metadata": {
  702. "collapsed": true
  703. },
  704. "outputs": [],
  705. "source": [
  706. "all_grads = T.grad(cost_train, all_params)\n",
  707. "updates = lasagne.updates.adagrad(all_grads, all_params, learning_rate=0.1)"
  708. ]
  709. },
  710. {
  711. "cell_type": "code",
  712. "execution_count": 726,
  713. "metadata": {
  714. "collapsed": true
  715. },
  716. "outputs": [],
  717. "source": [
  718. "f_eval = theano.function(inputs=[sym_x, sym_t],\n",
  719. " outputs=[cost_eval, eval_out])\n",
  720. "\n",
  721. "f_train = theano.function(inputs=[sym_x, sym_t],\n",
  722. " outputs=[cost_train, eval_out],\n",
  723. " updates=updates)\n",
  724. "\n",
  725. "f_pred = theano.function(inputs=[sym_x],\n",
  726. " outputs=eval_out)"
  727. ]
  728. },
  729. {
  730. "cell_type": "code",
  731. "execution_count": 727,
  732. "metadata": {
  733. "collapsed": true
  734. },
  735. "outputs": [],
  736. "source": [
  737. "def iterate_minibatches(inputs, targets, batchsize, inputs_new=None):\n",
  738. "    \"\"\"Yield shuffled (inputs, [inputs_new,] targets) batches of batchsize rows; a short final batch is dropped.\"\"\"\n",
  739. "    assert len(inputs) == len(targets)\n",
  740. "    assert inputs_new is None or len(inputs_new) == len(inputs)  # used to compare inputs_new with itself\n",
  741. "    indices = np.arange(len(inputs))\n",
  742. "    np.random.shuffle(indices)\n",
  743. "    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):\n",
  744. "        excerpt = indices[start_idx:start_idx + batchsize]\n",
  745. "        if inputs_new is not None:  # `!= None` is element-wise on numpy arrays\n",
  746. "            yield inputs[excerpt], inputs_new[excerpt], targets[excerpt]\n",
  747. "        else:\n",
  748. "            yield inputs[excerpt], targets[excerpt]"
  749. ]
  750. },
  751. {
  752. "cell_type": "code",
  753. "execution_count": 739,
  754. "metadata": {
  755. "collapsed": false
  756. },
  757. "outputs": [
  758. {
  759. "ename": "KeyboardInterrupt",
  760. "evalue": "",
  761. "output_type": "error",
  762. "traceback": [
  763. "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
  764. "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
  765. "\u001b[0;32m<ipython-input-739-4cf751260849>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mepoch\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1000\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0my\u001b[0m \u001b[0;32min\u001b[0m \u001b[0miterate_minibatches\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mY\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mint32\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m10\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mf_train\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mold_score\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0mnow_score\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0mold_score\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnow_score\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
  766. "\u001b[0;32m/home/a.tvorozhkov/anaconda3/lib/python3.5/site-packages/theano/compile/function_module.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 871\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 872\u001b[0m \u001b[0moutputs\u001b[0m \u001b[0;34m=\u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 873\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0moutput_subset\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 874\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput_subset\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0moutput_subset\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 875\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
  767. "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
  768. ]
  769. }
  770. ],
  771. "source": [
  772. "old_score = -1.0  # below any F1 value, so the first epoch always records predictions\n",
  773. "answer, predicted = [], []\n",
  774. "for epoch in range(1000):\n",
  775. "    for x,y in iterate_minibatches(Xtr, Ytr, 10):  # fit on the training split only, Xval/Yval stay held out\n",
  776. "        f_train(list(x),list(y))\n",
  777. "    now_score = f1_score(Yval, f_pred(Xval).argmax(axis=1))  # was never assigned: NameError after the first epoch\n",
  778. "    if old_score < now_score:  # keep the test predictions of the best hold-out epoch so far\n",
  779. "        old_score = now_score\n",
  780. "        predicted = f_pred(X_test)\n",
  781. "        answer = [i.argmax() for i in predicted]"
  782. ]
  783. },
  784. {
  785. "cell_type": "code",
  786. "execution_count": 732,
  787. "metadata": {
  788. "collapsed": false
  789. },
  790. "outputs": [],
  791. "source": [
  792. "a = pd.DataFrame()"
  793. ]
  794. },
  795. {
  796. "cell_type": "code",
  797. "execution_count": 733,
  798. "metadata": {
  799. "collapsed": false
  800. },
  801. "outputs": [],
  802. "source": [
  803. "a['passed'] = answer"
  804. ]
  805. },
  806. {
  807. "cell_type": "code",
  808. "execution_count": 734,
  809. "metadata": {
  810. "collapsed": true
  811. },
  812. "outputs": [],
  813. "source": [
  814. "a['user_id'] = ind"
  815. ]
  816. },
  817. {
  818. "cell_type": "code",
  819. "execution_count": 735,
  820. "metadata": {
  821. "collapsed": true
  822. },
  823. "outputs": [],
  824. "source": [
  825. "a = a.set_index('user_id')"
  826. ]
  827. },
  828. {
  829. "cell_type": "code",
  830. "execution_count": 736,
  831. "metadata": {
  832. "collapsed": false
  833. },
  834. "outputs": [
  835. {
  836. "data": {
  837. "text/html": [
  838. "<div>\n",
  839. "<table border=\"1\" class=\"dataframe\">\n",
  840. " <thead>\n",
  841. " <tr style=\"text-align: right;\">\n",
  842. " <th></th>\n",
  843. " <th>passed</th>\n",
  844. " </tr>\n",
  845. " <tr>\n",
  846. " <th>user_id</th>\n",
  847. " <th></th>\n",
  848. " </tr>\n",
  849. " </thead>\n",
  850. " <tbody>\n",
  851. " <tr>\n",
  852. " <th>8193</th>\n",
  853. " <td>0</td>\n",
  854. " </tr>\n",
  855. " <tr>\n",
  856. " <th>16387</th>\n",
  857. " <td>0</td>\n",
  858. " </tr>\n",
  859. " <tr>\n",
  860. " <th>8196</th>\n",
  861. " <td>0</td>\n",
  862. " </tr>\n",
  863. " <tr>\n",
  864. " <th>5</th>\n",
  865. " <td>0</td>\n",
  866. " </tr>\n",
  867. " <tr>\n",
  868. " <th>9</th>\n",
  869. " <td>0</td>\n",
  870. " </tr>\n",
  871. " </tbody>\n",
  872. "</table>\n",
  873. "</div>"
  874. ],
  875. "text/plain": [
  876. " passed\n",
  877. "user_id \n",
  878. "8193 0\n",
  879. "16387 0\n",
  880. "8196 0\n",
  881. "5 0\n",
  882. "9 0"
  883. ]
  884. },
  885. "execution_count": 736,
  886. "metadata": {},
  887. "output_type": "execute_result"
  888. }
  889. ],
  890. "source": [
  891. "a.head()"
  892. ]
  893. },
  894. {
  895. "cell_type": "code",
  896. "execution_count": 738,
  897. "metadata": {
  898. "collapsed": false
  899. },
  900. "outputs": [],
  901. "source": [
  902. "a.to_csv('red.csv')"
  903. ]
  904. }
  905. ],
  906. "metadata": {
  907. "anaconda-cloud": {},
  908. "kernelspec": {
  909. "display_name": "Python [default]",
  910. "language": "python",
  911. "name": "python3"
  912. },
  913. "language_info": {
  914. "codemirror_mode": {
  915. "name": "ipython",
  916. "version": 3
  917. },
  918. "file_extension": ".py",
  919. "mimetype": "text/x-python",
  920. "name": "python",
  921. "nbconvert_exporter": "python",
  922. "pygments_lexer": "ipython3",
  923. "version": "3.5.2"
  924. },
  925. "widgets": {
  926. "state": {
  927. "00cc5f7a46ca4be0bba00e6ceb579bb5": {
  928. "views": [
  929. {
  930. "cell_index": 8
  931. }
  932. ]
  933. },
  934. "0c40a99c801d49128662aad82934a1aa": {
  935. "views": [
  936. {
  937. "cell_index": 8
  938. }
  939. ]
  940. },
  941. "1c8753f3411d463580879b08e2ddedb2": {
  942. "views": [
  943. {
  944. "cell_index": 6
  945. }
  946. ]
  947. },
  948. "2924a75eb0994a4ea19e4a69da2c9369": {
  949. "views": [
  950. {
  951. "cell_index": 9
  952. }
  953. ]
  954. },
  955. "29f9da92b18a453f933638b9c2405c2d": {
  956. "views": [
  957. {
  958. "cell_index": 20
  959. }
  960. ]
  961. },
  962. "400480f182d74426902a7a22d237df49": {
  963. "views": [
  964. {
  965. "cell_index": 8
  966. }
  967. ]
  968. },
  969. "5042d02270ee4d70a11abc437292a005": {
  970. "views": [
  971. {
  972. "cell_index": 10
  973. }
  974. ]
  975. },
  976. "9b2eaa75f9cb4ed69f45edcc18a1bcfa": {
  977. "views": [
  978. {
  979. "cell_index": 8
  980. }
  981. ]
  982. },
  983. "ad42efd44c2c4869b5165cb9b28fed70": {
  984. "views": [
  985. {
  986. "cell_index": 8
  987. }
  988. ]
  989. },
  990. "c5ab72890efb4c47b71cfe4efffc746f": {
  991. "views": [
  992. {
  993. "cell_index": 7
  994. }
  995. ]
  996. },
  997. "cb56ba5222384625a58b9fcd5a4edfd6": {
  998. "views": [
  999. {
  1000. "cell_index": 21
  1001. }
  1002. ]
  1003. },
  1004. "dff634abe09c40bb8e328e7bf43c47df": {
  1005. "views": [
  1006. {
  1007. "cell_index": 6
  1008. }
  1009. ]
  1010. },
  1011. "e53fe8aabd744a11b7d3ce29f3d89cd8": {
  1012. "views": [
  1013. {
  1014. "cell_index": 14
  1015. }
  1016. ]
  1017. },
  1018. "f199d361089443e0beec5c02c35f5394": {
  1019. "views": [
  1020. {
  1021. "cell_index": 8
  1022. }
  1023. ]
  1024. },
  1025. "f59ec1ad186d410aadf1e71bf09e6804": {
  1026. "views": [
  1027. {
  1028. "cell_index": 13
  1029. }
  1030. ]
  1031. },
  1032. "f715a71e5cad4bf89dae0a3b1f0fe3d5": {
  1033. "views": [
  1034. {
  1035. "cell_index": 8
  1036. }
  1037. ]
  1038. },
  1039. "f73d00d1f55f48c8bd04e41b2f4100ac": {
  1040. "views": [
  1041. {
  1042. "cell_index": 14
  1043. }
  1044. ]
  1045. },
  1046. "fde5d46ad89d410585f882b641c279b9": {
  1047. "views": [
  1048. {
  1049. "cell_index": 13
  1050. }
  1051. ]
  1052. }
  1053. },
  1054. "version": "1.2.0"
  1055. }
  1056. },
  1057. "nbformat": 4,
  1058. "nbformat_minor": 1
  1059. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement