Advertisement
Alexandre_lsv

Untitled

Nov 19th, 2016
1,110
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. {
  2.  "cells": [
  3.   {
  4.    "cell_type": "code",
  5.    "execution_count": 1,
  6.    "metadata": {
  7.     "collapsed": true
  8.    },
  9.    "outputs": [],
  10.    "source": [
  11.     "from string import ascii_letters\n",
  12.     "from sklearn.feature_extraction.text import TfidfVectorizer\n",
  13.     "import requests\n",
  14.     "import re\n",
  15.     "from sklearn.cluster import DBSCAN\n",
  16.     "from tkinter import *"
  17.    ]
  18.   },
  19.   {
  20.    "cell_type": "code",
  21.    "execution_count": 2,
  22.    "metadata": {
  23.     "collapsed": true
  24.    },
  25.    "outputs": [],
  26.    "source": [
  27.     "def parsString(s):\n",
  28.     "    \"\"\"Lower-case `s` and return its tokens, each prefixed by one space.\n",
  29.     "\n",
  30.     "    A run of ASCII letters forms one token. A non-letter that directly\n",
  31.     "    follows such a run only terminates it and is itself dropped; any other\n",
  32.     "    non-letter becomes a one-character token.\n",
  33.     "    \"\"\"\n",
  34.     "    tokens=[]\n",
  35.     "    word=\"\"\n",
  36.     "    for ch in s.lower():\n",
  37.     "        if ch in ascii_letters:\n",
  38.     "            word+=ch\n",
  39.     "        elif word:\n",
  40.     "            tokens.append(word)\n",
  41.     "            word=\"\"\n",
  42.     "        else:\n",
  43.     "            tokens.append(ch)\n",
  44.     "    # A run that reaches the end of the input must be flushed too; the old\n",
  45.     "    # check `tmpS==\"\"` was inverted, so a trailing word was silently lost.\n",
  46.     "    if word:\n",
  47.     "        tokens.append(word)\n",
  48.     "    return \"\".join(\" \"+tok for tok in tokens)"
  49.    ]
  50.   },
  51.   {
  52.    "cell_type": "code",
  53.    "execution_count": 3,
  54.    "metadata": {
  55.     "collapsed": false
  56.    },
  57.    "outputs": [],
  58.    "source": [
  59.     "def readDataset(path='dataset'):\n",
  60.     "    \"\"\"Load the stored vectors and normalize each one with parsString.\n",
  61.     "\n",
  62.     "    path -- text file holding one raw vector per line (default 'dataset').\n",
  63.     "    Returns a list with one normalized string per line, in file order.\n",
  64.     "    \"\"\"\n",
  65.     "    # `with` closes the file even when parsString raises; the old manual\n",
  66.     "    # close() was skipped on errors and the counter `it` was never read.\n",
  67.     "    with open(path, 'r') as f:\n",
  68.     "        vectorsS=[parsString(line) for line in f]\n",
  69.     "    return vectorsS"
  70.    ]
  71.   },
  72.   {
  73.    "cell_type": "code",
  74.    "execution_count": 4,
  75.    "metadata": {
  76.     "collapsed": false
  77.    },
  78.    "outputs": [],
  79.    "source": [
  80.     "def makeDataset(count=200, path='dataset'):\n",
  81.     "    \"\"\"Fetch `count` random vectors from the API and write them to `path`, one per line.\"\"\"\n",
  82.     "    # NOTE(review): the user token is hard-coded in this URL -- move it to configuration.\n",
  83.     "    url='https://slot-ml.ptsecurity.com/api/v1/users/ad828642388ee5a23f0c90483520309204ea3d53/vectors/?random'\n",
  84.     "    # `with` closes the file even if a request raises part-way through the loop.\n",
  85.     "    with open(path, 'w') as f:\n",
  86.     "        for i in range(count):\n",
  87.     "            body=requests.get(url).text\n",
  88.     "            # Cut the vector out of the raw reply text; the +10/-4 offsets presumably\n",
  89.     "            # skip the JSON punctuation around the value -- TODO confirm against the API.\n",
  90.     "            vect=body[body.find(\"vector\")+10:body.find(\"\\\"id\\\":\")-4]\n",
  91.     "            f.write(vect+'\\n')"
  92.    ]
  93.   },
  94.   {
  95.    "cell_type": "code",
  96.    "execution_count": 5,
  97.    "metadata": {
  98.     "collapsed": true
  99.    },
  100.    "outputs": [],
  101.    "source": [
  102.     "def train(dataset):\n",
  103.     "    \"\"\"Fit a TF-IDF model on `dataset` and return the transformed samples.\n",
  104.     "\n",
  105.     "    dataset -- the text samples handed to fit_transform, one string each.\n",
  106.     "    Features are single words (analyzer='word', unigrams only); the fitted\n",
  107.     "    vectorizer is discarded and only the fit_transform result is returned.\n",
  108.     "    \"\"\"\n",
  109.     "    tfidf=TfidfVectorizer(analyzer='word', ngram_range=(1, 1))\n",
  110.     "    return tfidf.fit_transform(dataset)"
  111.    ]
  112.   },
  113.   {
  114.    "cell_type": "code",
  115.    "execution_count": 6,
  116.    "metadata": {
  117.     "collapsed": true
  118.    },
  119.    "outputs": [],
  120.    "source": [
  121.     "def clusterization(vectors, eps=0.5):\n",
  122.     "    \"\"\"Fit DBSCAN on `vectors` with neighbourhood radius `eps` (default 0.5) and return the fitted model.\"\"\"\n",
  123.     "    return DBSCAN(eps=eps).fit(vectors)"
  124.    ]
  125.   },
  126.   {
  127.    "cell_type": "code",
  128.    "execution_count": 7,
  129.    "metadata": {
  130.     "collapsed": false
  131.    },
  132.    "outputs": [],
  133.    "source": [
  134.     "def sendSol(vectorsString):\n",
  135.     "    \"\"\"Fetch 100 random vectors, classify each one and post the class back.\n",
  136.     "\n",
  137.     "    vectorsString -- list of reference samples already normalized by\n",
  138.     "    parsString. It is no longer modified: every fetched vector is clustered\n",
  139.     "    together with a copy of these samples and the class derived from its\n",
  140.     "    DBSCAN label is posted to the results endpoint. Returns None.\n",
  141.     "    \"\"\"\n",
  142.     "    # NOTE(review): the user token is hard-coded in this URL -- move it to configuration.\n",
  143.     "    base='https://slot-ml.ptsecurity.com/api/v1/users/c34f6b9b747ebe438da878a07a318892a988c06f/'\n",
  144.     "    ok=0  # number of posts answered with HTTP 200 so far\n",
  145.     "    for i in range(100):\n",
  146.     "        body=requests.get(base+'vectors/?random').text\n",
  147.     "        # NOTE(review): the +10/-4 and +7/-4 offsets presumably skip the JSON\n",
  148.     "        # punctuation around the two values -- confirm against the API.\n",
  149.     "        vect=body[body.find(\"vector\")+10:body.find(\"\\\"id\\\":\")-4]\n",
  150.     "        vect_id=body[body.find(\"\\\"id\\\":\")+7:-4]\n",
  151.     "        strv=parsString(vect)\n",
  152.     "        # Append the query instead of writing to the fixed slot 999: that\n",
  153.     "        # slot raised IndexError for shorter lists and overwrote caller data.\n",
  154.     "        vectors=train(vectorsString+[strv])\n",
  155.     "        model=clusterization(vectors)\n",
  156.     "        clas=model.labels_[-1]\n",
  157.     "        if clas==-1:\n",
  158.     "            # DBSCAN noise: choose a fallback from the success rate so far\n",
  159.     "            # and the length of the normalized vector.\n",
  160.     "            if ok*2<=i:\n",
  161.     "                if len(strv)>100:\n",
  162.     "                    clas=-1\n",
  163.     "                else:\n",
  164.     "                    clas=-2\n",
  165.     "            else:\n",
  166.     "                clas=-4\n",
  167.     "        # The posted class is the label shifted by 3.\n",
  168.     "        mpost={}\n",
  169.     "        mpost[\"vector\"]=vect_id\n",
  170.     "        mpost[\"class\"]=clas+3\n",
  171.     "        msg=requests.post(base+'results/', data=mpost)\n",
  172.     "        # Compare the status code instead of the repr of the response object.\n",
  173.     "        if msg.status_code==200:\n",
  174.     "            ok+=1\n",
  175.     "\n"
  176.    ]
  177.   },
  178.   {
  179.    "cell_type": "code",
  180.    "execution_count": 8,
  181.    "metadata": {
  182.     "collapsed": true
  183.    },
  184.    "outputs": [],
  185.    "source": [
  186.     "def check(strfr):\n",
  187.     "    \"\"\"Return the DBSCAN cluster label of `strfr` among freshly downloaded vectors.\n",
  188.     "\n",
  189.     "    strfr -- raw text to classify; it is passed through str() and parsString.\n",
  190.     "    A new dataset is downloaded with makeDataset() and reloaded with\n",
  191.     "    readDataset() on every call.\n",
  192.     "    The returned label is -1 when DBSCAN treats the text as noise.\n",
  193.     "    \"\"\"\n",
  194.     "    # NOTE(review): this re-downloads the whole dataset (one HTTP request per\n",
  195.     "    # stored vector) on every call; consider caching it between checks.\n",
  196.     "    makeDataset()\n",
  197.     "    vectorsString=readDataset()\n",
  198.     "    vect=str(strfr)\n",
  199.     "    strv=parsString(vect)\n",
  200.     "    # Append the query instead of writing to the fixed slot 999: makeDataset\n",
  201.     "    # stores only 200 vectors, so that slot raised IndexError.\n",
  202.     "    vectorsString.append(strv)\n",
  203.     "    vectors=train(vectorsString)\n",
  204.     "    model=clusterization(vectors)\n",
  205.     "    # The query was added last, so its label is the last one.\n",
  206.     "    clas=model.labels_[-1]\n",
  207.     "    return clas\n",
  208.     "\n"
  209.    ]
  210.   },
  211.   {
  212.    "cell_type": "code",
  213.    "execution_count": 9,
  214.    "metadata": {
  215.     "collapsed": false
  216.    },
  217.    "outputs": [],
  218.    "source": [
  219.     "def action(event):\n",
  220.     "    \"\"\"Handle a click on the Check button.\n",
  221.     "\n",
  222.     "    Clears the result box `ent`, classifies the text currently in `tf` with\n",
  223.     "    check() and writes the returned label into `ent`. `event` is unused.\n",
  224.     "    \"\"\"\n",
  225.     "    ent.delete(\"1.0\", END)\n",
  226.     "    label=check(tf.get(\"1.0\",END))\n",
  227.     "    ent.insert(\"1.0\", label)\n",
  228.     "# Build the window: input box on top, result box below it, then the button.\n",
  229.     "root = Tk()\n",
  230.     "tf = Text(root, height=8, width=70)\n",
  231.     "tf.pack()\n",
  232.     "ent = Text(root, height=1, width=3)\n",
  233.     "ent.pack()\n",
  234.     "but = Button(root, text=\"Check\")\n",
  235.     "but.bind(\"<Button-1>\", action)\n",
  236.     "but.pack()\n",
  237.     "# Start the Tk event loop.\n",
  238.     "root.mainloop()\n",
  239.     "\n"
  240.    ]
  241.   }
  242.  ],
  243.  "metadata": {
  244.   "kernelspec": {
  245.    "display_name": "Python 3",
  246.    "language": "python",
  247.    "name": "python3"
  248.   },
  249.   "language_info": {
  250.    "codemirror_mode": {
  251.     "name": "ipython",
  252.     "version": 3
  253.    },
  254.    "file_extension": ".py",
  255.    "mimetype": "text/x-python",
  256.    "name": "python",
  257.    "nbconvert_exporter": "python",
  258.    "pygments_lexer": "ipython3",
  259.    "version": "3.5.2"
  260.   }
  261.  },
  262.  "nbformat": 4,
  263.  "nbformat_minor": 1
  264. }
Advertisement
RAW Paste Data Copied
Advertisement