SHARE
TWEET

Untitled

a guest Oct 14th, 2019 88 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. {
  2.  "cells": [
  3.   {
  4.    "cell_type": "code",
  5.    "execution_count": 22,
  6.    "metadata": {},
  7.    "outputs": [],
  8.    "source": [
  9.     "import time\n",
  10.     "import json\n",
  11.     "\n",
  12.     "import numpy as np\n",
  13.     "import faiss\n",
  14.     "\n",
  15.     "import sys\n",
  16.     "\n",
  17.     "def ivecs_read(fname):\n",
  18.     "    a = np.fromfile(fname, dtype='int32')\n",
  19.     "    d = a[0]\n",
  20.     "    return a.reshape(-1, d + 1)[:, 1:].copy()\n",
  21.     "\n",
  22.     "def fvecs_read(fname):\n",
  23.     "    return ivecs_read(fname).view('float32')\n",
  24.     "\n",
  25.     "simdir = '/mnt/vol/gfsai-east/ai-group/datasets/simsearch/'\n",
  26.     "\n",
  27.     "def load_sift1M(root_dir=simdir):\n",
  28.     "    print(\"Loading sift1M...\")\n",
  29.     "    xt = fvecs_read(\"%s/sift1M/sift_learn.fvecs\" %(root_dir))\n",
  30.     "    xb = fvecs_read(\"%s/sift1M/sift_base.fvecs\" % (root_dir))\n",
  31.     "    xq = fvecs_read(\"%s/sift1M/sift_query.fvecs\" % (root_dir))\n",
  32.     "    gt = ivecs_read(\"%s/sift1M/sift_groundtruth.ivecs\" %(root_dir))\n",
  33.     "\n",
  34.     "    return xb, xq, xt, gt\n",
  35.     "\n",
  36.     "\n",
  37.     "def load_random():\n",
  38.     "    print(\"Loading random...\")\n",
  39.     "    np.random.seed(1234)             # make reproducible\n",
  40.     "    xb = np.random.random((1000 * 1000, 128)).astype('float32')\n",
  41.     "    xb[:, 0] += np.arange(1000 * 1000) / 1000.\n",
  42.     "    xq = xb[:100]\n",
  43.     "    xt = xb\n",
  44.     "    gt = xb\n",
  45.     "    return xb, xq, xt, gt\n",
  46.     "\n",
  47.     "def test_with(index, xb, xq):\n",
  48.     "    index.train(xb)\n",
  49.     "\n",
  50.     "    index.add(xb)\n",
  51.     "    \n",
  52.     "    stats = faiss.cvar.indexIVF_stats\n",
  53.     "    stats.reset()    \n",
  54.     "    total_times = []\n",
  55.     "    for j in range(100):\n",
  56.     "        t0 = time.time()\n",
  57.     "        D, I = index.search(xq[:10], 10)\n",
  58.     "        t1 = time.time()\n",
  59.     "        total_times.append((t1 - t0) * 1000.0)\n",
  60.     "    print(np.median(total_times))\n",
  61.     "\n",
  62.     "def test_with1(index, xb, xq):\n",
  63.     "\n",
  64.     "    stats = faiss.cvar.indexIVF_stats\n",
  65.     "    stats.reset()    \n",
  66.     "    total_times = []\n",
  67.     "    for j in range(100):\n",
  68.     "        t0 = time.time()\n",
  69.     "        D, I = index.search(xq[:10], 10)\n",
  70.     "        t1 = time.time()\n",
  71.     "        total_times.append((t1 - t0) * 1000.0)\n",
  72.     "    print(np.median(total_times), stats.ndis)\n"
  73.    ]
  74.   },
  75.   {
  76.    "cell_type": "code",
  77.    "execution_count": 10,
  78.    "metadata": {},
  79.    "outputs": [
  80.     {
  81.      "name": "stdout",
  82.      "output_type": "stream",
  83.      "text": [
  84.       "Loading sift1M...\n",
  85.       "(10000, 512) (1000000, 512)\n",
  86.       "47.14250564575195 137396900\n"
  87.      ]
  88.     }
  89.    ],
  90.    "source": [
  91.     "\n",
  92.     "d = 512\n",
  93.     "\n",
  94.     "#sift1M\n",
  95.     "xb, xq, xt, gt = load_sift1M()\n",
  96.     "xb = np.hstack((xb, np.zeros((xb.shape[0], d - xb.shape[1]), dtype = xb.dtype)))\n",
  97.     "xq = np.hstack((xq, np.zeros((xq.shape[0], d - xq.shape[1]), dtype = xq.dtype)))\n",
  98.     "\n",
  99.     "print(xq.shape, xb.shape)\n",
  100.     "\n",
  101.     "faiss.omp_set_num_threads(16)\n",
  102.     "index = faiss.IndexIVFFlat(faiss.IndexFlatL2(d), d, 128)\n",
  103.     "index.nprobe = 16\n",
  104.     "test_with(index, xb, xq)"
  105.    ]
  106.   },
  107.   {
  108.    "cell_type": "code",
  109.    "execution_count": 12,
  110.    "metadata": {},
  111.    "outputs": [
  112.     {
  113.      "name": "stdout",
  114.      "output_type": "stream",
  115.      "text": [
  116.       "Loading random...\n",
  117.       "(100, 512) (1000000, 512)\n",
  118.       "29.09719944000244 88811000\n"
  119.      ]
  120.     }
  121.    ],
  122.    "source": [
  123.     "#random\n",
  124.     "xb, xq, xt, gt = load_random()\n",
  125.     "xb = np.hstack((xb, np.zeros((xb.shape[0], d - xb.shape[1]), dtype = xb.dtype)))\n",
  126.     "xq = np.hstack((xq, np.zeros((xq.shape[0], d - xq.shape[1]), dtype = xq.dtype)))\n",
  127.     "\n",
  128.     "print(xq.shape, xb.shape)\n",
  129.     "\n",
  130.     "faiss.omp_set_num_threads(16)\n",
  131.     "index2 = faiss.IndexIVFFlat(faiss.IndexFlatL2(d), d, 128)\n",
  132.     "index2.nprobe = 16\n",
  133.     "test_with(index2, xb, xq)"
  134.    ]
  135.   },
  136.   {
  137.    "cell_type": "code",
  138.    "execution_count": 18,
  139.    "metadata": {},
  140.    "outputs": [],
  141.    "source": [
  142.     "D, I = index2.quantizer.search(xq[:10], 16)"
  143.    ]
  144.   },
  145.   {
  146.    "cell_type": "code",
  147.    "execution_count": 25,
  148.    "metadata": {},
  149.    "outputs": [
  150.     {
  151.      "name": "stdout",
  152.      "output_type": "stream",
  153.      "text": [
  154.       "57.5098991394043 151247400\n"
  155.      ]
  156.     }
  157.    ],
  158.    "source": [
  159.     "test_with1(index2, xb, xb[np.random.choice(1000000, size=100)])"
  160.    ]
  161.   },
  162.   {
  163.    "cell_type": "code",
  164.    "execution_count": 30,
  165.    "metadata": {},
  166.    "outputs": [
  167.     {
  168.      "data": {
  169.       "text/plain": [
  170.        "array([5658, 5176, 4474, 4336, 4191, 4410, 4866, 5219, 5097, 5018, 5732,\n",
  171.        "       6245, 6441, 6860, 7450, 7638])"
  172.       ]
  173.      },
  174.      "execution_count": 30,
  175.      "metadata": {
  176.       "bento_obj_id": "139687776509584"
  177.      },
  178.      "output_type": "execute_result"
  179.     }
  180.    ],
  181.    "source": [
  182.     "# get the list for the clusters \n",
  183.     "list_sizes = np.array([index2.invlists.list_size(i) for i in range(128)])\n",
  184.     "list_sizes[I[0]] "
  185.    ]
  186.   },
  187.   {
  188.    "cell_type": "code",
  189.    "execution_count": 29,
  190.    "metadata": {},
  191.    "outputs": [
  192.     {
  193.      "data": {
  194.       "text/plain": [
  195.        "7812.5"
  196.       ]
  197.      },
  198.      "execution_count": 29,
  199.      "metadata": {
  200.       "bento_obj_id": "139693108448184"
  201.      },
  202.      "output_type": "execute_result"
  203.     }
  204.    ],
  205.    "source": [
  206.     "list_sizes.mean()"
  207.    ]
  208.   },
  209.   {
  210.    "cell_type": "code",
  211.    "execution_count": null,
  212.    "metadata": {
  213.     "collapsed": true
  214.    },
  215.    "outputs": [],
  216.    "source": []
  217.   }
  218.  ],
  219.  "metadata": {
  220.   "bento_stylesheets": {
  221.    "bento/extensions/flow/main.css": true,
  222.    "bento/extensions/kernel_selector/main.css": true,
  223.    "bento/extensions/kernel_ui/main.css": true,
  224.    "bento/extensions/new_kernel/main.css": true,
  225.    "bento/extensions/system_usage/main.css": true,
  226.    "bento/extensions/theme/main.css": true
  227.   },
  228.   "kernelspec": {
  229.    "display_name": "faiss",
  230.    "language": "python",
  231.    "name": "bento_kernel_faiss"
  232.   },
  233.   "language_info": {
  234.    "codemirror_mode": {
  235.     "name": "ipython",
  236.     "version": 3
  237.    },
  238.    "file_extension": ".py",
  239.    "mimetype": "text/x-python",
  240.    "name": "python",
  241.    "nbconvert_exporter": "python",
  242.    "pygments_lexer": "ipython3",
  243.    "version": "3.6.3rc1+"
  244.   }
  245.  },
  246.  "nbformat": 4,
  247.  "nbformat_minor": 2
  248. }
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
Not a member of Pastebin yet?
Sign Up, it unlocks many cool features!
 
Top