Advertisement
Guest User

Untitled

a guest
Oct 14th, 2019
130
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.11 KB | None | 0 0
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 22,
  6. "metadata": {},
  7. "outputs": [],
  8. "source": [
  9. "import time\n",
  10. "import json\n",
  11. "\n",
  12. "import numpy as np\n",
  13. "import faiss\n",
  14. "\n",
  15. "import sys\n",
  16. "\n",
  "def ivecs_read(fname):\n",
  "    \"\"\"Load an ``.ivecs`` file as a 2-D int32 array, one vector per row.\n",
  "\n",
  "    The file is read as a flat stream of int32 values. The first value is\n",
  "    taken as the vector dimension ``d``; the stream is then viewed as rows\n",
  "    of ``d + 1`` values and the leading column of every row is dropped.\n",
  "\n",
  "    Returns a fresh copy, so the result does not alias the read buffer.\n",
  "    \"\"\"\n",
  "    raw = np.fromfile(fname, dtype='int32')\n",
  "    row_width = raw[0] + 1  # leading per-row value + d components\n",
  "    records = raw.reshape(-1, row_width)\n",
  "    return records[:, 1:].copy()\n",
  21. "\n",
  "def fvecs_read(fname):\n",
  "    \"\"\"Load an ``.fvecs`` file as a 2-D float32 array, one vector per row.\n",
  "\n",
  "    Parsing is delegated to ``ivecs_read``; the int32 payload it returns is\n",
  "    then reinterpreted in place as float32 (a view, not a numeric cast).\n",
  "    \"\"\"\n",
  "    payload = ivecs_read(fname)\n",
  "    return payload.view('float32')\n",
  24. "\n",
  "# Default root directory of the similarity-search datasets (absolute path).\n",
  "simdir = '/mnt/vol/gfsai-east/ai-group/datasets/simsearch/'\n",
  "\n",
  "def load_sift1M(root_dir=simdir):\n",
  "    \"\"\"Read the four SIFT1M files stored under ``<root_dir>/sift1M``.\n",
  "\n",
  "    Args:\n",
  "        root_dir: directory that contains the ``sift1M`` folder; defaults\n",
  "            to ``simdir``.\n",
  "\n",
  "    Returns:\n",
  "        ``(xb, xq, xt, gt)``: the arrays read from ``sift_base.fvecs``,\n",
  "        ``sift_query.fvecs``, ``sift_learn.fvecs`` and\n",
  "        ``sift_groundtruth.ivecs`` respectively.\n",
  "    \"\"\"\n",
  "    print(\"Loading sift1M...\")\n",
  "    learn = fvecs_read(\"%s/sift1M/sift_learn.fvecs\" % root_dir)\n",
  "    base = fvecs_read(\"%s/sift1M/sift_base.fvecs\" % root_dir)\n",
  "    queries = fvecs_read(\"%s/sift1M/sift_query.fvecs\" % root_dir)\n",
  "    truth = ivecs_read(\"%s/sift1M/sift_groundtruth.ivecs\" % root_dir)\n",
  "\n",
  "    return base, queries, learn, truth\n",
  35. "\n",
  36. "\n",
  "def load_random(nb=1000 * 1000, d=128, nq=100, seed=1234):\n",
  "    \"\"\"Generate a reproducible synthetic dataset.\n",
  "\n",
  "    Args:\n",
  "        nb: number of base vectors (default 1,000,000).\n",
  "        d: vector dimensionality (default 128).\n",
  "        nq: number of query vectors, taken from the head of the base set\n",
  "            (default 100).\n",
  "        seed: value passed to ``np.random.seed``. Note that this reseeds\n",
  "            NumPy's global random state as a side effect.\n",
  "\n",
  "    Returns:\n",
  "        ``(xb, xq, xt, gt)`` where ``xb`` is a float32 array of shape\n",
  "        ``(nb, d)``, ``xq`` is the view ``xb[:nq]`` (it shares memory with\n",
  "        ``xb``), and ``xt`` and ``gt`` are the very same object as ``xb``.\n",
  "        ``gt`` is only a placeholder that keeps the 4-tuple shape of\n",
  "        ``load_sift1M``; it does not hold nearest-neighbour ids.\n",
  "    \"\"\"\n",
  "    print(\"Loading random...\")\n",
  "    np.random.seed(seed)  # make reproducible\n",
  "    xb = np.random.random((nb, d)).astype('float32')\n",
  "    # Add row_index / 1000 to the first coordinate of every vector.\n",
  "    xb[:, 0] += np.arange(nb) / 1000.\n",
  "    xq = xb[:nq]\n",
  "    xt = xb\n",
  "    gt = xb\n",
  "    return xb, xq, xt, gt\n",
  46. "\n",
  "def test_with(index, xb, xq, n_runs=100, n_queries=10, k=10):\n",
  "    \"\"\"Train and populate ``index`` with ``xb``, then time repeated searches.\n",
  "\n",
  "    Args:\n",
  "        index: index object exposing ``train``, ``add`` and ``search``.\n",
  "        xb: vectors used both to train the index and as its contents.\n",
  "        xq: query vectors; only the first ``n_queries`` rows are searched.\n",
  "        n_runs: number of timed ``search`` calls (default 100).\n",
  "        n_queries: number of leading query rows per call (default 10).\n",
  "        k: number of neighbours requested per query (default 10).\n",
  "\n",
  "    Prints the median duration of one ``search`` call in milliseconds,\n",
  "    followed by ``stats.ndis`` accumulated over all runs. Returns None.\n",
  "    \"\"\"\n",
  "    index.train(xb)\n",
  "    index.add(xb)\n",
  "\n",
  "    stats = faiss.cvar.indexIVF_stats\n",
  "    stats.reset()  # count only the work done by the timed searches below\n",
  "    total_times = []\n",
  "    for _ in range(n_runs):\n",
  "        t0 = time.perf_counter()\n",
  "        index.search(xq[:n_queries], k)\n",
  "        t1 = time.perf_counter()\n",
  "        total_times.append((t1 - t0) * 1000.0)\n",
  "    # Report ndis next to the median: the counters are reset above for that\n",
  "    # purpose, and the outputs recorded for this helper show both values.\n",
  "    print(np.median(total_times), stats.ndis)\n",
  61. "\n",
  "def test_with1(index, xb, xq, n_runs=100, n_queries=10, k=10):\n",
  "    \"\"\"Time repeated searches on ``index`` without training or adding.\n",
  "\n",
  "    Args:\n",
  "        index: index object exposing ``search``; it is used as-is.\n",
  "        xb: unused; kept so the signature stays parallel to ``test_with``.\n",
  "        xq: query vectors; only the first ``n_queries`` rows are searched.\n",
  "        n_runs: number of timed ``search`` calls (default 100).\n",
  "        n_queries: number of leading query rows per call (default 10).\n",
  "        k: number of neighbours requested per query (default 10).\n",
  "\n",
  "    Prints the median duration of one ``search`` call in milliseconds,\n",
  "    followed by ``stats.ndis`` accumulated over all runs. Returns None.\n",
  "    \"\"\"\n",
  "    stats = faiss.cvar.indexIVF_stats\n",
  "    stats.reset()  # count only the work done by the timed searches below\n",
  "    total_times = []\n",
  "    for _ in range(n_runs):\n",
  "        t0 = time.perf_counter()\n",
  "        index.search(xq[:n_queries], k)\n",
  "        t1 = time.perf_counter()\n",
  "        total_times.append((t1 - t0) * 1000.0)\n",
  "    print(np.median(total_times), stats.ndis)\n"
  73. ]
  74. },
  75. {
  76. "cell_type": "code",
  77. "execution_count": 10,
  78. "metadata": {},
  79. "outputs": [
  80. {
  81. "name": "stdout",
  82. "output_type": "stream",
  83. "text": [
  84. "Loading sift1M...\n",
  85. "(10000, 512) (1000000, 512)\n",
  86. "47.14250564575195 137396900\n"
  87. ]
  88. }
  89. ],
  90. "source": [
  91. "\n",
  "d = 512\n",
  "\n",
  "# SIFT1M: append zero columns on the right of every vector up to d dimensions\n",
  "xb, xq, xt, gt = load_sift1M()\n",
  "xb = np.concatenate((xb, np.zeros((xb.shape[0], d - xb.shape[1]), dtype=xb.dtype)), axis=1)\n",
  "xq = np.concatenate((xq, np.zeros((xq.shape[0], d - xq.shape[1]), dtype=xq.dtype)), axis=1)\n",
  "\n",
  "print(xq.shape, xb.shape)\n",
  "\n",
  "# IVF-Flat index over a flat L2 quantizer: 128 lists, 16 probed per query\n",
  "faiss.omp_set_num_threads(16)\n",
  "index = faiss.IndexIVFFlat(faiss.IndexFlatL2(d), d, 128)\n",
  "index.nprobe = 16\n",
  "test_with(index, xb, xq)"
  105. ]
  106. },
  107. {
  108. "cell_type": "code",
  109. "execution_count": 12,
  110. "metadata": {},
  111. "outputs": [
  112. {
  113. "name": "stdout",
  114. "output_type": "stream",
  115. "text": [
  116. "Loading random...\n",
  117. "(100, 512) (1000000, 512)\n",
  118. "29.09719944000244 88811000\n"
  119. ]
  120. }
  121. ],
  122. "source": [
  "# Random data: append zero columns on the right of every vector up to d dimensions\n",
  "xb, xq, xt, gt = load_random()\n",
  "xb = np.concatenate((xb, np.zeros((xb.shape[0], d - xb.shape[1]), dtype=xb.dtype)), axis=1)\n",
  "xq = np.concatenate((xq, np.zeros((xq.shape[0], d - xq.shape[1]), dtype=xq.dtype)), axis=1)\n",
  "\n",
  "print(xq.shape, xb.shape)\n",
  "\n",
  "# Same index configuration as the SIFT1M cell: 128 lists, 16 probed per query\n",
  "faiss.omp_set_num_threads(16)\n",
  "index2 = faiss.IndexIVFFlat(faiss.IndexFlatL2(d), d, 128)\n",
  "index2.nprobe = 16\n",
  "test_with(index2, xb, xq)"
  134. ]
  135. },
  136. {
  137. "cell_type": "code",
  138. "execution_count": 18,
  139. "metadata": {},
  140. "outputs": [],
  141. "source": [
  "# Query index2's quantizer directly: 16 results for each of the first 10 rows of xq.\n",
  "# D and I stay in the notebook namespace; a later cell indexes list_sizes with I[0].\n",
  "# NOTE(review): 16 equals the nprobe set on index2, so these are presumably the lists a search would probe; confirm.\n",
  142. "D, I = index2.quantizer.search(xq[:10], 16)"
  143. ]
  144. },
  145. {
  146. "cell_type": "code",
  147. "execution_count": 25,
  148. "metadata": {},
  149. "outputs": [
  150. {
  151. "name": "stdout",
  152. "output_type": "stream",
  153. "text": [
  154. "57.5098991394043 151247400\n"
  155. ]
  156. }
  157. ],
  158. "source": [
  "# Time searches on index2 with 100 rows of xb picked by np.random.choice as queries.\n",
  "# index2 must already have been trained and filled by test_with in an earlier cell.\n",
  "# The sampling range follows xb's row count instead of a hard-coded 1000000.\n",
  "test_with1(index2, xb, xb[np.random.choice(xb.shape[0], size=100)])"
  160. ]
  161. },
  162. {
  163. "cell_type": "code",
  164. "execution_count": 30,
  165. "metadata": {},
  166. "outputs": [
  167. {
  168. "data": {
  169. "text/plain": [
  170. "array([5658, 5176, 4474, 4336, 4191, 4410, 4866, 5219, 5097, 5018, 5732,\n",
  171. " 6245, 6441, 6860, 7450, 7638])"
  172. ]
  173. },
  174. "execution_count": 30,
  175. "metadata": {
  176. "bento_obj_id": "139687776509584"
  177. },
  178. "output_type": "execute_result"
  179. }
  180. ],
  181. "source": [
  "# Size of every inverted list in index2, then the sizes of the lists named in I[0].\n",
  "# I comes from the quantizer search cell and must be defined before this cell runs.\n",
  "# The list count is read from index2.nlist instead of repeating the literal 128.\n",
  "list_sizes = np.array([index2.invlists.list_size(i) for i in range(index2.nlist)])\n",
  "list_sizes[I[0]]"
  185. ]
  186. },
  187. {
  188. "cell_type": "code",
  189. "execution_count": 29,
  190. "metadata": {},
  191. "outputs": [
  192. {
  193. "data": {
  194. "text/plain": [
  195. "7812.5"
  196. ]
  197. },
  198. "execution_count": 29,
  199. "metadata": {
  200. "bento_obj_id": "139693108448184"
  201. },
  202. "output_type": "execute_result"
  203. }
  204. ],
  205. "source": [
  "# Mean of list_sizes, i.e. the average inverted-list length computed in the previous cell.\n",
  206. "list_sizes.mean()"
  207. ]
  208. },
  209. {
  210. "cell_type": "code",
  211. "execution_count": null,
  212. "metadata": {
  213. "collapsed": true
  214. },
  215. "outputs": [],
  216. "source": []
  217. }
  218. ],
  219. "metadata": {
  220. "bento_stylesheets": {
  221. "bento/extensions/flow/main.css": true,
  222. "bento/extensions/kernel_selector/main.css": true,
  223. "bento/extensions/kernel_ui/main.css": true,
  224. "bento/extensions/new_kernel/main.css": true,
  225. "bento/extensions/system_usage/main.css": true,
  226. "bento/extensions/theme/main.css": true
  227. },
  228. "kernelspec": {
  229. "display_name": "faiss",
  230. "language": "python",
  231. "name": "bento_kernel_faiss"
  232. },
  233. "language_info": {
  234. "codemirror_mode": {
  235. "name": "ipython",
  236. "version": 3
  237. },
  238. "file_extension": ".py",
  239. "mimetype": "text/x-python",
  240. "name": "python",
  241. "nbconvert_exporter": "python",
  242. "pygments_lexer": "ipython3",
  243. "version": "3.6.3rc1+"
  244. }
  245. },
  246. "nbformat": 4,
  247. "nbformat_minor": 2
  248. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement