From d515b327080e7c8898e776171d7f7fc7cfef429c Mon Sep 17 00:00:00 2001
From: searchivairus <leo@boytsov.info>
Date: Tue, 6 Feb 2018 22:00:58 -0500
Subject: [PATCH] Improving Python notebooks (no ticket)

---
 .../notebooks/search_dense_nonoptim.ipynb     | 376 ++++++++++++++++++
 ...h_dense.ipynb => search_dense_optim.ipynb} |  92 ++---
 python_bindings/notebooks/search_sparse.ipynb |  86 ++--
 3 files changed, 463 insertions(+), 91 deletions(-)
 create mode 100644 python_bindings/notebooks/search_dense_nonoptim.ipynb
 rename python_bindings/notebooks/{search_dense.ipynb => search_dense_optim.ipynb} (82%)

diff --git a/python_bindings/notebooks/search_dense_nonoptim.ipynb b/python_bindings/notebooks/search_dense_nonoptim.ipynb
new file mode 100644
index 0000000..77ff7e4
--- /dev/null
+++ b/python_bindings/notebooks/search_dense_nonoptim.ipynb
@@ -0,0 +1,376 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "import numpy \n",
+    "import sys \n",
+    "import nmslib \n",
+    "import time \n",
+    "import math \n",
+    "from sklearn.neighbors import NearestNeighbors\n",
+    "from sklearn.model_selection import train_test_split"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# Just read the data\n",
+    "all_data_matrix = numpy.loadtxt('../../sample_data/final128_10K.txt')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# Create a held-out query data set\n",
+    "(data_matrix, query_matrix) = train_test_split(all_data_matrix, test_size = 0.1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "# of queries 1000, # of data points 9000\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(\"# of queries %d, # of data points %d\"  % (query_matrix.shape[0], data_matrix.shape[0]) )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Set index parameters\n",
+    "# These are the most important ones\n",
+    "M = 15\n",
+    "efC = 100\n",
+    "\n",
+    "num_threads = 4\n",
+    "index_time_params = {'M': M, 'indexThreadQty': num_threads, 'efConstruction': efC, 'post' : 0,\n",
+    "                     'skip_optimized_index' : 1 # using non-optimized index!\n",
+    "                    }"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# Number of neighbors \n",
+    "K=100"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# Space name should correspond to the space name \n",
+    "# used for brute-force search\n",
+    "space_name='l2'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "9000"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Initialize the library, specify the space, the type of the vector and add data points \n",
+    "index = nmslib.init(method='hnsw', space=space_name, data_type=nmslib.DataType.DENSE_VECTOR) \n",
+    "index.addDataPointBatch(data_matrix) "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Index-time parameters {'M': 15, 'indexThreadQty': 4, 'efConstruction': 100, 'skip_optimized_index': 1, 'post': 0}\n",
+      "Indexing time = 0.291947\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Create an index\n",
+    "start = time.time()\n",
+    "index.createIndex(index_time_params) \n",
+    "end = time.time() \n",
+    "print('Index-time parameters', index_time_params)\n",
+    "print('Indexing time = %f' % (end-start))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Setting query-time parameters {'efSearch': 100}\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Setting query-time parameters\n",
+    "efS = 100\n",
+    "query_time_params = {'efSearch': efS}\n",
+    "print('Setting query-time parameters', query_time_params)\n",
+    "index.setQueryTimeParams(query_time_params)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "kNN time total=0.037669 (sec), per query=0.000038 (sec), per query adjusted for thread number=0.000151 (sec)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Querying\n",
+    "query_qty = query_matrix.shape[0]\n",
+    "start = time.time() \n",
+    "nbrs = index.knnQueryBatch(query_matrix, k = K, num_threads = num_threads)\n",
+    "end = time.time() \n",
+    "print('kNN time total=%f (sec), per query=%f (sec), per query adjusted for thread number=%f (sec)' % \n",
+    "      (end-start, float(end-start)/query_qty, num_threads*float(end-start)/query_qty)) "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Computing gold-standard data\n",
+      "Brute-force preparation time 0.001139\n",
+      "brute-force kNN time total=0.319399 (sec), per query=0.000319 (sec)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Computing gold-standard data \n",
+    "print('Computing gold-standard data')\n",
+    "\n",
+    "start = time.time()\n",
+    "sindx = NearestNeighbors(n_neighbors=K, metric='l2', algorithm='brute').fit(data_matrix)\n",
+    "end = time.time()\n",
+    "\n",
+    "print('Brute-force preparation time %f' % (end - start))\n",
+    "\n",
+    "start = time.time() \n",
+    "gs = sindx.kneighbors(query_matrix)\n",
+    "end = time.time()\n",
+    "\n",
+    "print('brute-force kNN time total=%f (sec), per query=%f (sec)' % \n",
+    "      (end-start, float(end-start)/query_qty) )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "kNN recall 0.993040\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Finally computing recall\n",
+    "recall=0.0\n",
+    "for i in range(0, query_qty):\n",
+    "  correct_set = set(gs[1][i])\n",
+    "  ret_set = set(nbrs[i][0])\n",
+    "  recall = recall + float(len(correct_set.intersection(ret_set))) / len(correct_set)\n",
+    "recall = recall / query_qty\n",
+    "print('kNN recall %f' % recall)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# Save a meta index\n",
+    "index.saveIndex('dense_index_nonoptim.bin')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Re-initialize the library, specify the space, the type of the vector.\n",
+    "newIndex = nmslib.init(method='hnsw', space=space_name, data_type=nmslib.DataType.DENSE_VECTOR) "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "9000"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# For non-optimized indices or methods different from HNSW we DO need to re-add data points\n",
+    "newIndex.addDataPointBatch(data_matrix) "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# Re-load the index and re-run queries\n",
+    "newIndex.loadIndex('dense_index_nonoptim.bin')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Setting query-time parameters {'efSearch': 100}\n",
+      "kNN time total=0.031991 (sec), per query=0.000032 (sec), per query adjusted for thread number=0.000128 (sec)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Setting query-time parameters and querying\n",
+    "print('Setting query-time parameters', query_time_params)\n",
+    "newIndex.setQueryTimeParams(query_time_params)\n",
+    "\n",
+    "query_qty = query_matrix.shape[0]\n",
+    "start = time.time() \n",
+    "new_nbrs = newIndex.knnQueryBatch(query_matrix, k = K, num_threads = num_threads)\n",
+    "end = time.time() \n",
+    "print('kNN time total=%f (sec), per query=%f (sec), per query adjusted for thread number=%f (sec)' % \n",
+    "      (end-start, float(end-start)/query_qty, num_threads*float(end-start)/query_qty)) "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "kNN recall 0.993040\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Finally computing recall for the new result set\n",
+    "recall=0.0\n",
+    "for i in range(0, query_qty):\n",
+    "  correct_set = set(gs[1][i])\n",
+    "  ret_set = set(new_nbrs[i][0])\n",
+    "  recall = recall + float(len(correct_set.intersection(ret_set))) / len(correct_set)\n",
+    "recall = recall / query_qty\n",
+    "print('kNN recall %f' % recall)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/python_bindings/notebooks/search_dense.ipynb b/python_bindings/notebooks/search_dense_optim.ipynb
similarity index 82%
rename from python_bindings/notebooks/search_dense.ipynb
rename to python_bindings/notebooks/search_dense_optim.ipynb
index 8d07ff0..21b6875 100644
--- a/python_bindings/notebooks/search_dense.ipynb
+++ b/python_bindings/notebooks/search_dense_optim.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 183,
+   "execution_count": 1,
    "metadata": {
     "collapsed": true
    },
@@ -19,8 +19,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 184,
-   "metadata": {},
+   "execution_count": 2,
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "# Just read the data\n",
@@ -29,8 +31,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 185,
-   "metadata": {},
+   "execution_count": 3,
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "# Create a held-out query data set\n",
@@ -39,7 +43,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 186,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
@@ -56,14 +60,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 187,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Index-time parameters {'indexThreadQty': 4, 'M': 15, 'efConstruction': 100, 'post': 0}\n"
+      "Index-time parameters {'efConstruction': 100, 'indexThreadQty': 4, 'M': 15, 'post': 0}\n"
      ]
     }
    ],
@@ -80,7 +84,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 188,
+   "execution_count": 6,
    "metadata": {
     "collapsed": true
    },
@@ -92,7 +96,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 189,
+   "execution_count": 7,
    "metadata": {
     "collapsed": true
    },
@@ -105,7 +109,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 190,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [
     {
@@ -114,7 +118,7 @@
        "9000"
       ]
      },
-     "execution_count": 190,
+     "execution_count": 8,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -127,15 +131,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 191,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Index-time parameters {'indexThreadQty': 4, 'M': 15, 'efConstruction': 100}\n",
-      "Indexing time = 0.233778\n"
+      "Index-time parameters {'efConstruction': 100, 'indexThreadQty': 4, 'M': 15}\n",
+      "Indexing time = 0.298415\n"
      ]
     }
    ],
@@ -151,7 +155,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 192,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {
@@ -172,14 +176,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 193,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "kNN time total=0.025079 (sec), per query=0.000025 (sec), per query adjusted for thread number=0.000100 (sec)\n"
+      "kNN time total=0.025963 (sec), per query=0.000026 (sec), per query adjusted for thread number=0.000104 (sec)\n"
      ]
     }
    ],
@@ -195,7 +199,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 194,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [
     {
@@ -203,8 +207,8 @@
      "output_type": "stream",
      "text": [
       "Computing gold-standard data\n",
-      "Brute-force preparation time 0.001082\n",
-      "brute-force kNN time total=0.222535 (sec), per query=0.000223 (sec)\n"
+      "Brute-force preparation time 0.001208\n",
+      "brute-force kNN time total=0.438362 (sec), per query=0.000438 (sec)\n"
      ]
     }
    ],
@@ -228,14 +232,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 195,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "kNN recall 0.992800\n"
+      "kNN recall 0.991740\n"
      ]
     }
    ],
@@ -252,54 +256,46 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 196,
+   "execution_count": 14,
    "metadata": {
     "collapsed": true
    },
    "outputs": [],
    "source": [
     "# Save a meta index\n",
-    "index.saveIndex('dense_index.bin')"
+    "index.saveIndex('dense_index_optim.bin')"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 197,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "9000"
-      ]
-     },
-     "execution_count": 197,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "execution_count": 15,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
    "source": [
-    "# Re-intitialize the library, specify the space, the type of the vector and add data points \n",
+    "# Re-initialize the library, specify the space, the type of the vector.\n",
     "newIndex = nmslib.init(method='hnsw', space=space_name, data_type=nmslib.DataType.DENSE_VECTOR) \n",
-    "\n",
-    "newIndex.addDataPointBatch(data_matrix) "
+    "# For an optimized L2 index, there's no need to re-load data points, but this would be required for\n",
+    "# a non-optimized index or for any method other than HNSW (other methods can save only meta indices)\n",
+    "#newIndex.addDataPointBatch(data_matrix) "
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 198,
+   "execution_count": 16,
    "metadata": {
     "collapsed": true
    },
    "outputs": [],
    "source": [
     "# Re-load the index and re-run queries\n",
-    "newIndex.loadIndex('dense_index.bin')"
+    "newIndex.loadIndex('dense_index_optim.bin')"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 199,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [
     {
@@ -307,7 +303,7 @@
      "output_type": "stream",
      "text": [
       "Setting query-time parameters {'efSearch': 100}\n",
-      "kNN time total=0.027727 (sec), per query=0.000028 (sec), per query adjusted for thread number=0.000111 (sec)\n"
+      "kNN time total=0.022647 (sec), per query=0.000023 (sec), per query adjusted for thread number=0.000091 (sec)\n"
      ]
     }
    ],
@@ -326,14 +322,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 200,
+   "execution_count": 18,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "kNN recall 0.992800\n"
+      "kNN recall 0.991740\n"
      ]
     }
    ],
diff --git a/python_bindings/notebooks/search_sparse.ipynb b/python_bindings/notebooks/search_sparse.ipynb
index 0b0e2d9..94ea321 100644
--- a/python_bindings/notebooks/search_sparse.ipynb
+++ b/python_bindings/notebooks/search_sparse.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 70,
+   "execution_count": 1,
    "metadata": {
     "collapsed": true
    },
@@ -20,7 +20,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 71,
+   "execution_count": 2,
    "metadata": {
     "collapsed": true
    },
@@ -57,7 +57,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 72,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
@@ -75,7 +75,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 73,
+   "execution_count": 4,
    "metadata": {
     "collapsed": true
    },
@@ -87,7 +87,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 74,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
@@ -104,17 +104,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 75,
+   "execution_count": 6,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Index-time parameters {'M': 30, 'indexThreadQty': 4, 'efConstruction': 100, 'post': 0}\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Set index parameters\n",
     "# These are the most important onese\n",
@@ -122,13 +114,12 @@
     "efC = 100\n",
     "\n",
     "num_threads = 4\n",
-    "index_time_params = {'M': M, 'indexThreadQty': num_threads, 'efConstruction': efC, 'post' : 0}\n",
-    "print('Index-time parameters', index_time_params)"
+    "index_time_params = {'M': M, 'indexThreadQty': num_threads, 'efConstruction': efC, 'post' : 0}"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 76,
+   "execution_count": 7,
    "metadata": {
     "collapsed": true
    },
@@ -140,7 +131,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 77,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [
     {
@@ -149,7 +140,7 @@
        "4500"
       ]
      },
-     "execution_count": 77,
+     "execution_count": 8,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -162,22 +153,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 78,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Index-time parameters {'M': 30, 'indexThreadQty': 4, 'efConstruction': 100}\n",
-      "Indexing time = 12.817909\n"
+      "Index-time parameters {'indexThreadQty': 4, 'post': 0, 'M': 30, 'efConstruction': 100}\n",
+      "Indexing time = 16.409487\n"
      ]
     }
    ],
    "source": [
     "# Create an index\n",
     "start = time.time()\n",
-    "index_time_params = {'M': M, 'indexThreadQty': num_threads, 'efConstruction': efC}\n",
     "index.createIndex(index_time_params) \n",
     "end = time.time() \n",
     "print('Index-time parameters', index_time_params)\n",
@@ -186,7 +176,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 79,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {
@@ -207,14 +197,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 80,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "kNN time total=1.068709 (sec), per query=0.002137 (sec), per query adjusted for thread number=0.008550 (sec)\n"
+      "kNN time total=2.378762 (sec), per query=0.004758 (sec), per query adjusted for thread number=0.019030 (sec)\n"
      ]
     }
    ],
@@ -230,7 +220,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 81,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [
     {
@@ -238,8 +228,8 @@
      "output_type": "stream",
      "text": [
       "Computing gold-standard data\n",
-      "Brute-force preparation time 0.085145\n",
-      "brute-force kNN time total=4.342679 (sec), per query=0.008685 (sec)\n"
+      "Brute-force preparation time 0.058578\n",
+      "brute-force kNN time total=2.372437 (sec), per query=0.004745 (sec)\n"
      ]
     }
    ],
@@ -263,14 +253,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 82,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "kNN recall 0.971320\n"
+      "kNN recall 0.970280\n"
      ]
     }
    ],
@@ -287,7 +277,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 83,
+   "execution_count": 14,
    "metadata": {
     "collapsed": true
    },
@@ -299,7 +289,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 84,
+   "execution_count": 15,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# Re-initialize the library, specify the space, the type of the vector\n",
+    "newIndex = nmslib.init(method='hnsw', space='cosinesimil_sparse', data_type=nmslib.DataType.SPARSE_VECTOR) "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
    "metadata": {},
    "outputs": [
     {
@@ -308,21 +310,19 @@
        "4500"
       ]
      },
-     "execution_count": 84,
+     "execution_count": 16,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "# Re-intitialize the library, specify the space, the type of the vector and add data points \n",
-    "newIndex = nmslib.init(method='hnsw', space='cosinesimil_sparse', data_type=nmslib.DataType.SPARSE_VECTOR) \n",
-    "\n",
+    "# For non-optimized indices we need to re-add data points\n",
     "newIndex.addDataPointBatch(data_matrix) "
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 85,
+   "execution_count": 17,
    "metadata": {
     "collapsed": true
    },
@@ -334,7 +334,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 86,
+   "execution_count": 18,
    "metadata": {},
    "outputs": [
     {
@@ -342,7 +342,7 @@
      "output_type": "stream",
      "text": [
       "Setting query-time parameters {'efSearch': 100}\n",
-      "kNN time total=1.036072 (sec), per query=0.002072 (sec), per query adjusted for thread number=0.008289 (sec)\n"
+      "kNN time total=1.224607 (sec), per query=0.002449 (sec), per query adjusted for thread number=0.009797 (sec)\n"
      ]
     }
    ],
@@ -361,14 +361,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 87,
+   "execution_count": 19,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "kNN recall 0.971320\n"
+      "kNN recall 0.970280\n"
      ]
     }
    ],