From 3a7423737af730b99db8a0081856192e1f1c2693 Mon Sep 17 00:00:00 2001 From: Doug Friedman Date: Wed, 16 May 2018 20:08:12 -0400 Subject: [PATCH 01/13] Fix wrong link in readme.MD The jaccard sparse link was pointing to the wrong notebook so I fixed it. --- python_bindings/notebooks/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python_bindings/notebooks/README.md b/python_bindings/notebooks/README.md index 9dec549..726f1b9 100644 --- a/python_bindings/notebooks/README.md +++ b/python_bindings/notebooks/README.md @@ -5,7 +5,7 @@ We have Python notebooks for the following scenarios: 3. [The Euclidean space ofr for 8-bit integer SIFT vectors (the index is not optimized)](search_sift_uint8.ipynb); 4. [KL-divergence (non-optimized index)](search_vector_dense_kldiv.ipynb); 3. [Sparse cosine similarity (non-optimized index)](search_sparse_cosine.ipynb); -4. [Sparse Jaccard similarity (non-optimized index)](search_sparse_cosine.ipynb). +4. [Sparse Jaccard similarity (non-optimized index)](search_generic_sparse_jaccard.ipynb). Note that for for the dense space, we have examples of the so-called optimized and non-optimized indices. Except HNSW, all the methods save meta-indices rather than real ones. Meta indices contain only index structure, but not the data. Hence, before a meta-index can be loaded, we need to re-load data. One example is a memory efficient space to search for SIFT vectors. From 628078626aad0bcaff66dca00fa47ba15df4a69c Mon Sep 17 00:00:00 2001 From: Leonid Boytsov Date: Fri, 15 Jun 2018 16:14:18 -0400 Subject: [PATCH 02/13] Fixing a previously introduced build bug. 
--- python_bindings/setup.py | 2 +- similarity_search/include/factory/method/bbtree.h | 2 +- similarity_search/include/method/bbtree.h | 4 ---- similarity_search/test/test_integr.cc | 2 +- 4 files changed, 3 insertions(+), 7 deletions(-) diff --git a/python_bindings/setup.py b/python_bindings/setup.py index cd5cf01..49a22f6 100755 --- a/python_bindings/setup.py +++ b/python_bindings/setup.py @@ -19,7 +19,7 @@ else: # Otherwise build all the files here directly (excluding extras which need eigen/boost) - exclude_files = set("""lsh.cc lsh_multiprobe.cc lsh_space.cc falconn.cc nndes.cc space_sqfd.cc + exclude_files = set("""bbtree.cc lsh.cc lsh_multiprobe.cc lsh_space.cc falconn.cc nndes.cc space_sqfd.cc dummy_app.cc main.cc""".split()) for root, subdirs, files in os.walk(os.path.join("nmslib", "similarity_search", "src")): diff --git a/similarity_search/include/factory/method/bbtree.h b/similarity_search/include/factory/method/bbtree.h index 6f79806..264550f 100644 --- a/similarity_search/include/factory/method/bbtree.h +++ b/similarity_search/include/factory/method/bbtree.h @@ -15,7 +15,7 @@ #ifndef _FACTORY_BBTREE_H_ #define _FACTORY_BBTREE_H_ -#if WITH_EXTRAS +#ifdef WITH_EXTRAS #include diff --git a/similarity_search/include/method/bbtree.h b/similarity_search/include/method/bbtree.h index 888e924..e4a4fad 100644 --- a/similarity_search/include/method/bbtree.h +++ b/similarity_search/include/method/bbtree.h @@ -33,8 +33,6 @@ #ifndef _BBTREE_H_ #define _BBTREE_H_ -#if WITH_EXTRAS - #include "index.h" #include "params.h" @@ -128,6 +126,4 @@ class BBTree : public Index { } // namespace similarity -#endif - #endif // _BBTREE_H_ diff --git a/similarity_search/test/test_integr.cc b/similarity_search/test/test_integr.cc index 0fe083b..15fcff9 100644 --- a/similarity_search/test/test_integr.cc +++ b/similarity_search/test/test_integr.cc @@ -289,6 +289,7 @@ vector vTestCaseDesc = { MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "list_clusters", false, 
"strategy=random,useBucketSize=1,bucketSize=10", "", 0 /* no KNN */, 0.5 /* range search radius 0.5*/ , 1.0, 1.0, 0.0, 0.0, 2.4, 3.4), +#ifdef WITH_EXTRAS // *************** bbtree tests ******************** // // knn /* @@ -311,7 +312,6 @@ vector vTestCaseDesc = { MethodTestCase(DIST_TYPE_FLOAT, "kldivgenfast", "final8_10K.txt", "bbtree", false, "bucketSize=10", "", 0 /* no KNN */, 0.5 /* range search radius 0.5*/ , 0.999, 1.0, 0.0, 0.0, 1.2, 2.4), -#ifdef WITH_EXTRAS // *************** multi-probe LSH tests ******************** // // knn From 4a8026adf4effb11660d701a1c16363011f14a88 Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Mon, 2 Jul 2018 14:03:46 -0700 Subject: [PATCH 03/13] Don't throw exceptions when logging When NMSLIB is logging to the python logger, exceptions can occasionally be thrown on system exit (https://github.com/nmslib/nmslib/issues/327). Fix by trapping these exceptions since making a log call shouldn't throw an exception. --- python_bindings/nmslib.cc | 43 ++++++++++++++++---------- python_bindings/tests/bindings_test.py | 6 ++++ 2 files changed, 32 insertions(+), 17 deletions(-) diff --git a/python_bindings/nmslib.cc b/python_bindings/nmslib.cc index d203547..3a4279c 100644 --- a/python_bindings/nmslib.cc +++ b/python_bindings/nmslib.cc @@ -388,23 +388,32 @@ class PythonLogger int line, const char * function, const std::string & message) { - AcquireGIL l; - switch(severity) { - case LIB_DEBUG: - inner.attr("debug")(message); - break; - case LIB_INFO: - inner.attr("info")(message); - break; - case LIB_WARNING: - inner.attr("warning")(message); - break; - case LIB_ERROR: - inner.attr("error")(message); - break; - case LIB_FATAL: - inner.attr("critical")(message); - break; + // In cases when the interpreter was shutting down, attempting to log in python + // could throw an exception (https://github.com/nmslib/nmslib/issues/327). + // Logging shouldn't cause exceptions, so catch it and dump to stderr instead. 
+ try { + AcquireGIL l; + switch(severity) { + case LIB_DEBUG: + inner.attr("debug")(message); + break; + case LIB_INFO: + inner.attr("info")(message); + break; + case LIB_WARNING: + inner.attr("warning")(message); + break; + case LIB_ERROR: + inner.attr("error")(message); + break; + case LIB_FATAL: + inner.attr("critical")(message); + break; + } + } catch (const std::exception & e) { + std::cerr << "Failed to log '" << message << "'. Exception:" << e.what() << std::endl; + } catch (...) { + std::cerr << "Failed to log '" << message << "'" << std::endl; } } }; diff --git a/python_bindings/tests/bindings_test.py b/python_bindings/tests/bindings_test.py index 7499275..56add76 100644 --- a/python_bindings/tests/bindings_test.py +++ b/python_bindings/tests/bindings_test.py @@ -169,5 +169,11 @@ def testSparse(self): self.assertEqual(index[3], [(3, 1.0)]) +class GlobalTestCase(unittest.TestCase): + def testGlobal(self): + # this is a one line reproduction of https://github.com/nmslib/nmslib/issues/327 + GlobalTestCase.index = nmslib.init() + + if __name__ == "__main__": unittest.main() From c82195f6afd1b0a8036ccd6d66d306681db6882c Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Mon, 2 Jul 2018 14:32:35 -0700 Subject: [PATCH 04/13] fix OSX travis CI Minor tweaks to fix OSX ci. Also test out python 2.7 in addition to 3.7. 
--- .travis.yml | 44 +++++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/.travis.yml b/.travis.yml index 06ca91d..6a8aace 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,7 +8,13 @@ matrix: - os: linux env: PYTHON=3 - os: osx - env: PYTHON=3.6 + env: PYTHON=3 + osx_image: xcode9.3 + python: 3.7.0 + - os: osx + env: PYTHON=2 + osx_image: xcode9.3 + python: 2.7.14 allow_failures: - os: osx @@ -29,17 +35,27 @@ addons: before_install: - | - if [ "$TRAVIS_OS_NAME" = "linux" ]; then export CXX=g++-4.8 CC=gcc-4.8; fi - if [ "$TRAVIS_OS_NAME" = "osx" ] && [ "${PYTHON:0:1}" = "3" ]; then - brew update - brew upgrade python - command curl -sSL https://rvm.io/mpapis.asc | gpg --import -; - rvm get stable - else + PIP=pip + PY=python + if [ "$TRAVIS_OS_NAME" = "linux" ]; then + export CXX=g++-4.8 CC=gcc-4.8; pip install --user --upgrade pip virtualenv virtualenv -p python$PYTHON venv source venv/bin/activate - fi + fi + + if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then + brew update + brew install gcc + PIP=pip2 + PY=python2 + if [ "${PYTHON:0:1}" = "3" ]; then + brew upgrade python + brew install python3 + PIP=pip3 + PY=python3 + fi + fi install: - | @@ -49,10 +65,12 @@ install: cmake similarity_search fi make -j 4 - travis_wait travis_retry pip install -r python_bindings/requirements.txt scipy six flake8 - travis_retry cd python_bindings && python setup.py build install && cd .. + travis_wait travis_retry $PIP install -r python_bindings/requirements.txt scipy six flake8 + travis_retry cd python_bindings && $PY setup.py build install && cd .. script: +- $PY --version +- cd python_bindings && $PY setup.py test && flake8 && cd .. - | set -e if [ "$TRAVIS_OS_NAME" = "linux" -o "$TRAVIS_OS_NAME" = "osx" ] ; then @@ -61,10 +79,6 @@ script: ./release/test_integr integr.log cd .. fi - cd python_bindings - python setup.py test - flake8 - cd .. 
cache: - apt From d9b015b0bfbe24f03dd2070ccdda9275d573cb6c Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Mon, 2 Jul 2018 17:03:03 -0700 Subject: [PATCH 05/13] . --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 6a8aace..64457d9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -73,7 +73,7 @@ script: - cd python_bindings && $PY setup.py test && flake8 && cd .. - | set -e - if [ "$TRAVIS_OS_NAME" = "linux" -o "$TRAVIS_OS_NAME" = "osx" ] ; then + if [ "$TRAVIS_OS_NAME" = "linux" ] ; then cd similarity_search; ./release/bunit ./release/test_integr integr.log From dccb0270a797d2367929f0c81ac75810e9fabc7f Mon Sep 17 00:00:00 2001 From: Leonid Boytsov Date: Fri, 6 Jul 2018 10:24:20 -0400 Subject: [PATCH 06/13] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5ab9051..aca48f9 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ Non-Metric Space Library (NMSLIB) ================= -The latest **pre**-release is [1.7.2](https://github.com/nmslib/nmslib/releases/tag/v1.7.1). Note that the manual is not updated to reflect some of the changes. In particular, we changed the build procedure for Windows. Also note that the manual targets primiarily developers who will extend the library. For most other folks, [Python binding docs should be sufficient](python_bindings). The basic parameter tuning/selection guidelines are also available [online](/python_bindings/parameters.md). +The latest **pre**-release is [1.7.3](https://github.com/nmslib/nmslib/releases/tag/v1.7.3). Note that the manual is not updated to reflect some of the changes. In particular, we changed the build procedure for Windows. Also note that the manual targets primiarily developers who will extend the library. For most other folks, [Python binding docs should be sufficient](python_bindings). 
The basic parameter tuning/selection guidelines are also available [online](/python_bindings/parameters.md). ----------------- Non-Metric Space Library (NMSLIB) is an **efficient** cross-platform similarity search library and a toolkit for evaluation of similarity search methods. The core-library does **not** have any third-party dependencies. From 5a1dcb5fbb0450cefae5a758a34065fd0c91618f Mon Sep 17 00:00:00 2001 From: Leonid Boytsov Date: Fri, 6 Jul 2018 17:43:34 -0400 Subject: [PATCH 07/13] changing version --- python_bindings/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python_bindings/setup.py b/python_bindings/setup.py index 49a22f6..0d668ed 100755 --- a/python_bindings/setup.py +++ b/python_bindings/setup.py @@ -4,7 +4,7 @@ import sys import setuptools -__version__ = '1.7.2' +__version__ = '1.7.3' libdir = os.path.join(".", "nmslib", "similarity_search") library_file = os.path.join(libdir, "release", "libNonMetricSpaceLib.a") From ee68bd7f67d4dafea23a98176ded9fbc4c132874 Mon Sep 17 00:00:00 2001 From: Leonid Boytsov Date: Sun, 8 Jul 2018 16:42:38 -0400 Subject: [PATCH 08/13] An additional fix for #327 --- python_bindings/nmslib.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python_bindings/nmslib.cc b/python_bindings/nmslib.cc index 3a4279c..3f95536 100644 --- a/python_bindings/nmslib.cc +++ b/python_bindings/nmslib.cc @@ -348,7 +348,9 @@ struct IndexWrapper { } ~IndexWrapper() { - LOG(LIB_DEBUG) << "Destroying Index"; + // In cases when the interpreter was shutting down, attempting to log in python + // could throw an exception (https://github.com/nmslib/nmslib/issues/327). 
+ //LOG(LIB_DEBUG) << "Destroying Index"; freeObjectVector(&data); } From b085528f815a7dbba1d90652512bac3e86062a0d Mon Sep 17 00:00:00 2001 From: Leonid Boytsov Date: Sun, 8 Jul 2018 22:00:06 -0400 Subject: [PATCH 09/13] version bump --- README.md | 2 +- python_bindings/setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index aca48f9..3c5e049 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ Non-Metric Space Library (NMSLIB) ================= -The latest **pre**-release is [1.7.3](https://github.com/nmslib/nmslib/releases/tag/v1.7.3). Note that the manual is not updated to reflect some of the changes. In particular, we changed the build procedure for Windows. Also note that the manual targets primiarily developers who will extend the library. For most other folks, [Python binding docs should be sufficient](python_bindings). The basic parameter tuning/selection guidelines are also available [online](/python_bindings/parameters.md). +The latest **pre**-release is [1.7.3.1](https://github.com/nmslib/nmslib/releases/tag/v1.7.3.1). Note that the manual is not updated to reflect some of the changes. In particular, we changed the build procedure for Windows. Also note that the manual targets primiarily developers who will extend the library. For most other folks, [Python binding docs should be sufficient](python_bindings). The basic parameter tuning/selection guidelines are also available [online](/python_bindings/parameters.md). ----------------- Non-Metric Space Library (NMSLIB) is an **efficient** cross-platform similarity search library and a toolkit for evaluation of similarity search methods. The core-library does **not** have any third-party dependencies. 
diff --git a/python_bindings/setup.py b/python_bindings/setup.py index 0d668ed..c26a53c 100755 --- a/python_bindings/setup.py +++ b/python_bindings/setup.py @@ -4,7 +4,7 @@ import sys import setuptools -__version__ = '1.7.3' +__version__ = '1.7.3.1' libdir = os.path.join(".", "nmslib", "similarity_search") library_file = os.path.join(libdir, "release", "libNonMetricSpaceLib.a") From 75926713c4c4e4ef12ac007e8e43cb70032098dc Mon Sep 17 00:00:00 2001 From: Leonid Boytsov Date: Thu, 12 Jul 2018 20:14:21 -0400 Subject: [PATCH 10/13] Checking if data is loaded via non-optimized index --- similarity_search/include/logging.h | 2 +- similarity_search/src/method/hnsw.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/similarity_search/include/logging.h b/similarity_search/include/logging.h index 7505888..b03f754 100644 --- a/similarity_search/include/logging.h +++ b/similarity_search/include/logging.h @@ -128,7 +128,7 @@ class RuntimeErrorWrapper { #define CHECK_MSG(condition,message) \ if (!(condition)) {\ - LOG(LIB_ERROR) << "Check failed: " << #condition; \ + LOG(LIB_ERROR) << "Check failed: " << #condition << " " << string(message); \ throw runtime_error("Check failed: " + string(message)); \ } diff --git a/similarity_search/src/method/hnsw.cc b/similarity_search/src/method/hnsw.cc index 8e0775b..f88e7f9 100644 --- a/similarity_search/src/method/hnsw.cc +++ b/similarity_search/src/method/hnsw.cc @@ -684,7 +684,7 @@ namespace similarity { void Hnsw::Search(KNNQuery *query, IdType) const { - if (this->data_.empty()) { + if (this->data_.empty() and this->data_rearranged_.empty()) { return; } bool useOld = searchAlgoType_ == kOld || (searchAlgoType_ == kHybrid && ef_ >= 1000); From 4197cf2c5eb10f764479fd2e502d9ff9c073811e Mon Sep 17 00:00:00 2001 From: Leonid Boytsov Date: Thu, 12 Jul 2018 20:15:36 -0400 Subject: [PATCH 11/13] Changing install_requires to setup_requires so pip could pull up dependencies during the local installation. 
--- python_bindings/setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python_bindings/setup.py b/python_bindings/setup.py index c26a53c..66211eb 100755 --- a/python_bindings/setup.py +++ b/python_bindings/setup.py @@ -137,6 +137,7 @@ def build_extensions(self): high dimensions and/or non-metric spaces. Hence, the main focus is on approximate methods.""", ext_modules=ext_modules, install_requires=['pybind11>=2.0', 'numpy'], + setup_requires=['pybind11>=2.0', 'numpy'], cmdclass={'build_ext': BuildExt}, test_suite="tests", zip_safe=False, From eca1b3249d29992b989a1cbcf4b8c14f38841387 Mon Sep 17 00:00:00 2001 From: Leonid Boytsov Date: Thu, 12 Jul 2018 20:16:53 -0400 Subject: [PATCH 12/13] Bumping up the version of the bindings. --- python_bindings/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python_bindings/setup.py b/python_bindings/setup.py index 66211eb..939f31d 100755 --- a/python_bindings/setup.py +++ b/python_bindings/setup.py @@ -4,7 +4,7 @@ import sys import setuptools -__version__ = '1.7.3.1' +__version__ = '1.7.3.2' libdir = os.path.join(".", "nmslib", "similarity_search") library_file = os.path.join(libdir, "release", "libNonMetricSpaceLib.a") From 1be8dcbc6f51f28fc1ec0c1dc1d59ad4a4af015d Mon Sep 17 00:00:00 2001 From: Leonid Boytsov Date: Thu, 12 Jul 2018 20:22:54 -0400 Subject: [PATCH 13/13] Changing version in the README. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3c5e049..7403a9c 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ Non-Metric Space Library (NMSLIB) ================= -The latest **pre**-release is [1.7.3.1](https://github.com/nmslib/nmslib/releases/tag/v1.7.3.1). Note that the manual is not updated to reflect some of the changes. In particular, we changed the build procedure for Windows. Also note that the manual targets primiarily developers who will extend the library. 
For most other folks, [Python binding docs should be sufficient](python_bindings). The basic parameter tuning/selection guidelines are also available [online](/python_bindings/parameters.md). +The latest **pre**-release is [1.7.3.2](https://github.com/nmslib/nmslib/releases/tag/v1.7.3.2). Note that the manual is not updated to reflect some of the changes. In particular, we changed the build procedure for Windows. Also note that the manual targets primiarily developers who will extend the library. For most other folks, [Python binding docs should be sufficient](python_bindings). The basic parameter tuning/selection guidelines are also available [online](/python_bindings/parameters.md). ----------------- Non-Metric Space Library (NMSLIB) is an **efficient** cross-platform similarity search library and a toolkit for evaluation of similarity search methods. The core-library does **not** have any third-party dependencies.