Skip to content

Commit

Permalink
Additional mem leak fixes: #398
Browse files Browse the repository at this point in the history
  • Loading branch information
searchivarius committed Jun 19, 2019
1 parent c817f21 commit 7186a8c
Show file tree
Hide file tree
Showing 3 changed files with 123 additions and 2 deletions.
113 changes: 113 additions & 0 deletions python_bindings/tests/bindings_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@
import numpy.testing as npt

import nmslib
import time

import os, gc, psutil

MEM_TEST_ITER=50
MEM_GROW_COEFF=8 # This is a bit adhoc but seems to work in practice


def get_exact_cosine(row, data, N=10):
Expand Down Expand Up @@ -294,6 +300,113 @@ def testGlobal(self):
# this is a one line reproduction of https://github.com/nmslib/nmslib/issues/327
GlobalTestCase.index = nmslib.init()

class MemoryLeak1TestCase(TestCaseBase):
def testMemoryLeak1(self):
process = psutil.Process(os.getpid())

np.random.seed(23)
data = np.random.randn(10000, 10).astype(np.float32)
query = np.random.randn(1000, 10).astype(np.float32)
space_name = 'l2'

num_threads=4

index_time_params = {'M': 20,
'efConstruction': 100,
'indexThreadQty': num_threads,
'post' : 0,
'skip_optimized_index' : 1} # using non-optimized index!

query_time_params = {'efSearch': 100}

with tempfile.NamedTemporaryFile() as tmp:

index = nmslib.init(method='hnsw', space=space_name, data_type=nmslib.DataType.DENSE_VECTOR)
index.addDataPointBatch(data)

index.createIndex(index_time_params)
index.saveIndex(tmp.name, save_data=True)

init_mem = process.memory_info().rss

delta1 = None

for k in range(MEM_TEST_ITER):

index = nmslib.init(method='hnsw', space=space_name, data_type=nmslib.DataType.DENSE_VECTOR)
index.loadIndex(tmp.name, load_data=True)
index.setQueryTimeParams(query_time_params)

if k == 0:
delta1 = process.memory_info().rss - init_mem

nbrs = index.knnQueryBatch(query, k = 10, num_threads = num_threads)

nbrs = None
index = None

gc.collect()

gc.collect()
time.sleep(1)
gc.collect()
time.sleep(1)
delta_last = process.memory_info().rss - init_mem

# if this check fails a memory leak is possible (but not necessarily 100% certain, memory is random)
self.assertTrue(delta_last < delta1 * MEM_GROW_COEFF)

class MemoryLeak2TestCase(TestCaseBase):
def testMemoryLeak2(self):
process = psutil.Process(os.getpid())

np.random.seed(23)
data = np.random.randn(10000, 10).astype(np.float32)
query = np.random.randn(1000, 10).astype(np.float32)
space_name = 'l2'

num_threads=4

index_time_params = {'M': 20,
'efConstruction': 100,
'indexThreadQty': num_threads,
'post' : 0,
'skip_optimized_index' : 1} # using non-optimized index!

query_time_params = {'efSearch': 100}

with tempfile.NamedTemporaryFile() as tmp:

init_mem = process.memory_info().rss

delta1 = None

for k in range(MEM_TEST_ITER):

index = nmslib.init(method='hnsw', space=space_name, data_type=nmslib.DataType.DENSE_VECTOR)
index.addDataPointBatch(data)
index.createIndex(index_time_params)

if k == 0:
delta1 = process.memory_info().rss - init_mem

index.setQueryTimeParams(query_time_params)
nbrs = index.knnQueryBatch(query, k = 10, num_threads = num_threads)

nbrs = None
index = None

gc.collect()

gc.collect()
time.sleep(1)
gc.collect()
time.sleep(1)
delta_last = process.memory_info().rss - init_mem

# if this check fails a memory leak is possible (but not necessarily 100% certain, memory is random)
self.assertTrue(delta_last < delta1 * MEM_GROW_COEFF)


if __name__ == "__main__":
unittest.main()
8 changes: 7 additions & 1 deletion similarity_search/include/object.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,13 @@ using std::numeric_limits;

class Object {
public:
explicit Object(char* buffer) : buffer_(buffer), memory_allocated_(false) {}
/*
* Memory ownership handling of the Object class is not good, but we keep it this way for historical/compatibility reasons
* Currrently, when the second constructor is called, new memory is always allocated.
* However, this constructor is nornally called to created an object that reuses someone else's memory.
* In NMSLIB 1.8.1 we make it possible to take ownership of the memory provided.
*/
explicit Object(char* buffer, bool memory_allocated=false) : buffer_(buffer), memory_allocated_(memory_allocated) {}

Object(IdType id, LabelType label, size_t datalength, const void* data) {
buffer_ = new char[ID_SIZE + LABEL_SIZE + DATALENGTH_SIZE + datalength];
Expand Down
4 changes: 3 additions & 1 deletion similarity_search/src/space.cc
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,9 @@ Space<dist_t>::ReadObjectVectorFromBinData(ObjectVector& data,
readBinaryPOD(input, objSize);
unique_ptr<char []> buf(new char[objSize]);
input.read(&buf[0], objSize);
data.push_back(new Object(buf.release()));
// true guarantees that the Object will take ownership of memory
// less than ideal, but ok for now
data.push_back(new Object(buf.release(), true));
}

return unique_ptr<DataFileInputState>(new DataFileInputState());
Expand Down

0 comments on commit 7186a8c

Please sign in to comment.