diff --git a/similarity_search/CMakeLists.txt b/similarity_search/CMakeLists.txt index fae4e53..1451b30 100644 --- a/similarity_search/CMakeLists.txt +++ b/similarity_search/CMakeLists.txt @@ -42,28 +42,28 @@ if(${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU") message(FATAL_ERROR "GCC version must be at least 4.7!") endif() # Uncomment the following lines to see how the code compiles without AVX,SSE4.2 and/or SSE2 - #set (CMAKE_CXX_FLAGS_RELEASE "-Wall -Wunreachable-code -Ofast -lm -lrt -DNDEBUG -std=c++11 -DHAVE_CXX0X -fopenmp -fpic -march=x86-64") - #set (CMAKE_CXX_FLAGS_RELEASE "-Wall -Wunreachable-code -Ofast -lm -lrt -DNDEBUG -std=c++11 -DHAVE_CXX0X -fopenmp -fpic -march=core2") - #set (CMAKE_CXX_FLAGS_RELEASE "-Wall -Wunreachable-code -Ofast -lm -lrt -DNDEBUG -std=c++11 -DHAVE_CXX0X -fopenmp -fpic -msse4.2") - set (CMAKE_CXX_FLAGS_RELEASE "-Wall -Wunreachable-code -Wcast-align -Ofast -lm -lrt -DNDEBUG -std=c++11 -fopenmp -DHAVE_CXX0X -march=native -Wl,--no-as-needed -fpic") - set (CMAKE_CXX_FLAGS_DEBUG "-Wall -Wunreachable-code -Wcast-align -ggdb -lm -lrt -DNDEBUG -std=c++11 -fopenmp -DHAVE_CXX0X -march=native -Wl,--no-as-needed -fpic") + #set (CMAKE_CXX_FLAGS_RELEASE "-Wall -Wunreachable-code -Ofast -lm -lrt -DNDEBUG -std=c++11 -DHAVE_CXX0X -pthread -fpic -march=x86-64") + #set (CMAKE_CXX_FLAGS_RELEASE "-Wall -Wunreachable-code -Ofast -lm -lrt -DNDEBUG -std=c++11 -DHAVE_CXX0X -pthread -fpic -march=core2") + #set (CMAKE_CXX_FLAGS_RELEASE "-Wall -Wunreachable-code -Ofast -lm -lrt -DNDEBUG -std=c++11 -DHAVE_CXX0X -pthread -fpic -msse4.2") + set (CMAKE_CXX_FLAGS_RELEASE "-Wall -Wunreachable-code -Wcast-align -Ofast -lm -lrt -DNDEBUG -std=c++11 -pthread -DHAVE_CXX0X -march=native -Wl,--no-as-needed -fpic") + set (CMAKE_CXX_FLAGS_DEBUG "-Wall -Wunreachable-code -Wcast-align -ggdb -lm -lrt -DNDEBUG -std=c++11 -pthread -DHAVE_CXX0X -march=native -Wl,--no-as-needed -fpic") elseif(${CMAKE_CXX_COMPILER_ID} STREQUAL "Intel") if (CXX_COMPILER_VERSION VERSION_LESS 14.0.1) message(FATAL_ERROR "Intel version must be at least 14.0.1!") endif() - set (CMAKE_CXX_FLAGS_RELEASE "-Wall -Wunreachable-code -Ofast -lrt -DNDEBUG -std=c++11 -DHAVE_CXX0X -openmp -march=native -fpic") - set (CMAKE_CXX_FLAGS_DEBUG "-Wall -Wunreachable-code -ggdb -lrt -DNDEBUG -std=c++11 -DHAVE_CXX0X -openmp -march=native -fpic") + set (CMAKE_CXX_FLAGS_RELEASE "-Wall -Wunreachable-code -Ofast -lrt -DNDEBUG -std=c++11 -DHAVE_CXX0X -pthread -march=native -fpic") + set (CMAKE_CXX_FLAGS_DEBUG "-Wall -Wunreachable-code -ggdb -lrt -DNDEBUG -std=c++11 -DHAVE_CXX0X -pthread -march=native -fpic") elseif(${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang") if (CXX_COMPILER_VERSION VERSION_LESS 4.2.1) message(FATAL_ERROR "Clang version must be at least 3.4 (GCC >= 4.2.1 equivalent)!") endif() if (CMAKE_SYSTEM_NAME MATCHES Darwin) # MACOSX - set (CMAKE_CXX_FLAGS_RELEASE "-Wall -Wunreachable-code -Wcast-align -O3 -DNDEBUG -std=c++11 -DHAVE_CXX0X -fpic -march=native") - set (CMAKE_CXX_FLAGS_DEBUG "-Wall -Wunreachable-code -Wcast-align -ggdb -DNDEBUG -std=c++11 -DHAVE_CXX0X -fpic -march=native") + set (CMAKE_CXX_FLAGS_RELEASE "-Wall -Wunreachable-code -Wcast-align -O3 -DNDEBUG -std=c++11 -DHAVE_CXX0X -pthread -fpic -march=native") + set (CMAKE_CXX_FLAGS_DEBUG "-Wall -Wunreachable-code -Wcast-align -ggdb -DNDEBUG -std=c++11 -DHAVE_CXX0X -pthread -fpic -march=native") else() - set (CMAKE_CXX_FLAGS_RELEASE "-Wall -Wunreachable-code -Wcast-align -O3 -DNDEBUG -std=c++11 -DHAVE_CXX0X -fopenmp -march=native -fpic") - set (CMAKE_CXX_FLAGS_DEBUG "-Wall -Wunreachable-code -Wcast-align -ggdb -DNDEBUG -std=c++11 -DHAVE_CXX0X -fopenmp -march=native -fpic") + set (CMAKE_CXX_FLAGS_RELEASE "-Wall -Wunreachable-code -Wcast-align -O3 -DNDEBUG -std=c++11 -DHAVE_CXX0X -pthread -march=native -fpic") + set (CMAKE_CXX_FLAGS_DEBUG "-Wall -Wunreachable-code -Wcast-align -ggdb -DNDEBUG -std=c++11 -DHAVE_CXX0X -pthread -march=native -fpic") endif() #message(FATAL_ERROR "CLANG ${CMAKE_SYSTEM_NAME}") elseif(WIN32) diff --git a/similarity_search/include/thread_pool.h b/similarity_search/include/thread_pool.h index 21c0b34..0a98141 100644 --- a/similarity_search/include/thread_pool.h +++ b/similarity_search/include/thread_pool.h @@ -54,33 +54,47 @@ namespace similarity { */ - // replacement for the openmp '#pragma omp parallel for' directive - // only handles a subset of functionality (no reductions etc) + /* + * replacement for the openmp '#pragma omp parallel for' directive + * only handles a subset of functionality (no reductions etc) + * Process ids from start (inclusive) to end (EXCLUSIVE) + */ template - inline void ParallelFor(int initial, int final, int numThreads, Function fn) { + inline void ParallelFor(size_t start, size_t end, size_t numThreads, Function fn) { if (numThreads <= 0) { numThreads = std::thread::hardware_concurrency(); } - std::vector threads; - std::atomic current(initial); + std::vector threads; + std::atomic current(start); // keep track of exceptions in threads // https://stackoverflow.com/a/32428427/1713196 std::exception_ptr lastException = nullptr; + std::mutex lastExceptMutex; - for (int i = 0; i < numThreads; ++i) { + for (size_t i = 0; i < numThreads; ++i) { threads.push_back(std::thread([&] { while (true) { - int id = current.fetch_add(1); - if ((id >= final) || lastException) { + size_t id = current.fetch_add(1); + + if ((id >= end)) { break; } try { fn(id); } catch (...) { + std::unique_lock lastExcepLock(lastExceptMutex); lastException = std::current_exception(); + /* + * This will work even when current is the largest value that + * size_t can fit, because fetch_add returns the previous value + * before the increment (what will result in overflow + * and produce 0 instead of current + 1). + */ + current = end; + break; } } })); diff --git a/similarity_search/src/method/hnsw.cc b/similarity_search/src/method/hnsw.cc index 9bd9229..bb8009a 100644 --- a/similarity_search/src/method/hnsw.cc +++ b/similarity_search/src/method/hnsw.cc @@ -1012,7 +1012,7 @@ namespace similarity { ++currElem; } - for (int_fast32_t i = 0; i < query->GetK() && i < sortedArr.size(); ++i) { + for (uint_fast32_t i = 0; i < query->GetK() && i < sortedArr.size(); ++i) { query->CheckAndAddToResult(queueData[i].key, queueData[i].data->getData()); }