From 7dfc3da92e39c32c7ea2d123bbe2768def83a9ac Mon Sep 17 00:00:00 2001 From: searchivarius Date: Mon, 3 Jun 2019 04:56:46 -0400 Subject: [PATCH] Documentation and query server improvements (save/load data). --- manual/query_server.md | 11 ++++- .../cpp_client_server/QueryService_server.cpp | 41 +++++++++++++++---- 2 files changed, 44 insertions(+), 8 deletions(-) diff --git a/manual/query_server.md b/manual/query_server.md index 1a3e047..b8bcbaa 100644 --- a/manual/query_server.md +++ b/manual/query_server.md @@ -18,8 +18,17 @@ if Apache Thrift is installed to a non-standard location). The query server has a similar set of parameters to the benchmarking utility ``experiment``. For example, you can start the server as follows: ``` - ./query_server -i ../../sample_data/final8_10K.txt -s l2 -m sw-graph -c NN=10,efConstruction=200,initIndexAttempts=1 -p 10000 + ./query_server -i ../../sample_data/final8_10K.txt -s l2 -m hnsw -c M=20,efConstruction=100 -p 10000 ``` +If the search method supports saving the index, it is possible to save both the index and the data for faster loading by using a combination of the options `-S` and `--cacheData`: +``` + ./query_server -i ../../sample_data/final8_10K.txt -s l2 -m hnsw -c M=20,efConstruction=100 -p 10000 -S location --cacheData +``` +The next time you start the server, it needs neither the original data file nor to re-create the index: +``` + ./query_server -s l2 -m hnsw -p 10000 -L location --cacheData +``` + There are also three sample clients implemented in [C++](query_server/cpp_client_server), [Python](query_server/python_client/), and [Java](query_server/java_client/). A client reads a string representation of a query object from the standard stream. 
diff --git a/query_server/cpp_client_server/QueryService_server.cpp b/query_server/cpp_client_server/QueryService_server.cpp index 0852580..aa782e1 100644 --- a/query_server/cpp_client_server/QueryService_server.cpp +++ b/query_server/cpp_client_server/QueryService_server.cpp @@ -31,6 +31,7 @@ #include #include "params.h" +#include "space.h" #include "params_def.h" #include "utils.h" #include "space.h" @@ -47,6 +48,7 @@ #define MAX_SPIN_LOCK_QTY 1000000 #define SLEEP_DURATION 10 +#define DATA_FILE_PREF ".dat" const unsigned THREAD_COEFF = 4; @@ -94,6 +96,7 @@ class QueryServiceHandler : virtual public QueryServiceIf { const string& MethodName, const string& LoadIndexLoc, const string& SaveIndexLoc, + bool& CacheData, const AnyParams& IndexParams, const AnyParams& QueryTimeParams) : debugPrint_(debugPrint), @@ -102,10 +105,27 @@ class QueryServiceHandler : virtual public QueryServiceIf { counter_(0) { - unique_ptr inpState(space_->ReadDataset(dataSet_, - externIds_, - DataFile, - MaxNumData)); + unique_ptr inpState; + + if (!CacheData || !DoesFileExist(LoadIndexLoc + DATA_FILE_PREF)) { + CHECK_MSG(!DataFile.empty(), "Specify the input data file!") + inpState = space_->ReadDataset(dataSet_, + externIds_, + DataFile, + MaxNumData); + if (CacheData && !SaveIndexLoc.empty()) { + LOG(LIB_INFO) << "Saving data to location: " << SaveIndexLoc + DATA_FILE_PREF; + + space_->WriteObjectVectorBinData(dataSet_, externIds_, SaveIndexLoc + DATA_FILE_PREF); + } + } else { + LOG(LIB_INFO) << "Loading cached data from location: " << LoadIndexLoc + DATA_FILE_PREF; + + inpState = space_->ReadObjectVectorFromBinData(dataSet_, + externIds_, + LoadIndexLoc + DATA_FILE_PREF, + MaxNumData); + } space_->UpdateParamsFromFile(*inpState); CHECK(dataSet_.size() == externIds_.size()); @@ -408,6 +428,7 @@ void ParseCommandLineForServer(int argc, char*argv[], bool& debugPrint, string& LoadIndexLoc, string& SaveIndexLoc, + bool& CacheData, int& port, size_t& threadQty, string& LogFile, @@ -435,11 
+456,12 @@ void ParseCommandLineForServer(int argc, char*argv[], (LOG_FILE_PARAM_OPT.c_str(), po::value(&LogFile)->default_value(LOG_FILE_PARAM_DEFAULT), LOG_FILE_PARAM_MSG.c_str()) (SPACE_TYPE_PARAM_OPT.c_str(), po::value(&spaceParamStr)->required(), SPACE_TYPE_PARAM_MSG.c_str()) (DIST_TYPE_PARAM_OPT.c_str(), po::value(&DistType)->default_value(DIST_TYPE_FLOAT), DIST_TYPE_PARAM_MSG.c_str()) - (DATA_FILE_PARAM_OPT.c_str(), po::value(&DataFile)->required(), DATA_FILE_PARAM_MSG.c_str()) + (DATA_FILE_PARAM_OPT.c_str(), po::value(&DataFile)->default_value(""), DATA_FILE_PARAM_MSG.c_str()) (MAX_NUM_DATA_PARAM_OPT.c_str(), po::value(&MaxNumData)->default_value(MAX_NUM_DATA_PARAM_DEFAULT), MAX_NUM_DATA_PARAM_MSG.c_str()) (METHOD_PARAM_OPT.c_str(), po::value(&MethodName)->required(), METHOD_PARAM_MSG.c_str()) (LOAD_INDEX_PARAM_OPT.c_str(), po::value(&LoadIndexLoc)->default_value(LOAD_INDEX_PARAM_DEFAULT), LOAD_INDEX_PARAM_MSG.c_str()) (SAVE_INDEX_PARAM_OPT.c_str(), po::value(&SaveIndexLoc)->default_value(SAVE_INDEX_PARAM_DEFAULT), SAVE_INDEX_PARAM_MSG.c_str()) + ("cacheData", po::bool_switch(&CacheData), "save/load data together with the index") (QUERY_TIME_PARAMS_PARAM_OPT.c_str(), po::value(&queryTimeParamStr)->default_value(""), QUERY_TIME_PARAMS_PARAM_MSG.c_str()) (INDEX_TIME_PARAMS_PARAM_OPT.c_str(), po::value(&indexTimeParamStr)->default_value(""), INDEX_TIME_PARAMS_PARAM_MSG.c_str()) ; @@ -487,11 +509,11 @@ void ParseCommandLineForServer(int argc, char*argv[], QueryTimeParams = shared_ptr(new AnyParams(desc)); } - if (DataFile.empty()) { + if (DataFile.empty() && !CacheData) { LOG(LIB_FATAL) << "data file is not specified!"; } - if (!DoesFileExist(DataFile)) { + if (!CacheData && !DoesFileExist(DataFile)) { LOG(LIB_FATAL) << "data file " << DataFile << " doesn't exist"; } } catch (const exception& e) { @@ -514,6 +536,7 @@ int main(int argc, char *argv[]) { std::shared_ptr IndexParams; std::shared_ptr QueryTimeParams; + bool CacheData; string LoadIndexLoc; string 
SaveIndexLoc; @@ -521,6 +544,7 @@ int main(int argc, char *argv[]) { debugPrint, LoadIndexLoc, SaveIndexLoc, + CacheData, port, threadQty, LogFile, @@ -549,6 +573,7 @@ int main(int argc, char *argv[]) { MethodName, LoadIndexLoc, SaveIndexLoc, + CacheData, *IndexParams, *QueryTimeParams)); } else if (DIST_TYPE_FLOAT == DistType) { @@ -560,6 +585,7 @@ int main(int argc, char *argv[]) { MethodName, LoadIndexLoc, SaveIndexLoc, + CacheData, *IndexParams, *QueryTimeParams)); } else if (DIST_TYPE_DOUBLE == DistType) { @@ -571,6 +597,7 @@ int main(int argc, char *argv[]) { MethodName, LoadIndexLoc, SaveIndexLoc, + CacheData, *IndexParams, *QueryTimeParams));