Skip to content

Commit

Permalink
Towards replacing stringstream with atol, atof, and atod #69
Browse files Browse the repository at this point in the history
  • Loading branch information
searchivairus committed Dec 8, 2017
1 parent d79b5b4 commit b854b20
Show file tree
Hide file tree
Showing 4 changed files with 281 additions and 47 deletions.
258 changes: 258 additions & 0 deletions similarity_search/include/read_data.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,258 @@
#ifndef READ_DATA_H
#define READ_DATA_H

#include <cerrno>
#include <climits>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <exception>
#include <iomanip>
#include <limits>
#include <ostream>
#include <sstream>
#include <string>
#include <vector>

namespace similarity {

using std::string;
using std::vector;
using std::stringstream;

/*
 * One (id, value) entry of a sparse vector.
 *
 * Ordering (operator<) looks only at the id, while equality compares
 * both the id and the value.
 */
template <typename dist_t>
struct SparseVectElem {
  uint32_t id_;   // id of the element
  dist_t   val_;  // value stored for this id

  // Both arguments default to zero.
  SparseVectElem(uint32_t id = 0, dist_t val = 0) : id_(id), val_(val) {}

  // Orders elements by id only; the value is ignored.
  bool operator<(const SparseVectElem<dist_t>& that) const {
    return that.id_ > id_;
  }

  // Two elements are equal when both the ids and the values match.
  bool operator==(const SparseVectElem<dist_t>& that) const {
    if (id_ != that.id_) {
      return false;
    }
    return val_ == that.val_;
  }

  bool operator!=(const SparseVectElem<dist_t>& that) const {
    return !(*this == that);
  }
};

template <typename dist_t>
inline ostream& operator<<(ostream& out, SparseVectElem<dist_t> e) {
return out << "[" << e.id_ << ": " << e.val_ << "]";
}

/*
 * Parses a base-10 integer with strtol and narrows the result to int.
 *
 * errno is cleared before the conversion. On success the parsed value is
 * returned and *endPtr points just past the consumed characters, exactly as
 * strtol leaves it.
 *
 * Failure is reported through errno == ERANGE with a return value of 0:
 *  - if strtol itself reports ERANGE, *endPtr is left where strtol put it;
 *  - if the value fits into long but not into int, *endPtr is reset to ptr
 *    (as if nothing had been consumed) and errno is set to ERANGE.
 */
inline int strtoi_wrapper(const char* ptr, char** endPtr) {
  errno = 0;
  const long parsed = strtol(ptr, endPtr, 10);

  if (errno != ERANGE) {
    const bool fitsIntoInt = (parsed >= INT_MIN) && (parsed <= INT_MAX);
    if (fitsIntoInt) {
      return static_cast<int>(parsed);
    }
    // Representable as long, but too large/small for int.
    *endPtr = const_cast<char*>(ptr);
    errno = ERANGE;
  }

  return 0;
}

/*
 * Stream-based parser for a dense vector: reads values of type T from a line.
 *
 * The line is passed through ReplaceSomePunct (defined elsewhere) and then
 * read with operator>> until an extraction fails. Values extracted before
 * the first failure are kept in res and the function returns true.
 *
 * Only badbit is configured to throw; any std::exception raised while
 * reading is logged and turned into a false return value.
 *
 * The std:: names are qualified explicitly because this header does not
 * bring ios or exception into the namespace.
 */
template <typename T>
inline bool ReadVecDataViaStream(string line, vector<T>& res) {
  try {
    res.clear();
    ReplaceSomePunct(line);

    std::stringstream str(line);
    str.exceptions(std::ios::badbit);

    T val;
    while (str >> val) {
      res.push_back(val);
    }
  } catch (const std::exception& e) {
    LOG(LIB_ERROR) << "Exception: " << e.what();
    return false;
  }
  return true;
}

/*
 * Parses a dense vector from a text line without using streams.
 * Only the declaration of the primary template is given here; the explicit
 * specializations for float, double, and int below provide the definitions.
 */
template <typename T>
inline bool ReadVecDataEfficiently(string line, vector<T>& res);

/*
 * Parses floats from a text line using strtof.
 *
 * The line is first passed through ReplaceSomePunct (defined elsewhere) and
 * then scanned left to right. Scanning stops at the end of the line or at
 * the first position where strtof cannot convert anything; the values read
 * up to that point are kept and the function still returns true.
 *
 * Returns false only when a conversion reports ERANGE. In that case errno is
 * reset to 0 and res holds the values parsed before the failure.
 *
 * NOTE(review): some C libraries also report ERANGE for underflow, so very
 * small magnitudes may be rejected here -- confirm this is intended.
 */
template <>
inline bool ReadVecDataEfficiently<float>(string line, vector<float>& res) {
  ReplaceSomePunct(line);

  res.clear();
  errno = 0;

  const char* cursor = line.c_str();
  char* stop = nullptr;

  while (true) {
    const float parsed = strtof(cursor, &stop);
    if (stop == cursor) {
      break;  // nothing was consumed: no further number to read
    }
    cursor = stop;
    if (errno == ERANGE) {
      errno = 0;
      return false;
    }
    res.push_back(parsed);
  }

  // A range error raised by the last call, which consumed no characters.
  if (errno == ERANGE) {
    errno = 0;
    return false;
  }

  return true;
}

/*
 * Parses doubles from a text line using strtod.
 *
 * The line is first passed through ReplaceSomePunct (defined elsewhere) and
 * then scanned left to right. Scanning stops at the end of the line or at
 * the first position where strtod cannot convert anything; the values read
 * up to that point are kept and the function still returns true.
 *
 * Returns false only when a conversion reports ERANGE. In that case errno is
 * reset to 0 and res holds the values parsed before the failure.
 *
 * NOTE(review): some C libraries also report ERANGE for underflow, so very
 * small magnitudes may be rejected here -- confirm this is intended.
 */
template <>
inline bool ReadVecDataEfficiently<double>(string line, vector<double>& res) {
  ReplaceSomePunct(line);

  res.clear();
  errno = 0;

  const char* cursor = line.c_str();
  char* stop = nullptr;

  while (true) {
    const double parsed = strtod(cursor, &stop);
    if (stop == cursor) {
      break;  // nothing was consumed: no further number to read
    }
    cursor = stop;
    if (errno == ERANGE) {
      errno = 0;
      return false;
    }
    res.push_back(parsed);
  }

  // A range error raised by the last call, which consumed no characters.
  if (errno == ERANGE) {
    errno = 0;
    return false;
  }

  return true;
}


/*
 * Parses ints from a text line using strtoi_wrapper.
 *
 * The line is first passed through ReplaceSomePunct (defined elsewhere) and
 * then scanned left to right. Scanning stops at the end of the line or at
 * the first position where nothing can be converted; the values read up to
 * that point are kept and the function still returns true.
 *
 * Returns false when a conversion reports ERANGE. In that case errno is
 * reset to 0 and res holds the values parsed before the failure.
 */
template <>
inline bool ReadVecDataEfficiently<int>(string line, vector<int>& res) {
  ReplaceSomePunct(line);

  res.clear();
  errno = 0;

  const char* cursor = line.c_str();
  char* stop = nullptr;

  while (true) {
    const int parsed = strtoi_wrapper(cursor, &stop);
    if (stop == cursor) {
      break;  // nothing was consumed: end of input or a failed conversion
    }
    cursor = stop;
    if (errno == ERANGE) {
      errno = 0;
      return false;
    }
    res.push_back(parsed);
  }

  // strtoi_wrapper signals a value that does not fit into int by consuming
  // nothing and setting ERANGE, so the loop exits and the error is seen here.
  if (errno == ERANGE) {
    errno = 0;
    return false;
  }

  return true;
}


/*
 * Stream-based parser for a sparse vector: reads (id, value) pairs.
 *
 * The line is passed through ReplaceSomePunct (defined elsewhere) and then
 * read with operator>> as alternating uint32_t ids and values of type T.
 * Reading stops at the first failed extraction; complete pairs read before
 * that point are kept in res and the function returns true. An id that is
 * not followed by a readable value is dropped.
 *
 * Only badbit is configured to throw; any std::exception raised while
 * reading is logged and turned into a false return value.
 *
 * std::exception is qualified like the other std:: names in this function,
 * because this header does not bring exception into the namespace.
 */
template <typename T>
inline bool ReadSparseVecDataViaStream(string line, vector<SparseVectElem<T>>& res) {
  try {
    ReplaceSomePunct(line);
    std::stringstream str(line);
    str.exceptions(std::ios::badbit);

    res.clear();

    uint32_t id;
    T val;

    while (str >> id && str >> val) {
      res.push_back(SparseVectElem<T>(id, val));
    }
  } catch (const std::exception& e) {
    LOG(LIB_ERROR) << "Exception: " << e.what();
    return false;
  }

  return true;
}

/*
 * Parses a sparse vector, i.e., a sequence of (id, value) pairs, from a
 * text line without using streams.
 * Only the declaration of the primary template is given here; the explicit
 * specializations for float and double below provide the definitions.
 */
template <typename T>
inline bool ReadSparseVecDataEfficiently(string line, vector<SparseVectElem<T>>& res);

/*
 * Parses a sparse float vector, i.e., a sequence of (id, value) pairs,
 * using strtoi_wrapper for ids and strtof for values.
 *
 * The line is first passed through ReplaceSomePunct (defined elsewhere) and
 * then scanned left to right. Scanning ends successfully at the first
 * position where no id can be read (normally the end of the line).
 *
 * Returns false if
 *  - an id or a value is out of range (ERANGE; errno is reset to 0),
 *  - an id is negative: ids are stored as uint32_t, so a negative id would
 *    otherwise silently wrap around to a huge index,
 *  - an id is not followed by a parsable value.
 * On failure res holds the pairs parsed before the error.
 */
template <>
inline bool ReadSparseVecDataEfficiently<float>(string line, vector<SparseVectElem<float>>& res) {
  ReplaceSomePunct(line);

  res.clear();
  errno = 0;

  const char* cursor = line.c_str();
  char* next = nullptr;

  while (true) {
    const int parsedId = strtoi_wrapper(cursor, &next);
    if (errno == ERANGE) {
      errno = 0;
      return false;
    }
    if (next == cursor) {
      break;  // no further id: done
    }
    if (parsedId < 0) {
      return false;  // would wrap around when stored as uint32_t
    }
    cursor = next;

    const float val = strtof(cursor, &next);
    if (errno == ERANGE) {
      errno = 0;
      return false;
    }
    if (next == cursor) {
      return false;  // id without a value
    }
    cursor = next;

    res.push_back(SparseVectElem<float>(static_cast<uint32_t>(parsedId), val));
  }

  return true;
}

/*
 * Parses a sparse double vector, i.e., a sequence of (id, value) pairs,
 * using strtoi_wrapper for ids and strtod for values.
 *
 * The line is first passed through ReplaceSomePunct (defined elsewhere) and
 * then scanned left to right. Scanning ends successfully at the first
 * position where no id can be read (normally the end of the line).
 *
 * Returns false if
 *  - an id or a value is out of range (ERANGE; errno is reset to 0),
 *  - an id is negative: ids are stored as uint32_t, so a negative id would
 *    otherwise silently wrap around to a huge index,
 *  - an id is not followed by a parsable value.
 * On failure res holds the pairs parsed before the error.
 */
template <>
inline bool ReadSparseVecDataEfficiently<double>(string line, vector<SparseVectElem<double>>& res) {
  ReplaceSomePunct(line);

  res.clear();
  errno = 0;

  const char* cursor = line.c_str();
  char* next = nullptr;

  while (true) {
    const int parsedId = strtoi_wrapper(cursor, &next);
    if (errno == ERANGE) {
      errno = 0;
      return false;
    }
    if (next == cursor) {
      break;  // no further id: done
    }
    if (parsedId < 0) {
      return false;  // would wrap around when stored as uint32_t
    }
    cursor = next;

    const double val = strtod(cursor, &next);
    if (errno == ERANGE) {
      errno = 0;
      return false;
    }
    if (next == cursor) {
      return false;  // id without a value
    }
    cursor = next;

    res.push_back(SparseVectElem<double>(static_cast<uint32_t>(parsedId), val));
  }

  return true;
}

}

#endif
22 changes: 1 addition & 21 deletions similarity_search/include/space/space_sparse_vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,33 +31,13 @@
#include "utils.h"
#include "space.h"
#include "distcomp.h"
#include "read_data.h"

namespace similarity {

using std::vector;
using std::fill;

template <typename dist_t>
struct SparseVectElem {
uint32_t id_;
dist_t val_;
SparseVectElem(uint32_t id = 0, dist_t val = 0) : id_(id), val_(val) {}
bool operator<(const SparseVectElem<dist_t>& that) const {
return id_ < that.id_;
}
bool operator==(const SparseVectElem<dist_t>& that) const {
return id_ == that.id_ && val_ == that.val_;
}
bool operator!=(const SparseVectElem<dist_t>& that) const {
return !operator==(that);
}
};

template <typename dist_t>
ostream& operator<<(ostream& out, SparseVectElem<dist_t> e) {
return out << "[" << e.id_ << ": " << e.val_ << "]";
}

/*
* The maximum number of sparse elements that will be kept on the stack
* by the function ComputeDistanceHelper.
Expand Down
22 changes: 11 additions & 11 deletions similarity_search/src/space/space_sparse_vector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,30 +27,30 @@
#include "logging.h"
#include "distcomp.h"
#include "experimentconf.h"
#include "read_data.h"

namespace similarity {

using namespace std;

template <typename dist_t>
void SpaceSparseVector<dist_t>::ReadSparseVec(std::string line, size_t line_num, LabelType& label, vector<ElemType>& v) const
void SpaceSparseVector<dist_t>::ReadSparseVec(string line, size_t line_num, LabelType& label, vector<ElemType>& v) const
{
v.clear();

label = Object::extractLabel(line);

ReplaceSomePunct(line);
std::stringstream str(line);

str.exceptions(std::ios::badbit);

uint32_t id;
dist_t val;
#if 0
if (!ReadSparseVecDataViaStream(line, v)) {
#else
if (!ReadSparseVecDataEfficiently(line, v)) {
#endif
PREPARE_RUNTIME_ERR(err) << "Failed to parse the line # " << line_num << ": '" << line << "'" << std::endl;
LOG(LIB_ERROR) << err.stream().str();
THROW_RUNTIME_ERR(err);
}

try {
while (str >> id && str >> val) {
v.push_back(ElemType(id, val));
}
sort(v.begin(), v.end());

for (unsigned i = 1; i < v.size(); ++i) {
Expand Down
26 changes: 11 additions & 15 deletions similarity_search/src/space/space_vector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,16 @@
#include <memory>
#include <iomanip>
#include <limits>
#include <cstdio>
#include <cstdint>

#include "object.h"
#include "utils.h"
#include "logging.h"
#include "distcomp.h"
#include "experimentconf.h"
#include "space/space_vector.h"
#include "read_data.h"

namespace similarity {

Expand Down Expand Up @@ -126,21 +129,14 @@ void VectorSpace<dist_t>::ReadVec(string line, LabelType& label, vector<dist_t>&

label = Object::extractLabel(line);

ReplaceSomePunct(line);
stringstream str(line);

str.exceptions(ios::badbit);

dist_t val;


try {
while (str >> val) {
v.push_back(val);
}
} catch (const exception &e) {
LOG(LIB_ERROR) << "Exception: " << e.what();
LOG(LIB_FATAL) << "Failed to parse the line: '" << line << "'";
#if 0
if (!ReadVecDataViaStream(line, v)) {
#else
if (!ReadVecDataEfficiently(line, v)) {
#endif
PREPARE_RUNTIME_ERR(err) << "Failed to parse the line: '" << line << "'";
LOG(LIB_ERROR) << err.stream().str();
THROW_RUNTIME_ERR(err);
}
}

Expand Down

0 comments on commit b854b20

Please sign in to comment.