Skip to content

Commit 0d21951

Browse files
committed
More hashing functionality.
1 parent 55dbe68 commit 0d21951

File tree

2 files changed

+86
-40
lines changed

2 files changed

+86
-40
lines changed

src_basic/hash_functions.h

Lines changed: 31 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,7 @@ uint32_t murmur3_32(const uint8_t *key, size_t len, uint32_t seed) {
281281
// Add null pointer check
282282
if (key == nullptr) return seed;
283283
if (len == 0) return seed;
284-
284+
285285
uint32_t h = seed;
286286
uint32_t k;
287287
/* Read in groups of 4. */
@@ -319,7 +319,7 @@ template <typename T> inline T unaligned_load(void const *ptr) noexcept {
319319
// compiler should optimize this very well anyways.
320320
static_assert(std::is_trivially_copyable_v<T>, "T must be trivially copyable");
321321
if (ptr == nullptr) return T{};
322-
322+
323323
T t;
324324
std::memcpy(&t, ptr, sizeof(T));
325325
return t;
@@ -394,19 +394,19 @@ namespace hash_utils {
394394
inline void hash_combine(std::size_t& seed, std::size_t hash) noexcept {
395395
seed ^= hash + 0x9e3779b9 + (seed << 6) + (seed >> 2);
396396
}
397-
397+
398398
// Safe wrapper for murmur3_32 with bounds checking
399399
inline uint32_t safe_murmur3_32(const void* data, size_t len, uint32_t seed) {
400400
if (len == 0 || data == nullptr) return seed;
401401
return murmur3_32(static_cast<const uint8_t*>(data), len, seed);
402402
}
403-
403+
404404
// Type-safe memory hashing for arithmetic types
405405
template<typename T>
406406
inline uint32_t hash_arithmetic_array(const T* data, size_t count, uint32_t seed) {
407407
static_assert(std::is_arithmetic_v<T>, "T must be arithmetic type");
408408
if (count == 0 || data == nullptr) return seed;
409-
409+
410410
const size_t byte_size = sizeof(T) * count;
411411
const uint8_t* byte_data = reinterpret_cast<const uint8_t*>(data);
412412
return safe_murmur3_32(byte_data, byte_size, seed);
@@ -418,7 +418,7 @@ size_t ComputeHashTwoMap(size_t const &seed,
418418
std::map<T, size_t> const &ListDiagNorm,
419419
std::map<T, size_t> const &ListOffDiagNorm) {
420420
size_t hash = seed;
421-
421+
422422
auto update_from_map = [&](const std::map<T, size_t>& map) {
423423
for (const auto& kv : map) {
424424
size_t hash1 = std::hash<T>{}(kv.first);
@@ -427,34 +427,41 @@ size_t ComputeHashTwoMap(size_t const &seed,
427427
hash_utils::hash_combine(hash, hash2);
428428
}
429429
};
430-
430+
431431
update_from_map(ListDiagNorm);
432432
update_from_map(ListOffDiagNorm);
433433
return hash;
434434
}
435435

436+
template <typename T>
437+
size_t std_vector_hash(std::vector<T> const& V, size_t const& seed) {
438+
size_t ret_hash = seed;
439+
size_t hash_size = std::hash<size_t>{}(V.size());
440+
hash_utils::hash_combine(ret_hash, hash_size);
441+
442+
if constexpr (std::is_arithmetic_v<T>) {
443+
return hash_utils::hash_arithmetic_array(V.data(), V.size(), ret_hash);
444+
} else {
445+
for (const auto& elem : V) {
446+
std::size_t elem_hash = std::hash<T>{}(elem);
447+
hash_utils::hash_combine(ret_hash, elem_hash);
448+
}
449+
return ret_hash;
450+
}
451+
}
452+
453+
454+
436455
namespace std {
437-
template <typename T>
456+
template <typename T>
438457
struct hash<std::vector<T>> {
439458
std::size_t operator()(const std::vector<T>& V) const {
440-
if (V.empty()) return 0;
441-
442-
if constexpr (std::is_arithmetic_v<T>) {
443-
// Use safe arithmetic hashing
444-
uint32_t seed = 0x1b873540;
445-
return hash_utils::hash_arithmetic_array(V.data(), V.size(), seed);
446-
} else {
447-
// Use proper hash combining for non-arithmetic types
448-
std::size_t seed = 0;
449-
for (const auto& elem : V) {
450-
std::size_t elem_hash = std::hash<T>{}(elem);
451-
hash_utils::hash_combine(seed, elem_hash);
452-
}
453-
return seed;
454-
}
459+
size_t seed = 1234;
460+
return std_vector_hash<T>(V, seed);
455461
}
456462
};
457-
template <typename T1, typename T2>
463+
464+
template <typename T1, typename T2>
458465
struct hash<std::pair<T1, T2>> {
459466
std::size_t operator()(const std::pair<T1, T2>& ePair) const {
460467
std::size_t seed = std::hash<T1>{}(ePair.first);

src_matrix/MAT_Matrix.h

Lines changed: 55 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3233,49 +3233,86 @@ template <typename T> bool IsSymmetricMatrix(MyMatrix<T> const &M) {
32333233
return true;
32343234
}
32353235

3236+
template <typename T>
3237+
size_t hash_matrix_sizes(MyMatrix<T> const& M, size_t const& seed) {
3238+
size_t ret_hash = seed;
3239+
size_t hash_nrow = std::hash<int>{}(M.rows());
3240+
size_t hash_ncol = std::hash<int>{}(M.cols());
3241+
hash_utils::hash_combine(ret_hash, hash_nrow);
3242+
hash_utils::hash_combine(ret_hash, hash_ncol);
3243+
return ret_hash;
3244+
}
3245+
3246+
template <typename T>
3247+
size_t hash_vector_size(MyVector<T> const& V, size_t const& seed) {
3248+
size_t ret_hash = seed;
3249+
size_t hash_size = std::hash<int>{}(V.size());
3250+
hash_utils::hash_combine(ret_hash, hash_size);
3251+
return ret_hash;
3252+
}
3253+
3254+
3255+
32363256
template <typename T>
32373257
inline typename std::enable_if<!std::is_arithmetic<T>::value, size_t>::type
32383258
Matrix_Hash(MyMatrix<T> const &M, size_t const &seed) {
3239-
if (M.size() == 0) return seed;
3259+
size_t ret_hash = hash_matrix_sizes(M, seed);
3260+
int nbRow = M.rows();
3261+
int nbCol = M.cols();
32403262

3241-
size_t result = seed;
3263+
for (int iRow = 0; iRow < nbRow; iRow++) {
3264+
for (int iCol = 0; iCol < nbCol; iCol++) {
3265+
size_t hash_elem = std::hash<T>{}(M(iRow, iCol));
3266+
hash_utils::hash_combine(ret_hash, hash_elem);
3267+
}
3268+
}
3269+
return ret_hash;
3270+
}
3271+
3272+
// The type independent hash.
3273+
// That is, if a matrix can be represented in int16_t, int32_t, int64_t or mpz_class,
3274+
// then it has the same hash however it is represented.
3275+
// In practice this is done by writing the entries to a string and hashing that string.
3276+
template<typename T>
3277+
size_t matrix_type_independent_hash(MyMatrix<T> const& M, size_t const& seed) {
3278+
size_t ret_hash = hash_matrix_sizes(M, seed);
32423279
int nbRow = M.rows();
32433280
int nbCol = M.cols();
3244-
3281+
std::stringstream s;
32453282
for (int iRow = 0; iRow < nbRow; iRow++) {
32463283
for (int iCol = 0; iCol < nbCol; iCol++) {
3247-
size_t elem_hash = std::hash<T>{}(M(iRow, iCol));
3248-
hash_utils::hash_combine(result, elem_hash);
3284+
s << " " << M(iRow,iCol);
32493285
}
32503286
}
3251-
return result;
3287+
std::string stro = s.str();
3288+
size_t hash_stro = std::hash<std::string>()(stro);
3289+
hash_utils::hash_combine(ret_hash, hash_stro);
3290+
return ret_hash;
32523291
}
32533292

32543293
template <typename T>
32553294
inline typename std::enable_if<std::is_arithmetic<T>::value, size_t>::type
32563295
Matrix_Hash(MyMatrix<T> const &M, size_t const &seed) {
3257-
if (M.size() == 0) return seed;
3258-
return hash_utils::hash_arithmetic_array(M.data(), M.size(), seed);
3296+
size_t ret_hash = hash_matrix_sizes(M, seed);
3297+
return hash_utils::hash_arithmetic_array(M.data(), M.size(), ret_hash);
32593298
}
32603299

32613300
template <typename T>
32623301
inline typename std::enable_if<!std::is_arithmetic<T>::value, size_t>::type
32633302
Vector_Hash(MyVector<T> const &V, size_t const &seed) {
3264-
if (V.size() == 0) return seed;
3265-
3266-
size_t result = seed;
3303+
size_t ret_hash = hash_vector_size(V, seed);
32673304
for (int i = 0; i < V.size(); i++) {
32683305
size_t elem_hash = std::hash<T>{}(V(i));
3269-
hash_utils::hash_combine(result, elem_hash);
3306+
hash_utils::hash_combine(ret_hash, elem_hash);
32703307
}
3271-
return result;
3308+
return ret_hash;
32723309
}
32733310

32743311
template <typename T>
32753312
inline typename std::enable_if<std::is_arithmetic<T>::value, size_t>::type
32763313
Vector_Hash(MyVector<T> const &V, size_t const &seed) {
3277-
if (V.size() == 0) return seed;
3278-
return hash_utils::hash_arithmetic_array(V.data(), V.size(), seed);
3314+
size_t ret_hash = hash_vector_size(V, seed);
3315+
return hash_utils::hash_arithmetic_array(V.data(), V.size(), ret_hash);
32793316
}
32803317

32813318
namespace std {
@@ -3285,12 +3322,14 @@ template <typename T> struct hash<MyVector<T>> {
32853322
return Vector_Hash(e_val, seed);
32863323
}
32873324
};
3325+
32883326
template <typename T> struct hash<MyMatrix<T>> {
3289-
std::size_t operator()(const MyMatrix<T> &e_val) const {
3327+
std::size_t operator()(const MyMatrix<T> &e_val) const {
32903328
size_t seed = 0x1b873540;
32913329
return Matrix_Hash(e_val, seed);
32923330
}
32933331
};
3332+
32943333
// clang-format off
32953334
} // namespace std
32963335
// clang-format on

0 commit comments

Comments
 (0)