Mph
C++20 [Minimal] Static Perfect Hash library
Install / Use
/learn @qlibs/MphREADME
// <!-- // The MIT License (MIT) // // Copyright (c) 2024 Kris Jusiak <kris@jusiak.net> // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in all // copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. // #if 0 // --> Overview / Examples / API / FAQ / Resources
mph: [Minimal] Static perfect hash library
https://en.wikipedia.org/wiki/Perfect_hash_function
Use cases
A static perfect hash function maps a set of keys known in advance to a set of values with no collisions
Features
- Single header (https://raw.githubusercontent.com/qlibs/mph/main/mph) / C++20 module (https://raw.githubusercontent.com/qlibs/mph/main/mph.cppm)
- Self verification upon include (can be disabled by
-DNTEST- see FAQ) - Compiles cleanly with (
-Wall -Wextra -Werror -pedantic -pedantic-errors -fno-exceptions -fno-rtti) - Minimal API
- Optimized run-time execution (see performance / benchmarks)
- Fast compilation times (see compilation)
- Trade-offs (see FAQ)
Requirements
- C++20 (gcc-12+, clang-15+) / [optional] (bmi2), [optional] (simd),
Overview
Hello world (https://godbolt.org/z/dzd6o3Pxo)
enum class color { red, green, blue };
constexpr auto colors = std::array{
std::pair{"red"sv, color::red},
std::pair{"green"sv, color::green},
std::pair{"blue"sv, color::blue},
};
static_assert(color::green == mph::lookup<colors>("green"));
static_assert(color::red == mph::lookup<colors>("red"));
static_assert(color::blue == mph::lookup<colors>("blue"));
std::print("{}", mph::lookup<colors>("green"sv)); // prints 1
[!NOTE]
mph::lookupassumes only valid input and returns mapped value direclty.
static_assert(not mph::find<colors>("unknown"));
static_assert(mph::find<colors>("green"));
static_assert(mph::find<colors>("red"));
static_assert(mph::find<colors>("blue"));
std::print("{}", *mph::find<colors>("green"sv)); // prints 1
[!NOTE]
mph::finddoesnt assume valid input and returns optional of mapped value.
<a name="performance"></a>
[Minimal] Lookup (https://godbolt.org/z/rqYj9a1cr)
int lookup(int id) {
static constexpr std::array ids{
std::pair{54u, 91u},
std::pair{64u, 324u},
std::pair{91u, 234u},
};
return mph::lookup<ids>(id);
}
lookup: // g++ -DNDEBUG -std=c++20 -O3
imull $1275516394, %edi, %eax
shrl $23, %eax
movl $24029728, %ecx
shrxl %eax, %ecx, %eax
andl $511, %eax
retq
Lookup (https://godbolt.org/z/vv6W4nGfb)
int lookup(int id) {
static constexpr std::array ids{
std::pair{54u, 91u},
std::pair{324u, 54u},
std::pair{64u, 324u},
std::pair{234u, 64u},
std::pair{91u, 234u},
};
return mph::lookup<ids>(id);
}
lookup: // g++ -DNDEBUG -std=c++20 -O3
andl $7, %edi
leaq lookup(%rip), %rax
movl (%rax,%rdi,4), %eax
retq
lookup:
.long 324
.long 0
.long 64
.long 234
.long 54
.long 0
.long 91
Find (https://godbolt.org/z/qMzxKK4sd)
int find(int id) {
static constexpr std::array ids{
std::pair{27629, 1},
std::pair{6280, 2},
// 1..128 pairs...
std::pair{33691, 128},
};
return *mph::find<ids>(id);
}
find: // g++ -DNDEBUG -std=c++20 -O3 -mbmi2 -mavx512f
vpbroadcastd %edi, %zmm0
shll $4, %edi
movzbl %dil, %ecx
leaq find
vpcmpeqd (%rdx,%rcx,4), %zmm0, %k0
kmovw %k0, %esi
kortestw %k0, %k0
rep bsfq %rax, %rax
movl $64, %eax
addl %eax, %ecx
xorl %eax, %eax
testw %si, %si
cmovnel 1024(%rdx,%rcx,4), %eax
vzeroupper
retq
find:
... // see godbolt
Find Strings (https://godbolt.org/z/KaKzf7Pax)
int find(std::span<const char, 8> str) {
static constexpr auto symbols = std::array{
std::pair{"AMZN "sv, 1},
std::pair{"AAPL "sv, 2},
std::pair{"GOOGL "sv, 3},
std::pair{"META "sv, 4},
std::pair{"MSFT "sv, 5},
std::pair{"NVDA "sv, 6},
std::pair{"TSLA "sv, 7},
};
return *mph::find<symbols>(str);
}
find: // g++ -DNDEBUG -std=c++20 -O3 -mbmi2
movq 8(%rsi), %rax
movl $1031, %ecx
leaq find(%rip), %rdx
xorl %esi, %esi
movq (%rax), %rax
pextq %rcx, %rax, %rcx
shll $4, %ecx
cmpq (%rcx,%rdx), %rax
movzbl 8(%rcx,%rdx), %eax
cmovnel %esi, %eax
retq
find:
... // see godbolt
Find Strings (https://godbolt.org/z/fdMPsYWjE)
int find(std::string_view str) {
using std::literals::operator""sv;
// values assigned from 0..N-1
static constexpr std::array symbols{
"BTC "sv, "ETH "sv, "BNB "sv,
"SOL "sv, "XRP "sv, "DOGE"sv,
"TON "sv, "ADA "sv, "SHIB"sv,
"AVAX"sv, "LINK"sv, "BCH "sv,
};
return *mph::find<symbols>(str);
}
find: // g++ -DNDEBUG -std=c++20 -O3 -mbmi2
shll $3, %edi
bzhil %edi, (%rsi), %eax
movl $789, %ecx
pextl %ecx, %eax, %ecx
leaq find(%rip), %rdx
xorl %esi, %esi
cmpl (%rdx,%rcx,8), %eax
movzbl 4(%rdx,%rcx,8), %eax
cmovnel %esi, %eax
retq
find:
... // see godbolt
Examples
- [feature]
lookup/findcustomization point - https://godbolt.org/z/enqeGxKK9 - [feature]
tocustomization point - https://godbolt.org/z/jTMx4n6j3 - [example]
branchless dispatcher- https://godbolt.org/z/5PTE3ercE - [performance - https://wg21.link/P2996]
enum_to_string- https://godbolt.org/z/ojohP6j7f - [performance - https://wg21.link/P2996]
string_to_enum- https://godbolt.org/z/83vGhY7M8
<a name="benchmarks"></a>
Benchmarks (run-time)
https://github.com/qlibs/mph/tree/benchmark
clang++ -std=c++20 -O3 -DNDEBUG -mbmi2 benchmark.cpp
| ns/op | op/s | err% |total | benchmark
|------:|---------------:|-----:|-----:|:----------
| 12.25 | 81,602,449.70 | 0.3% | 0.15 | `random_strings_5_len_4.std.map`
| 5.56 | 179,750,906.50 | 0.2% | 0.07 | `random_strings_5_len_4.std.unordered_map`
| 9.17 | 109,096,850.98 | 0.2% | 0.11 | `random_strings_5_len_4.boost.unordered_map`
| 13.48 | 74,210,250.54 | 0.3% | 0.16 | `random_strings_5_len_4.boost.flat_map`
| 7.70 | 129,942,965.18 | 0.3% | 0.09 | `random_strings_5_len_4.gperf`
| 1.61 | 621,532,188.81 | 0.1% | 0.02 | `random_strings_5_len_4.mph`
| 14.66 | 68,218,086.71 | 0.8% | 0.18 | `random_strings_5_len_8.std.map`
| 13.45 | 74,365,239.56 | 0.2% | 0.16 | `random_strings_5_len_8.std.unordered_map`
| 9.68 | 103,355,605.09 | 0.2% | 0.12 | `random_strings_5_len_8.boost.unordered_map`
| 16.00 | 62,517,180.19 | 0.4% | 0.19 | `random_strings_5_len_8.boost.flat_map`
| 7.70 | 129,809,356.36 | 0.3% | 0.09 | `random_strings_5_len_8.gperf`
| 1.58 | 633,084,194.24 | 0.1% | 0.02 | `random_strings_5_len_8.mph`
| 17.21 | 58,109,576.87 | 0.3% | 0.21 | `random_strings_6_len_2_5.std.map`
| 15.28 | 65,461,167.99 | 0.2% | 0.18 | `random_strings_6_len_2_5.std.unordered_map`
| 12.21 | 81,931,391.20 | 0.4% | 0.15 | `random_strings_6_len_2_5.boost.unordered_map`
| 17.15 | 58,323,741.08 | 0.5% | 0.21 | `random_strings_6_len_2_5.boost.flat_map`
| 7.94 | 125,883,197.55 | 0.5% | 0.09 | `random_strings_6_len_2_5.gperf`
| 6.05 | 165,239,616.00 | 0.5% | 0.07 | `random_strings_6_len_2_5.mph`
| 31.61 | 31,631,402.94 | 0.2% | 0.38 | `random_strings_100_len_8.std.map`
| 15.32 | 65,280,594.09 | 0.2% | 0.18 | `random_strings_100_len_8.std.unordered_map`
| 17.13 | 58,383,850.20 | 0.3% | 0.20 | `random_strings_100_len_8.boost.unordered_map`
| 31.42 | 31,822,519.67 | 0.2% | 0.38 | `random_strings_100_len_8.boost.flat_map`
| 8.04 | 124,397,773.85 | 0.2% | 0.10 | `random_strings_100_len_8.gperf`
| 1.58 | 632,813,481.73 | 0.1% | 0.02 | `random_strings_100_len_8.mph`
| 32.62 | 30,656,015.03 | 0.3% | 0.39 | `random_strings_100_len_1_8.std.map`
| 19.34 | 51,697,107.73 | 0.5% | 0.23 | `random_strings_100_len_1_8.std.unordered_map`
| 19.51 | 51,254,525.17 | 0.3% | 0.23 | `random_strings_100_len_1_8.boost.unordered_map`
| 33.58 | 29,780,574.17 | 0.6% | 0.40 | `random_strings_100_len_1_8.boost.flat_map`
| 13.06 | 76,577,037.07 | 0.7% | 0.16 | `random_strings_100_len_1_8.gperf`
| 6.02 | 166,100,665.07 | 0.2% | 0.07 | `random_strings_100_len_1_8.mph`
| 1.28 | 778,7
