Created
April 15, 2022 22:56
-
-
Save tybl/e4b4b1678634c608a25753864e5fa03e to your computer and use it in GitHub Desktop.
string_split benchmarking
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <algorithm>
#include <cstddef>
#include <iterator>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
// Plain aggregate holding two raw character pointers.
// NOTE(review): presumably meant to describe a [begin, end) range, but no
// function visible in this file uses it -- confirm before relying on that.
struct StringRange {
  const char* begin;
  const char* end;
};
// Splits `str` at every character contained in `delims`, locating the
// delimiters with std::string::find_first_of.
// Empty tokens (leading, trailing or adjacent delimiters) are dropped.
// Returns the tokens as owning std::string copies.
std::vector<std::string>
split(const std::string& str, const std::string& delims = " ")
{
    std::vector<std::string> tokens;
    for (std::string::size_type start = 0; start < str.size();)
    {
        const std::string::size_type stop = str.find_first_of(delims, start);
        if (stop == std::string::npos)
        {
            // No delimiter left: the remainder is the final token. It is
            // non-empty because start < str.size() holds here.
            tokens.emplace_back(str.substr(start));
            break;
        }
        if (stop > start)
        {
            tokens.emplace_back(str.substr(start, stop - start));
        }
        start = stop + 1;
    }
    return tokens;
}
// Splits `str` at every character contained in `delims`, locating the
// delimiters with the std::find_first_of algorithm over string iterators.
// Empty tokens are dropped; tokens are returned as owning std::string copies.
std::vector<std::string>
splitStd(const std::string& str, const std::string& delims = " ")
{
    std::vector<std::string> tokens;
    const auto strEnd = str.cend();
    for (auto tokenBegin = str.cbegin(); tokenBegin != strEnd;)
    {
        const auto tokenEnd = std::find_first_of(tokenBegin, strEnd,
                                                 delims.cbegin(), delims.cend());
        if (tokenEnd != tokenBegin)
        {
            tokens.emplace_back(tokenBegin, tokenEnd);
        }
        if (tokenEnd == strEnd)
        {
            break;
        }
        // Step over the delimiter that ended this token.
        tokenBegin = tokenEnd + 1;
    }
    return tokens;
}
// Splits `str` at every character contained in `delims`, walking the string
// through raw `const char*` pointers and std::find_first_of.
// Empty tokens are dropped; tokens are returned as owning std::string copies.
std::vector<std::string> splitPtr(const std::string& str, const std::string& delims = " ")
{
    std::vector<std::string> tokens;
    const char* cursor = str.data();
    const char* const stop = cursor + str.size();
    while (cursor != stop)
    {
        const char* const tokenEnd =
            std::find_first_of(cursor, stop, std::cbegin(delims), std::cend(delims));
        if (tokenEnd != cursor)
        {
            tokens.emplace_back(cursor, tokenEnd);
        }
        if (tokenEnd == stop)
        {
            break;
        }
        // Continue right after the delimiter.
        cursor = tokenEnd + 1;
    }
    return tokens;
}
// Splits `strv` at every character contained in `delims`, locating the
// delimiters with std::string_view::find_first_of.
// Empty tokens are dropped. The returned views point into the caller's
// buffer, so they are only valid while that buffer is alive.
std::vector<std::string_view>
splitSV(std::string_view strv, std::string_view delims = " ")
{
    using size_type = std::string_view::size_type;

    std::vector<std::string_view> tokens;
    for (size_type start = 0; start < strv.size();)
    {
        const size_type stop = strv.find_first_of(delims, start);
        if (stop == std::string_view::npos)
        {
            // Remainder is the last token; non-empty since start < size().
            tokens.emplace_back(strv.substr(start));
            break;
        }
        if (stop > start)
        {
            tokens.emplace_back(strv.substr(start, stop - start));
        }
        start = stop + 1;
    }
    return tokens;
}
// Splits `strv` at every character contained in `delims`, locating the
// delimiters with the std::find_first_of algorithm over string_view
// iterators and converting each iterator pair back into a sub-view.
// Empty tokens are dropped. The returned views point into the caller's
// buffer.
std::vector<std::string_view>
splitSVStd(std::string_view strv, std::string_view delims = " ")
{
    std::vector<std::string_view> tokens;
    const auto viewBegin = strv.begin();
    const auto viewEnd = strv.end();
    for (auto tokenBegin = viewBegin; tokenBegin != viewEnd;)
    {
        const auto tokenEnd = std::find_first_of(tokenBegin, viewEnd,
                                                 std::cbegin(delims), std::cend(delims));
        if (tokenEnd != tokenBegin)
        {
            // Translate the iterator pair into (offset, length) for substr.
            const auto offset = std::distance(viewBegin, tokenBegin);
            const auto length = std::distance(tokenBegin, tokenEnd);
            tokens.emplace_back(strv.substr(offset, length));
        }
        if (tokenEnd == viewEnd)
        {
            break;
        }
        tokenBegin = tokenEnd + 1;
    }
    return tokens;
}
// Splits `str` at every character contained in `delims`, walking the view
// through raw `const char*` pointers and std::find_first_of.
// Empty tokens are dropped. The returned views point into the caller's
// buffer, so they are only valid while that buffer is alive.
std::vector<std::string_view> splitSVPtr(std::string_view str, std::string_view delims = " ")
{
    std::vector<std::string_view> output;
    //output.reserve(str.size() / 2);
    const char* first = str.data();
    const char* const last = first + str.size();
    while (first != last) {
        const char* const second = std::find_first_of(first, last, std::cbegin(delims), std::cend(delims));
        if (first != second)
            output.emplace_back(first, static_cast<std::string_view::size_type>(second - first));
        // Stop before advancing: `second + 1` with second == last would form a
        // pointer beyond one-past-the-end of the viewed range, which is
        // undefined for a view that is not backed by a larger buffer.
        if (second == last)
            break;
        first = second + 1;
    }
    return output;
}
// Splits `p_str` at every occurrence of `p_delim`, keeping empty tokens.
// The result vector is reserved up front to exactly (delimiter count + 1)
// entries, at the price of one extra pass over the input.
// The returned views point into the caller's buffer.
auto split_ReserveExact(std::string_view p_str, char p_delim = ' ') -> std::vector<std::string_view> {
  const auto delim_count = std::count(p_str.begin(), p_str.end(), p_delim);

  std::vector<std::string_view> result;
  result.reserve(1UL + static_cast<size_t>(delim_count));

  while (true) {
    const auto index = p_str.find(p_delim);
    if (index == std::string_view::npos) {
      break;
    }
    result.push_back(p_str.substr(0, index));
    // Drop the token together with its trailing delimiter.
    p_str.remove_prefix(index + 1);
  }
  // Whatever is left (possibly empty) is the last token.
  result.push_back(p_str);
  return result;
}
// Splits `p_str` at every occurrence of `p_delim`, keeping empty tokens.
// No capacity is reserved; the result vector grows on demand.
// The returned views point into the caller's buffer.
auto split_NoReserve(std::string_view p_str, char p_delim = ' ') -> std::vector<std::string_view> {
  std::vector<std::string_view> result;

  auto index = p_str.find(p_delim);
  while (index != std::string_view::npos) {
    result.push_back(p_str.substr(0, index));
    // Drop the token together with its trailing delimiter.
    p_str.remove_prefix(index + 1);
    index = p_str.find(p_delim);
  }
  // Whatever is left (possibly empty) is the last token.
  result.push_back(p_str);
  return result;
}
// Splits `p_str` at every occurrence of `p_delim`, keeping empty tokens.
// Capacity is reserved for one entry per input character instead of
// counting delimiters first.
// The returned views point into the caller's buffer.
auto split_ReserveEnough(std::string_view p_str, char p_delim = ' ') -> std::vector<std::string_view> {
  std::vector<std::string_view> result;
  result.reserve(p_str.size());

  auto index = p_str.find(p_delim);
  while (index != std::string_view::npos) {
    result.push_back(p_str.substr(0, index));
    // Drop the token together with its trailing delimiter.
    p_str.remove_prefix(index + 1);
    index = p_str.find(p_delim);
  }
  // Whatever is left (possibly empty) is the last token.
  result.push_back(p_str);
  return result;
}
// Splits `p_str` at every occurrence of `p_delim`, keeping empty tokens and
// locating the delimiter with the std::find algorithm.
// Capacity is reserved for one entry per input character.
// The returned views point into the caller's buffer.
auto split_StdFind(std::string_view p_str, char p_delim = ' ') -> std::vector<std::string_view> {
  using size_type = std::string_view::size_type;

  std::vector<std::string_view> result;
  result.reserve(p_str.size());
  for (auto pos_p = std::find(p_str.cbegin(), p_str.cend(), p_delim); p_str.end() != pos_p;
       pos_p = std::find(p_str.begin(), p_str.end(), p_delim)) {
    // Build the token from (pointer, length): the iterator-pair string_view
    // constructor only exists from C++20 on, and iterators need not be
    // raw pointers.
    auto const length = static_cast<size_type>(std::distance(p_str.begin(), pos_p));
    result.emplace_back(p_str.data(), length);
    // Drop the token together with its trailing delimiter.
    p_str.remove_prefix(length + 1);
  }
  result.emplace_back(p_str.data(), p_str.size());
  return result;
}
// Splits `p_str` at every character contained in `p_delims`, keeping empty
// tokens. The current [first, second) window over the input is tracked in a
// std::pair of raw pointers.
// Capacity is reserved for one entry per input character.
// The returned views point into the caller's buffer.
auto split_Pair(std::string_view p_str, std::string_view p_delims = " ") -> std::vector<std::string_view> {
  using size_type = std::string_view::size_type;

  std::vector<std::string_view> result;
  result.reserve(p_str.size());
  // Take the pointers from data() rather than begin()/end(): string_view
  // iterators are not guaranteed to be raw pointers.
  std::pair<char const*, char const*> sr{p_str.data(), p_str.data() + p_str.size()};
  for (auto pos_p = std::find_first_of(sr.first, sr.second, p_delims.cbegin(), p_delims.cend()); sr.second != pos_p;
       pos_p = std::find_first_of(sr.first, sr.second, p_delims.cbegin(), p_delims.cend())) {
    // (pointer, length) construction works in C++17; the iterator-pair
    // constructor is C++20-only.
    result.emplace_back(sr.first, static_cast<size_type>(pos_p - sr.first));
    sr.first = pos_p + 1;
  }
  // Whatever is left (possibly empty) is the last token.
  result.emplace_back(sr.first, static_cast<size_type>(sr.second - sr.first));
  return result;
}
// Splits `p_str` at every character contained in `p_delims`, keeping empty
// tokens. The current [first, second) window over the input is tracked in
// two separate raw-pointer variables.
// Capacity is reserved for one entry per input character.
// The returned views point into the caller's buffer.
auto split_Singles(std::string_view p_str, std::string_view p_delims = " ") -> std::vector<std::string_view> {
  using size_type = std::string_view::size_type;

  std::vector<std::string_view> result;
  result.reserve(p_str.size());
  char const* first = p_str.data();
  char const* const second = first + p_str.size();
  for (auto pos_p = std::find_first_of(first, second, p_delims.cbegin(), p_delims.cend()); second != pos_p;
       pos_p = std::find_first_of(first, second, p_delims.cbegin(), p_delims.cend())) {
    // (pointer, length) construction works in C++17; the iterator-pair
    // constructor is C++20-only.
    result.emplace_back(first, static_cast<size_type>(pos_p - first));
    first = pos_p + 1;
  }
  // Whatever is left (possibly empty) is the last token.
  result.emplace_back(first, static_cast<size_type>(second - first));
  return result;
}
// Splits `p_str` at every character contained in `p_delims`, keeping empty
// tokens. Uses a while loop: search first, then emit tokens for as long as a
// delimiter was found.
// Capacity is reserved for one entry per input character.
// The returned views point into the caller's buffer.
inline auto split_on(std::string_view p_str, std::string_view p_delims = " ") -> std::vector<std::string_view> {
  using size_type = std::string_view::size_type;

  std::vector<std::string_view> result;
  result.reserve(std::size(p_str));
  // data() yields real pointers on every implementation; `auto const*`
  // deduced from cbegin() only compiles where the iterator is a raw pointer.
  char const* first_p = p_str.data();
  char const* const last_p = first_p + p_str.size();
  char const* pos_p = std::find_first_of(first_p, last_p, p_delims.cbegin(), p_delims.cend());
  while (last_p != pos_p) {
    // (pointer, length) construction works in C++17; the iterator-pair
    // constructor is C++20-only.
    result.emplace_back(first_p, static_cast<size_type>(pos_p - first_p));
    first_p = pos_p + 1;
    pos_p = std::find_first_of(first_p, last_p, p_delims.cbegin(), p_delims.cend());
  }
  // Whatever is left (possibly empty) is the last token.
  result.emplace_back(first_p, static_cast<size_type>(last_p - first_p));
  return result;
}
// Splits `p_str` at every character contained in `p_delims`, keeping empty
// tokens. Uses a do/while loop: every iteration searches and emits one
// token, and the loop ends once the search has reached the end of the input.
// No capacity is reserved.
// The returned views point into the caller's buffer.
inline auto split_do(std::string_view p_str, std::string_view p_delims = " ") -> std::vector<std::string_view> {
  using size_type = std::string_view::size_type;

  std::vector<std::string_view> result;
  // data() yields real pointers on every implementation; `auto const*`
  // deduced from cbegin() only compiles where the iterator is a raw pointer.
  char const* first_p = p_str.data();
  char const* const last_p = first_p + p_str.size();
  bool more = true;
  do {
    char const* const pos_p = std::find_first_of(first_p, last_p, p_delims.cbegin(), p_delims.cend());
    // (pointer, length) construction works in C++17; the iterator-pair
    // constructor is C++20-only.
    result.emplace_back(first_p, static_cast<size_type>(pos_p - first_p));
    more = (last_p != pos_p);
    if (more) {
      // Only advance past a real delimiter. Advancing from last_p would form
      // a pointer beyond one-past-the-end of the viewed range.
      first_p = pos_p + 1;
    }
  } while (more);
  return result;
}
// Sample text that the benchmarks below split.
// The adjacent string literals are concatenated by the compiler exactly as
// written, so some sentences run together without a separating space.
const std::string_view LoremIpsumStrv =
    "Lorem ipsum dolor sit amet, consectetur adipiscing elit,"
    "sed do eiusmod tempor incididuntsuperlongwordsuper ut labore et dut labore et dolore magna aliqua. Ut enim ad minim veniam, "
    "quis nostrud exercitation ullamco laboris nisi ut aliquipsuperlongword ex ea commodo consequat. Duis aute"
    "irure dolor in reprehenderit in voluptate velit esse cillum dut labore et dolore magna aliqua. Ut enim ad minim veniam, "
    "quis nostrud exercitation ullamco laboris nisi ut aliquiut labore et dolore magna aliqua. Ut enim ad minim veniam, "
    "quis nostrud exercitation ullamco laboris nisi ut aliquipsuperlongword ex ea commodo consequat. Duis aute"
    "irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur."
    "Excepteur sint occaecat cupidatatsuperlongword non proident, sunt ipsuperlongword ex ea commodo consequat. Duis aute"
    "irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur."
    "Excepteur sint occaecat cupidatatsuperlongword non proident, sunt iolore eu fugiat nulla pariatur."
    "Excepteur sint occaecat cupidatatsuperlongword non proident, sunt iolore magna aliqua. Ut enim ad minim veniam, "
    "quis nostrud exercitation ullamco laboris nisi ut aliquipsuperlongword ex ea commodo consequat. Duis aute"
    "irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur."
    "Excepteur sint occaecat cupidatatsuperlongword non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.";
#if 0 | |
static void StringSplit(benchmark::State& state) { | |
std::string str { LoremIpsumStrv }; | |
// Code inside this loop is measured repeatedly | |
for (auto _ : state) { | |
auto v = split(str); | |
benchmark::DoNotOptimize(v); | |
} | |
} | |
// Register the function as a benchmark | |
BENCHMARK(StringSplit); | |
static void StringSplitStd(benchmark::State& state) { | |
std::string str { LoremIpsumStrv }; | |
// Code inside this loop is measured repeatedly | |
for (auto _ : state) { | |
auto v = splitStd(str); | |
benchmark::DoNotOptimize(v); | |
} | |
} | |
// Register the function as a benchmark | |
BENCHMARK(StringSplitStd); | |
static void StringSplitPtr(benchmark::State& state) { | |
std::string str { LoremIpsumStrv }; | |
// Code inside this loop is measured repeatedly | |
for (auto _ : state) { | |
auto v = splitPtr(str); | |
benchmark::DoNotOptimize(v); | |
} | |
} | |
// Register the function as a benchmark | |
BENCHMARK(StringSplitPtr); | |
static void StringViewSplit(benchmark::State& state) { | |
for (auto _ : state) { | |
auto v = splitSV(LoremIpsumStrv); | |
benchmark::DoNotOptimize(v); | |
} | |
} | |
BENCHMARK(StringViewSplit); | |
static void StringViewSplitStd(benchmark::State& state) { | |
for (auto _ : state) { | |
auto v = splitSVStd(LoremIpsumStrv); | |
benchmark::DoNotOptimize(v); | |
} | |
} | |
BENCHMARK(StringViewSplitStd); | |
#endif | |
static void StringViewSplitPtr(benchmark::State& state) { | |
for (auto _ : state) { | |
auto v = splitSVPtr(LoremIpsumStrv); | |
benchmark::DoNotOptimize(v); | |
} | |
} | |
BENCHMARK(StringViewSplitPtr); | |
#if 0 | |
static void Split_ReserveExact(benchmark::State& state) { | |
for (auto _ : state) { | |
auto v = split_ReserveExact(LoremIpsumStrv); | |
benchmark::DoNotOptimize(v); | |
} | |
} | |
BENCHMARK(Split_ReserveExact); | |
static void Split_NoReserve(benchmark::State& state) { | |
for (auto _ : state) { | |
auto v = split_NoReserve(LoremIpsumStrv); | |
benchmark::DoNotOptimize(v); | |
} | |
} | |
BENCHMARK(Split_NoReserve); | |
static void Split_ReserveEnough(benchmark::State& state) { | |
for (auto _ : state) { | |
auto v = split_ReserveEnough(LoremIpsumStrv); | |
benchmark::DoNotOptimize(v); | |
} | |
} | |
BENCHMARK(Split_ReserveEnough); | |
static void Split_StdFind(benchmark::State& state) { | |
for (auto _ : state) { | |
auto v = split_StdFind(LoremIpsumStrv); | |
benchmark::DoNotOptimize(v); | |
} | |
} | |
BENCHMARK(Split_StdFind); | |
#endif | |
static void Split_Pair(benchmark::State& state) { | |
for (auto _ : state) { | |
auto v = split_Pair(LoremIpsumStrv); | |
benchmark::DoNotOptimize(v); | |
} | |
} | |
BENCHMARK(Split_Pair); | |
static void Split_Singles(benchmark::State& state) { | |
for (auto _ : state) { | |
auto v = split_Singles(LoremIpsumStrv); | |
benchmark::DoNotOptimize(v); | |
} | |
} | |
BENCHMARK(Split_Singles); | |
static void Split_On(benchmark::State& state) { | |
for (auto _ : state) { | |
auto v = split_on(LoremIpsumStrv); | |
benchmark::DoNotOptimize(v); | |
} | |
} | |
BENCHMARK(Split_On); | |
static void Split_Do(benchmark::State& state) { | |
for (auto _ : state) { | |
auto v = split_do(LoremIpsumStrv); | |
benchmark::DoNotOptimize(v); | |
} | |
} | |
BENCHMARK(Split_Do); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment