Skip to content

Instantly share code, notes, and snippets.

@tybl
Created April 15, 2022 22:56
Show Gist options
  • Save tybl/e4b4b1678634c608a25753864e5fa03e to your computer and use it in GitHub Desktop.
Save tybl/e4b4b1678634c608a25753864e5fa03e to your computer and use it in GitHub Desktop.
string_split benchmarking
#include <algorithm>
#include <cstddef>
#include <iterator>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
// A pair of raw pointers into a character sequence.
// Nothing in the visible code uses this type; presumably it is meant to
// describe the half-open range [begin, end) -- confirm before relying on it.
struct StringRange {
  const char* begin;  // first character of the range
  const char* end;    // end marker of the range
};
// Tokenizer built on std::string::find_first_of.
//
// Every character contained in `delims` terminates a token. Empty tokens
// (produced by leading, trailing or consecutive delimiters) are dropped.
// Each token is returned as an owning std::string copy.
std::vector<std::string>
split(const std::string& str, const std::string& delims = " ")
{
    std::vector<std::string> tokens;
    std::string::size_type pos = 0;
    while (pos < str.size()) {
        const std::string::size_type stop = str.find_first_of(delims, pos);
        if (stop == std::string::npos) {
            // No further delimiter: the remainder is the final token.
            tokens.push_back(str.substr(pos));
            break;
        }
        if (stop > pos) {
            tokens.push_back(str.substr(pos, stop - pos));
        }
        pos = stop + 1;  // resume just after the delimiter
    }
    return tokens;
}
// Tokenizer built on the generic std::find_first_of algorithm over
// std::string iterators.
//
// Every character contained in `delims` terminates a token; empty tokens are
// dropped. Tokens are returned as owning std::string copies.
std::vector<std::string>
splitStd(const std::string& str, const std::string& delims = " ")
{
    std::vector<std::string> tokens;
    const auto stop = str.cend();
    for (auto cursor = str.cbegin(); cursor != stop; ++cursor) {
        const auto hit = std::find_first_of(cursor, stop, delims.cbegin(), delims.cend());
        if (hit != cursor) {
            tokens.emplace_back(cursor, hit);
        }
        if (hit == stop) {
            break;  // consumed the whole input
        }
        cursor = hit;  // the loop's ++cursor then steps over the delimiter
    }
    return tokens;
}
// Tokenizer that walks the string's character buffer with raw pointers and
// std::find_first_of.
//
// Every character contained in `delims` terminates a token; empty tokens are
// dropped. Tokens are returned as owning std::string copies.
std::vector<std::string> splitPtr(const std::string& str, const std::string& delims = " ")
{
    std::vector<std::string> tokens;
    const char* cursor = str.data();
    const char* const stop = cursor + str.size();
    while (cursor != stop) {
        const char* const hit = std::find_first_of(cursor, stop, delims.cbegin(), delims.cend());
        if (hit != cursor) {
            tokens.emplace_back(cursor, hit);
        }
        if (hit == stop) {
            break;  // no delimiter left in the input
        }
        cursor = hit + 1;
    }
    return tokens;
}
// Non-owning tokenizer built on std::string_view::find_first_of.
//
// Every character contained in `delims` terminates a token; empty tokens are
// dropped. The returned views point into the buffer viewed by `strv`.
std::vector<std::string_view>
splitSV(std::string_view strv, std::string_view delims = " ")
{
    std::vector<std::string_view> tokens;
    // `strv` is a by-value copy, so it is shrunk from the front as we go.
    while (!strv.empty()) {
        const auto stop = strv.find_first_of(delims);
        if (stop == std::string_view::npos) {
            tokens.push_back(strv);  // remainder is the final token
            break;
        }
        if (stop != 0) {
            tokens.push_back(strv.substr(0, stop));
        }
        strv.remove_prefix(stop + 1);  // drop the token and its delimiter
    }
    return tokens;
}
// Non-owning tokenizer built on the generic std::find_first_of algorithm over
// std::string_view iterators.
//
// Every character contained in `delims` terminates a token; empty tokens are
// dropped. The returned views are sub-views of `strv`.
std::vector<std::string_view>
splitSVStd(std::string_view strv, std::string_view delims = " ")
{
    using size_type = std::string_view::size_type;

    std::vector<std::string_view> tokens;
    const auto origin = strv.begin();
    const auto stop = strv.end();
    auto cursor = origin;
    while (cursor != stop) {
        const auto hit = std::find_first_of(cursor, stop, delims.begin(), delims.end());
        if (hit != cursor) {
            // Translate the iterator pair back into offset/length for substr.
            const auto offset = static_cast<size_type>(std::distance(origin, cursor));
            const auto length = static_cast<size_type>(std::distance(cursor, hit));
            tokens.push_back(strv.substr(offset, length));
        }
        if (hit == stop) {
            break;
        }
        cursor = std::next(hit);
    }
    return tokens;
}
// Non-owning tokenizer that walks the viewed buffer with raw pointers and
// std::find_first_of.
//
// str     - text to split; the returned views point into its buffer.
// delims  - set of separator characters; any one of them ends a token.
// returns - the non-empty tokens in order (leading, trailing and consecutive
//           delimiters produce no empty entries).
std::vector<std::string_view> splitSVPtr(std::string_view str, std::string_view delims = " ")
{
    std::vector<std::string_view> output;
    const char* first = str.data();
    const char* const last = first + str.size();
    while (first != last) {
        const char* const second = std::find_first_of(first, last, delims.begin(), delims.end());
        if (first != second) {
            output.emplace_back(first, static_cast<std::string_view::size_type>(second - first));
        }
        // Stop before advancing: stepping to `second + 1` when `second == last`
        // would form a pointer beyond one-past-the-end of the viewed buffer.
        if (second == last) {
            break;
        }
        first = second + 1;
    }
    return output;
}
// Splits `p_str` at every occurrence of `p_delim`, keeping empty fields.
// The output vector is sized up front to exactly (number of delimiters + 1),
// which is the number of fields produced. Returned views point into the
// buffer viewed by `p_str`.
auto split_ReserveExact(std::string_view p_str, char p_delim = ' ') -> std::vector<std::string_view> {
  std::vector<std::string_view> result;
  const auto delim_count = std::count(p_str.begin(), p_str.end(), p_delim);
  result.reserve(1UL + static_cast<size_t>(delim_count));

  while (true) {
    const auto cut = p_str.find(p_delim);
    if (cut == std::string_view::npos) {
      break;
    }
    result.emplace_back(p_str.data(), cut);
    p_str.remove_prefix(cut + 1);  // drop the field and its delimiter
  }
  // Whatever is left (possibly empty) is the last field.
  result.emplace_back(p_str.data(), p_str.size());
  return result;
}
// Splits `p_str` at every occurrence of `p_delim`, keeping empty fields.
// No capacity is reserved up front; the vector grows as fields are appended.
// Returned views point into the buffer viewed by `p_str`.
auto split_NoReserve(std::string_view p_str, char p_delim = ' ') -> std::vector<std::string_view> {
  std::vector<std::string_view> result;

  while (true) {
    const auto cut = p_str.find(p_delim);
    if (cut == std::string_view::npos) {
      break;
    }
    result.emplace_back(p_str.data(), cut);
    p_str.remove_prefix(cut + 1);  // drop the field and its delimiter
  }
  // Whatever is left (possibly empty) is the last field.
  result.emplace_back(p_str.data(), p_str.size());
  return result;
}
// Splits `p_str` at every occurrence of `p_delim`, keeping empty fields.
//
// Capacity is reserved up front for the worst case instead of counting the
// delimiters first. A string of N characters yields at most N + 1 fields
// (when every character is a delimiter), hence the `+ 1`; reserving only N
// would still force a reallocation for such inputs and for the empty string.
//
// p_str   - text to split; the returned views point into its buffer.
// p_delim - the single separator character.
// returns - the fields in order; always contains at least one element.
auto split_ReserveEnough(std::string_view p_str, char p_delim = ' ') -> std::vector<std::string_view> {
  std::vector<std::string_view> result;
  result.reserve(p_str.size() + 1);
  for (auto index = p_str.find(p_delim); index != std::string_view::npos; index = p_str.find(p_delim)) {
    result.emplace_back(p_str.data(), index);
    p_str.remove_prefix(index + 1);  // drop the field and its delimiter
  }
  // Whatever is left (possibly empty) is the last field.
  result.emplace_back(p_str.data(), p_str.size());
  return result;
}
// Splits `p_str` at every occurrence of `p_delim` using std::find, keeping
// empty fields.
//
// The scan runs over raw pointers obtained from data()/size(), and each field
// is built with the (pointer, length) string_view constructor. The
// two-iterator constructor is only available from C++20 on, so this form also
// builds as C++17.
//
// p_str   - text to split; the returned views point into its buffer.
// p_delim - the single separator character.
// returns - the fields in order; always contains at least one element.
auto split_StdFind(std::string_view p_str, char p_delim = ' ') -> std::vector<std::string_view> {
  using size_type = std::string_view::size_type;

  std::vector<std::string_view> result;
  // N characters produce at most N + 1 fields.
  result.reserve(p_str.size() + 1);

  char const* first_p = p_str.data();
  char const* const last_p = first_p + p_str.size();
  for (char const* pos_p = std::find(first_p, last_p, p_delim); pos_p != last_p;
       pos_p = std::find(first_p, last_p, p_delim)) {
    result.emplace_back(first_p, static_cast<size_type>(pos_p - first_p));
    first_p = pos_p + 1;  // pos_p is a delimiter here, so this stays within [data, last_p]
  }
  // Whatever is left (possibly empty) is the last field.
  result.emplace_back(first_p, static_cast<size_type>(last_p - first_p));
  return result;
}
// Splits `p_str` at any character contained in `p_delims`, keeping empty
// fields. The not-yet-consumed part of the input is tracked as a
// (begin, end) pointer pair.
//
// The pair is initialised from data()/size() rather than begin()/end():
// string_view's iterator type is not guaranteed to be a raw pointer. Fields
// are built with the (pointer, length) constructor because the two-iterator
// string_view constructor only exists from C++20 on.
//
// p_str    - text to split; the returned views point into its buffer.
// p_delims - set of separator characters; any one of them ends a field.
// returns  - the fields in order; always contains at least one element.
auto split_Pair(std::string_view p_str, std::string_view p_delims = " ") -> std::vector<std::string_view> {
  using size_type = std::string_view::size_type;

  std::vector<std::string_view> result;
  // N characters produce at most N + 1 fields.
  result.reserve(p_str.size() + 1);

  std::pair<char const*, char const*> sr{p_str.data(), p_str.data() + p_str.size()};
  for (char const* pos_p = std::find_first_of(sr.first, sr.second, p_delims.cbegin(), p_delims.cend());
       pos_p != sr.second;
       pos_p = std::find_first_of(sr.first, sr.second, p_delims.cbegin(), p_delims.cend())) {
    result.emplace_back(sr.first, static_cast<size_type>(pos_p - sr.first));
    sr.first = pos_p + 1;  // pos_p is a delimiter here, so this never passes sr.second
  }
  // Whatever is left (possibly empty) is the last field.
  result.emplace_back(sr.first, static_cast<size_type>(sr.second - sr.first));
  return result;
}
// Splits `p_str` at any character contained in `p_delims`, keeping empty
// fields. The not-yet-consumed part of the input is tracked with two separate
// pointer variables.
//
// Fields are built with the (pointer, length) string_view constructor; the
// two-iterator constructor only exists from C++20 on, so this form also
// builds as C++17.
//
// p_str    - text to split; the returned views point into its buffer.
// p_delims - set of separator characters; any one of them ends a field.
// returns  - the fields in order; always contains at least one element.
auto split_Singles(std::string_view p_str, std::string_view p_delims = " ") -> std::vector<std::string_view> {
  using size_type = std::string_view::size_type;

  std::vector<std::string_view> result;
  // N characters produce at most N + 1 fields.
  result.reserve(p_str.size() + 1);

  char const* first = p_str.data();
  char const* const second = first + p_str.size();
  for (char const* pos_p = std::find_first_of(first, second, p_delims.cbegin(), p_delims.cend());
       pos_p != second;
       pos_p = std::find_first_of(first, second, p_delims.cbegin(), p_delims.cend())) {
    result.emplace_back(first, static_cast<size_type>(pos_p - first));
    first = pos_p + 1;  // pos_p is a delimiter here, so this never passes `second`
  }
  // Whatever is left (possibly empty) is the last field.
  result.emplace_back(first, static_cast<size_type>(second - first));
  return result;
}
// Splits `p_str` at any character contained in `p_delims`, keeping empty
// fields. Written as a plain while loop with the search primed before it.
//
// The pointers come from data()/size() rather than std::cbegin/std::cend:
// string_view's iterator type is not guaranteed to be a raw pointer, so
// `auto const*` may fail to deduce from it. Fields are built with the
// (pointer, length) constructor because the two-iterator string_view
// constructor only exists from C++20 on.
//
// p_str    - text to split; the returned views point into its buffer.
// p_delims - set of separator characters; any one of them ends a field.
// returns  - the fields in order; always contains at least one element.
inline auto split_on(std::string_view p_str, std::string_view p_delims = " ") -> std::vector<std::string_view> {
  using size_type = std::string_view::size_type;

  std::vector<std::string_view> result;
  // N characters produce at most N + 1 fields.
  result.reserve(p_str.size() + 1);

  char const* first_p = p_str.data();
  char const* const last_p = first_p + p_str.size();
  char const* pos_p = std::find_first_of(first_p, last_p, p_delims.cbegin(), p_delims.cend());
  while (last_p != pos_p) {
    result.emplace_back(first_p, static_cast<size_type>(pos_p - first_p));
    first_p = pos_p + 1;  // pos_p is a delimiter here, so this never passes last_p
    pos_p = std::find_first_of(first_p, last_p, p_delims.cbegin(), p_delims.cend());
  }
  // Whatever is left (possibly empty) is the last field.
  result.emplace_back(first_p, static_cast<size_type>(last_p - first_p));
  return result;
}
// Splits `p_str` at any character contained in `p_delims`, keeping empty
// fields. Each pass finds the next delimiter, emits the field in front of it,
// and stops once the search reaches the end of the input. No capacity is
// reserved up front.
//
// The pointers come from data()/size() rather than std::cbegin/std::cend
// (string_view's iterator type is not guaranteed to be a raw pointer), and
// fields are built with the (pointer, length) constructor because the
// two-iterator string_view constructor only exists from C++20 on.
//
// p_str    - text to split; the returned views point into its buffer.
// p_delims - set of separator characters; any one of them ends a field.
// returns  - the fields in order; always contains at least one element.
inline auto split_do(std::string_view p_str, std::string_view p_delims = " ") -> std::vector<std::string_view> {
  using size_type = std::string_view::size_type;

  std::vector<std::string_view> result;
  char const* first_p = p_str.data();
  char const* const last_p = first_p + p_str.size();
  for (;;) {
    char const* const pos_p = std::find_first_of(first_p, last_p, p_delims.cbegin(), p_delims.cend());
    result.emplace_back(first_p, static_cast<size_type>(pos_p - first_p));
    // Leave before advancing: stepping past pos_p when it already equals
    // last_p would form a pointer beyond one-past-the-end of the buffer.
    if (pos_p == last_p) {
      break;
    }
    first_p = pos_p + 1;
  }
  return result;
}
// Sample text that the benchmarks in this file split.
// It is written as a sequence of adjacent string literals, which the compiler
// joins with nothing in between. Literals that do not end in a space therefore
// fuse with the start of the next one (for example "elit," followed by "sed"
// becomes "elit,sed"). The exact bytes are the benchmark input, so editing the
// text changes what every benchmark measures.
const std::string_view LoremIpsumStrv{ "Lorem ipsum dolor sit amet, consectetur adipiscing elit,"
"sed do eiusmod tempor incididuntsuperlongwordsuper ut labore et dut labore et dolore magna aliqua. Ut enim ad minim veniam, "
"quis nostrud exercitation ullamco laboris nisi ut aliquipsuperlongword ex ea commodo consequat. Duis aute"
"irure dolor in reprehenderit in voluptate velit esse cillum dut labore et dolore magna aliqua. Ut enim ad minim veniam, "
"quis nostrud exercitation ullamco laboris nisi ut aliquiut labore et dolore magna aliqua. Ut enim ad minim veniam, "
"quis nostrud exercitation ullamco laboris nisi ut aliquipsuperlongword ex ea commodo consequat. Duis aute"
"irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur."
"Excepteur sint occaecat cupidatatsuperlongword non proident, sunt ipsuperlongword ex ea commodo consequat. Duis aute"
"irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur."
"Excepteur sint occaecat cupidatatsuperlongword non proident, sunt iolore eu fugiat nulla pariatur."
"Excepteur sint occaecat cupidatatsuperlongword non proident, sunt iolore magna aliqua. Ut enim ad minim veniam, "
"quis nostrud exercitation ullamco laboris nisi ut aliquipsuperlongword ex ea commodo consequat. Duis aute"
"irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur."
"Excepteur sint occaecat cupidatatsuperlongword non proident, sunt in culpa qui officia deserunt mollit anim id est laborum." };
#if 0
// Benchmark driver for split(): tokenizes an owning std::string copy of the
// sample text. The copy is made once, outside the measured loop.
static void StringSplit(benchmark::State& state) {
  const std::string input{LoremIpsumStrv};
  for (auto _ : state) {
    // Only the statements inside this loop are measured.
    auto tokens = split(input);
    benchmark::DoNotOptimize(tokens);  // keep the result from being optimized away
  }
}
// Hand the driver to the benchmark framework.
BENCHMARK(StringSplit);
// Benchmark driver for splitStd(): tokenizes an owning std::string copy of the
// sample text. The copy is made once, outside the measured loop.
static void StringSplitStd(benchmark::State& state) {
  const std::string input{LoremIpsumStrv};
  for (auto _ : state) {
    // Only the statements inside this loop are measured.
    auto tokens = splitStd(input);
    benchmark::DoNotOptimize(tokens);  // keep the result from being optimized away
  }
}
// Hand the driver to the benchmark framework.
BENCHMARK(StringSplitStd);
// Benchmark driver for splitPtr(): tokenizes an owning std::string copy of the
// sample text. The copy is made once, outside the measured loop.
static void StringSplitPtr(benchmark::State& state) {
  const std::string input{LoremIpsumStrv};
  for (auto _ : state) {
    // Only the statements inside this loop are measured.
    auto tokens = splitPtr(input);
    benchmark::DoNotOptimize(tokens);  // keep the result from being optimized away
  }
}
// Hand the driver to the benchmark framework.
BENCHMARK(StringSplitPtr);
// Benchmark driver for splitSV(): tokenizes the sample text directly through
// its string_view, once per benchmark iteration.
static void StringViewSplit(benchmark::State& state) {
  for (auto _ : state) {
    auto tokens = splitSV(LoremIpsumStrv);
    benchmark::DoNotOptimize(tokens);  // keep the result from being optimized away
  }
}
BENCHMARK(StringViewSplit);
// Benchmark driver for splitSVStd(): tokenizes the sample text directly
// through its string_view, once per benchmark iteration.
static void StringViewSplitStd(benchmark::State& state) {
  for (auto _ : state) {
    auto tokens = splitSVStd(LoremIpsumStrv);
    benchmark::DoNotOptimize(tokens);  // keep the result from being optimized away
  }
}
BENCHMARK(StringViewSplitStd);
#endif
// Benchmark driver for splitSVPtr(): tokenizes the sample text directly
// through its string_view, once per benchmark iteration.
static void StringViewSplitPtr(benchmark::State& state) {
  for (auto _ : state) {
    auto tokens = splitSVPtr(LoremIpsumStrv);
    benchmark::DoNotOptimize(tokens);  // keep the result from being optimized away
  }
}
BENCHMARK(StringViewSplitPtr);
#if 0
// Benchmark driver for split_ReserveExact(): splits the sample text once per
// benchmark iteration.
static void Split_ReserveExact(benchmark::State& state) {
  for (auto _ : state) {
    auto fields = split_ReserveExact(LoremIpsumStrv);
    benchmark::DoNotOptimize(fields);  // keep the result from being optimized away
  }
}
BENCHMARK(Split_ReserveExact);
// Benchmark driver for split_NoReserve(): splits the sample text once per
// benchmark iteration.
static void Split_NoReserve(benchmark::State& state) {
  for (auto _ : state) {
    auto fields = split_NoReserve(LoremIpsumStrv);
    benchmark::DoNotOptimize(fields);  // keep the result from being optimized away
  }
}
BENCHMARK(Split_NoReserve);
// Benchmark driver for split_ReserveEnough(): splits the sample text once per
// benchmark iteration.
static void Split_ReserveEnough(benchmark::State& state) {
  for (auto _ : state) {
    auto fields = split_ReserveEnough(LoremIpsumStrv);
    benchmark::DoNotOptimize(fields);  // keep the result from being optimized away
  }
}
BENCHMARK(Split_ReserveEnough);
// Benchmark driver for split_StdFind(): splits the sample text once per
// benchmark iteration.
static void Split_StdFind(benchmark::State& state) {
  for (auto _ : state) {
    auto fields = split_StdFind(LoremIpsumStrv);
    benchmark::DoNotOptimize(fields);  // keep the result from being optimized away
  }
}
BENCHMARK(Split_StdFind);
#endif
// Benchmark driver for split_Pair(): splits the sample text once per
// benchmark iteration.
static void Split_Pair(benchmark::State& state) {
  for (auto _ : state) {
    auto fields = split_Pair(LoremIpsumStrv);
    benchmark::DoNotOptimize(fields);  // keep the result from being optimized away
  }
}
BENCHMARK(Split_Pair);
// Benchmark driver for split_Singles(): splits the sample text once per
// benchmark iteration.
static void Split_Singles(benchmark::State& state) {
  for (auto _ : state) {
    auto fields = split_Singles(LoremIpsumStrv);
    benchmark::DoNotOptimize(fields);  // keep the result from being optimized away
  }
}
BENCHMARK(Split_Singles);
// Benchmark driver for split_on(): splits the sample text once per benchmark
// iteration.
static void Split_On(benchmark::State& state) {
  for (auto _ : state) {
    auto fields = split_on(LoremIpsumStrv);
    benchmark::DoNotOptimize(fields);  // keep the result from being optimized away
  }
}
BENCHMARK(Split_On);
// Benchmark driver for split_do(): splits the sample text once per benchmark
// iteration.
static void Split_Do(benchmark::State& state) {
  for (auto _ : state) {
    auto fields = split_do(LoremIpsumStrv);
    benchmark::DoNotOptimize(fields);  // keep the result from being optimized away
  }
}
BENCHMARK(Split_Do);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment