-
-
Save colesbury/b865b0a7788db1e8e3d88c94dcc01fa6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Benchmark entry point, implemented outside this translation unit
// (the Makefile links this program against libthread_local.so).
void run();

// Program entry: hands control straight to run(); the command-line
// arguments are accepted but not used.
int main(int argc, char* argv[]) {
    run();
    return 0;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Driver executable, linked against the shared library in this directory.
# NOTE: `main` is declared phony, so it is relinked on every `make` invocation.
.PHONY: main
main: main.cpp libthread_local.so
	g++ -std=c++11 -O2 main.cpp -L . -lthread_local -o main

# Position-independent shared library built from thread_local.cpp.
libthread_local.so: thread_local.cpp
	g++ -std=c++11 -O2 -fPIC -shared -o libthread_local.so thread_local.cpp

# `clean` produces no file of that name, so it must be phony; otherwise a
# stray file called `clean` would make this target look up to date.
.PHONY: clean
clean:
	rm -f main libthread_local.so
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <chrono> | |
#include <cmath> | |
#include <cstdlib> | |
#include <cstring> | |
#include <time.h> | |
#include <iostream> | |
#include <atomic> | |
// Returns the time elapsed from `start` to `end`, in seconds.
//
// When the nanosecond field of `end` is smaller than that of `start`, one
// second is borrowed from the seconds difference so the nanosecond part stays
// non-negative before both parts are combined into a double.
double diff(timespec start, timespec end)
{
    const auto nsec_delta = end.tv_nsec - start.tv_nsec;
    const bool borrow = nsec_delta < 0;

    timespec elapsed;
    elapsed.tv_sec = end.tv_sec - start.tv_sec - (borrow ? 1 : 0);
    elapsed.tv_nsec = borrow ? 1000000000 + nsec_delta : nsec_delta;

    return static_cast<double>(elapsed.tv_sec) + elapsed.tv_nsec * 1e-9;
}
template <typename Op> | |
double benchmark(Op op) { | |
const int N = 10000000; | |
timespec time1, time2; | |
op(); | |
clock_gettime(CLOCK_MONOTONIC, &time1); | |
for (int i = 0; i < N; i++) | |
op(); | |
clock_gettime(CLOCK_MONOTONIC, &time2); | |
double time = diff(time1, time2); | |
std::cout << (time/N)*1e9 << " ns \n"; | |
return time; | |
} | |
// Bumps a function-local thread_local counter by one.
// Marked noinline so each call stays a real call into this function.
__attribute__((noinline))
void add_thread_local() {
    static thread_local int counter;
    ++counter;
}
// Bumps a function-local static (non-thread-local) counter by one; the
// baseline that add_thread_local() is compared against.
// Marked noinline so each call stays a real call into this function.
__attribute__((noinline))
void add_global() {
    static int counter;
    ++counter;
}
// Runs the two benchmarks in order: the thread_local counter first, then the
// plain static counter. Each benchmark() call prints its own per-call timing;
// the returned totals are not used.
void run() {
    auto thread_local_case = [&]() { add_thread_local(); };
    auto global_case = [&]() { add_global(); };

    benchmark(thread_local_case);
    benchmark(global_case);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Results below. The difference between the two numbers approximates the "cost" of thread-local access in shared libraries (about 1-1.5 ns).
Linux (Intel(R) Xeon(R) CPU E5-2698 v4 @ 2.20GHz):
macOS (2.4 GHz 8-Core Intel Core i9):