Created
November 12, 2023 19:58
-
-
Save eatonphil/c2948f65d216a9ad3667801578154eee to your computer and use it in GitHub Desktop.
Row vs Column Access Patterns (fork of https://gist.github.com/ceberly/d35e1ca2c385fdd0ba2d2cc6c62d2909)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <assert.h> | |
#include <stdbool.h> | |
#include <stdint.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <time.h> | |
#define NUM_RECORDS 1000000 // 1M | |
#define NUM_QUERIES 1000 // number of queries to run in a row | |
#define LOCATION_ID 42 | |
struct User { | |
int id; | |
char name[255]; | |
int birth_location_id; | |
int time_of_birth; | |
int company; | |
int state; | |
int children; | |
bool married; | |
}; | |
struct ColDB { | |
int *id; | |
char *name; // store the names contiguously to mimic row db | |
int *birth_location_id; | |
int *time_of_birth; | |
int *company; | |
int *state; | |
int *children; | |
bool *married; | |
} col_db; | |
// row oriented DB is just an array of users. | |
struct User *row_db; | |
struct ColDB col_db; | |
static void setup(void) { | |
row_db = malloc(sizeof(struct User) * NUM_RECORDS); | |
assert(row_db != NULL); | |
col_db.id = malloc(sizeof(int) * NUM_RECORDS); | |
assert(col_db.id != NULL); | |
col_db.name = malloc(255 * NUM_RECORDS); | |
assert(col_db.name != NULL); | |
col_db.birth_location_id = malloc(sizeof(int) * NUM_RECORDS); | |
assert(col_db.birth_location_id != NULL); | |
col_db.time_of_birth = malloc(sizeof(int) * NUM_RECORDS); | |
assert(col_db.time_of_birth != NULL); | |
col_db.company = malloc(sizeof(int) * NUM_RECORDS); | |
assert(col_db.company != NULL); | |
col_db.state = malloc(sizeof(int) * NUM_RECORDS); | |
assert(col_db.state != NULL); | |
col_db.children = malloc(sizeof(int) * NUM_RECORDS); | |
assert(col_db.children != NULL); | |
col_db.married = malloc(sizeof(int) * NUM_RECORDS); | |
assert(col_db.married != NULL); | |
srand((unsigned int)time(NULL)); | |
for (size_t i = 0; i < NUM_RECORDS; i++) { | |
int id = abs(rand()); | |
int l = abs(rand()) % 100; // make some duplicate birth locations... | |
int t = abs(rand()) % 86400; // seconds in a day | |
row_db[i].id = id; | |
row_db[i].name[32] = '\0'; // just so we can print the names | |
row_db[i].birth_location_id = l; | |
row_db[i].time_of_birth = t; | |
row_db[i].company = l; | |
row_db[i].state = l; | |
row_db[i].children = l; | |
row_db[i].married = t % 10; | |
col_db.id[i] = id; | |
col_db.name[i * 255 + 32] = '\0'; | |
col_db.birth_location_id[i] = l; | |
col_db.time_of_birth[i] = t; | |
col_db.company[i] = l; | |
col_db.state[i] = l; | |
col_db.children[i] = l; | |
col_db.married[i] = t % 10; | |
} | |
} | |
// SELECT COUNT(1) FROM users WHERE location = 42 | |
static size_t rcount_users_by_location() { | |
size_t count = 0; | |
for (size_t i = 0; i < NUM_RECORDS; i++) { | |
struct User u = row_db[i]; | |
if (u.birth_location_id == LOCATION_ID) { | |
count++; | |
} | |
} | |
return count; | |
} | |
// SELECT COUNT(1) FROM users WHERE location = 42 | |
static size_t ccount_users_by_location() { | |
size_t count = 0; | |
for (size_t i = 0; i < NUM_RECORDS; i++) { | |
if (col_db.birth_location_id[i] == LOCATION_ID) { | |
count++; | |
} | |
} | |
return count; | |
} | |
struct Users { | |
size_t size; | |
size_t offset; | |
struct User* users; | |
}; | |
static struct Users* users_init() { | |
struct Users* users = (struct Users*)malloc(sizeof(struct User)); | |
assert(users != NULL); | |
users->size = NUM_RECORDS; | |
users->offset = 0; | |
users->users = (struct User*)malloc(sizeof(struct User) * users->size); | |
assert(users->users != NULL); | |
return users; | |
} | |
static void users_reset(struct Users* users) { | |
users->offset = 0; | |
} | |
static void users_append(struct Users* users, struct User user) { | |
assert(users->offset < users->size); | |
users->users[users->offset] = user; | |
users->offset++; | |
} | |
static void users_free(struct Users* users) { | |
free(users->users); | |
free(users); | |
} | |
// SELECT * FROM users WHERE location > 1 | |
static struct Users* rusers_by_location(struct Users* users) { | |
for (size_t i = 0; i < NUM_RECORDS; i++) { | |
struct User u = row_db[i]; | |
if (u.birth_location_id > 1) { | |
users_append(users, u); | |
} | |
} | |
return users; | |
} | |
// SELECT * FROM users WHERE location > 1 | |
static struct Users* cusers_by_location(struct Users *users) { | |
for (size_t i = 0; i < NUM_RECORDS; i++) { | |
if (col_db.birth_location_id[i] > 1) { | |
struct User u = { | |
.id = col_db.id[i], | |
.birth_location_id = col_db.birth_location_id[i], | |
.time_of_birth = col_db.time_of_birth[i], | |
.company = col_db.company[i], | |
.state = col_db.state[i], | |
.children = col_db.children[i], | |
.married = col_db.married[i] | |
}; | |
memcpy(u.name, col_db.name + (i * 255), 255); | |
users_append(users, u); | |
} | |
} | |
return users; | |
} | |
// SELECT * FROM users WHERE location = 42 OR company = 1 OR state = 12 OR married = T | |
static struct Users* rusers_restricted(struct Users* users) { | |
for (size_t i = 0; i < NUM_RECORDS; i++) { | |
struct User u = row_db[i]; | |
if (u.birth_location_id == 42 || u.company == 1 || u.state == 12 || u.married == true || u.children > 2) { | |
users_append(users, u); | |
} | |
} | |
return users; | |
} | |
// SELECT * FROM users WHERE location = 42 OR company = 1 OR state = 12 OR married = T | |
static struct Users* cusers_restricted(struct Users *users) { | |
for (size_t i = 0; i < NUM_RECORDS; i++) { | |
if (col_db.birth_location_id[i] == 42 || col_db.company[i] == 1 || col_db.state[i] == 12 || col_db.married[i] == true || col_db.children[i] > 2) { | |
struct User u = { | |
.id = col_db.id[i], | |
.birth_location_id = col_db.birth_location_id[i], | |
.time_of_birth = col_db.time_of_birth[i], | |
.company = col_db.company[i], | |
.state = col_db.state[i], | |
.children = col_db.children[i], | |
.married = col_db.married[i] | |
}; | |
memcpy(u.name, col_db.name + (i * 255), 255); | |
users_append(users, u); | |
} | |
} | |
return users; | |
} | |
// SELECT MAX(time_of_birth) FROM users; | |
static int cmax_time_of_birth(void) { | |
int result = 0; | |
for (size_t i = 0; i < NUM_RECORDS; i++) { | |
int t = col_db.time_of_birth[i]; | |
if (t > result) | |
result = t; | |
} | |
return result; | |
} | |
// SELECT MAX(time_of_birth) FROM users; | |
static int rmax_time_of_birth(void) { | |
int result = 0; | |
for (size_t i = 0; i < NUM_RECORDS; i++) { | |
int t = row_db[i].time_of_birth; | |
if (t > result) | |
result = t; | |
} | |
return result; | |
} | |
int main(int argc, char **argv) { | |
setup(); | |
int c, r = 0; | |
size_t result_count = 0; | |
struct User *results = malloc(sizeof(struct User) * NUM_RECORDS); | |
assert(results != NULL); | |
start = time(NULL); | |
for (size_t i = 0; i < NUM_QUERIES; i++) { | |
r = rmax_time_of_birth(); | |
} | |
printf("[ROW] elapsed: %lds\trmax_time_of_birth: %d\t[SELECT MAX(time_of_birth) FROM users;]\n", time(NULL) - start, r); | |
time_t start = time(NULL); | |
for (size_t i = 0; i < NUM_QUERIES; i++) { | |
c = cmax_time_of_birth(); | |
} | |
printf("[COL] elapsed: %lds\tcmax_time_of_birth: %d\t[SELECT MAX(time_of_birth) FROM users;]\n", time(NULL) - start, c); | |
start = time(NULL); | |
for (size_t i = 0; i < NUM_QUERIES; i++) { | |
result_count = rcount_users_by_location(); | |
} | |
printf("[ROW] elapsed: %lds\trecords for rcount_users_by_location count: %ld\t[SELECT COUNT(1) FROM users WHERE location = 42]\n", | |
time(NULL) - start, result_count); | |
start = time(NULL); | |
for (size_t i = 0; i < NUM_QUERIES; i++) { | |
result_count = ccount_users_by_location(); | |
} | |
printf("[COL] elapsed: %lds\trecords for ccount_users_by_location count: %ld\t[SELECT COUNT(1) FROM users WHERE location = 42]\n", | |
time(NULL) - start, result_count); | |
struct Users* users = users_init(); | |
start = time(NULL); | |
for (size_t i = 0; i < NUM_QUERIES; i++) { | |
users = rusers_by_location(users); | |
result_count = users->offset; | |
users_reset(users); | |
} | |
printf("[ROW] elapsed: %lds\trecords for r_users_by_location: %ld [SELECT * FROM users WHERE location > 1]\n", | |
time(NULL) - start, result_count); | |
start = time(NULL); | |
for (size_t i = 0; i < NUM_QUERIES; i++) { | |
users = cusers_by_location(users); | |
result_count = users->offset; | |
users_reset(users); | |
} | |
printf("[COL] elapsed: %lds\trecords for c_users_by_location: %ld [SELECT * FROM users WHERE location > 1]\n", | |
time(NULL) - start, result_count); | |
start = time(NULL); | |
for (size_t i = 0; i < NUM_QUERIES; i++) { | |
users = rusers_restricted(users); | |
result_count = users->offset; | |
users_reset(users); | |
} | |
printf("[ROW] elapsed: %lds\trecords for r_users_restricted: %ld\t[SELECT * FROM users WHERE location = 42 OR company = 1 OR state = 12 OR married = T OR children > 2]\n", | |
time(NULL) - start, result_count); | |
start = time(NULL); | |
for (size_t i = 0; i < NUM_QUERIES; i++) { | |
users = cusers_restricted(users); | |
result_count = users->offset; | |
users_reset(users); | |
} | |
printf("[COL] elapsed: %lds\trecords for c_users_restricted: %ld\t[SELECT * FROM users WHERE location = 42 OR company = 1 OR state = 12 OR married = T OR children > 2]\n", | |
time(NULL) - start, result_count); | |
users_free(users); | |
return EXIT_SUCCESS; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
default: | |
gcc -O0 -g3 -Wall -Wextra -Wconversion -Wdouble-promotion \ | |
-Wno-unused-parameter -Wno-unused-function -Wno-sign-conversion \ | |
main.c | |
format: | |
clang-format -i main.c |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ make | |
$ ./a.out | |
[ROW] elapsed: 9s rmax_time_of_birth: 86399 [SELECT MAX(time_of_birth) FROM users;] | |
[COL] elapsed: 2s cmax_time_of_birth: 86399 [SELECT MAX(time_of_birth) FROM users;] | |
[ROW] elapsed: 6s records for rcount_users_by_location count: 9984 [SELECT COUNT(1) FROM users WHERE location = 42] | |
[COL] elapsed: 1s records for ccount_users_by_location count: 9984 [SELECT COUNT(1) FROM users WHERE location = 42] | |
[ROW] elapsed: 19s records for r_users_by_location: 980011 [SELECT * FROM users WHERE location > 1] | |
[COL] elapsed: 24s records for c_users_by_location: 980011 [SELECT * FROM users WHERE location > 1] | |
[ROW] elapsed: 21s records for r_users_restricted: 998008 [SELECT * FROM users WHERE location = 42 OR company = 1 OR state = 12 OR married = T OR children > 2] | |
[COL] elapsed: 27s records for c_users_restricted: 998008 [SELECT * FROM users WHERE location = 42 OR company = 1 OR state = 12 OR married = T OR children > 2] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment