Created
February 10, 2020 12:22
-
-
Save notogawa/36d0cc9168ae3236902729f26064281d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <assert.h>
#include <sys/time.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
/*
 * Argument block for IOCTL_V3D_CREATE_BO.
 *
 * NOTE(review): hand-written copy that is presumably meant to mirror the
 * kernel's struct of the same name -- confirm against the uapi header.
 */
typedef struct {
    uint32_t size;   /* in:  requested size; main passes sizeof(do_nothing) */
    uint32_t flags;  /* in:  main passes 0 */
    uint32_t handle; /* out: read back by main after the ioctl succeeds */
    uint32_t offset; /* out: read back by main and used as the shader address */
} drm_v3d_create_bo;
/*
 * Argument block for IOCTL_V3D_MMAP_BO.
 *
 * NOTE(review): presumably mirrors the kernel's struct of the same name --
 * confirm against the uapi header.
 */
typedef struct {
    uint32_t handle; /* in:  handle of the buffer object to map */
    uint32_t flags;  /* in:  main passes 0 */
    uint64_t offset; /* out: main passes this as the offset argument of mmap() */
} drm_v3d_mmap_bo;
/*
 * Argument block for IOCTL_GEM_CLOSE, used by main to release the buffer
 * object handle once the benchmark is finished.
 */
typedef struct {
    uint32_t handle; /* in: handle to release */
    uint32_t pad;    /* padding word; no meaning is attached to it in this file */
} gem_close;
/*
 * Argument block for IOCTL_V3D_WAIT_BO.
 *
 * NOTE(review): presumably mirrors the kernel's struct of the same name --
 * confirm against the uapi header.
 */
typedef struct {
    uint32_t handle;     /* in: handle of the buffer object to wait on */
    uint32_t pad;        /* padding word; wait_bo() sets it to 0 */
    uint64_t timeout_ns; /* in: timeout in nanoseconds (per the field name) */
} drm_v3d_wait_bo;
/*
 * Argument block for IOCTL_V3D_SUBMIT_CSD (one compute dispatch).
 *
 * cfg[] and coef[] together occupy 44 bytes, so on ABIs that align
 * uint64_t to 8 bytes the compiler inserts 4 bytes of padding before
 * bo_handles. Callers should zero the whole struct before filling it in.
 *
 * NOTE(review): presumably mirrors the kernel's struct of the same name as
 * it was when this was written -- confirm against the uapi header.
 */
typedef struct {
    uint32_t cfg[7];          /* dispatch configuration words; see submit_csd() */
    uint32_t coef[4];         /* submit_csd() sets all four to 0 */
    uint64_t bo_handles;      /* user-space address of a uint32_t handle array */
    uint32_t bo_handle_count; /* number of entries in that array */
    uint32_t in_sync;         /* submit_csd() passes 0 */
    uint32_t out_sync;        /* submit_csd() passes 0 */
} drm_v3d_submit_csd;
/* ioctl "type" character and the first driver-private command number. */
#define DRM_IOCTL_BASE 'd'
#define DRM_COMMAND_BASE 0x40

/* Generic GEM command number. */
#define DRM_GEM_CLOSE 0x09

/* Driver-private V3D command numbers, each relative to DRM_COMMAND_BASE. */
#define DRM_V3D_WAIT_BO (DRM_COMMAND_BASE + 0x01)
#define DRM_V3D_CREATE_BO (DRM_COMMAND_BASE + 0x02)
#define DRM_V3D_MMAP_BO (DRM_COMMAND_BASE + 0x03)
#define DRM_V3D_SUBMIT_CSD (DRM_COMMAND_BASE + 0x07)

/*
 * Full ioctl request codes. The _IOW/_IOWR helpers come from <sys/ioctl.h>
 * and encode the transfer direction and the argument struct's size.
 */
#define IOCTL_GEM_CLOSE _IOW(DRM_IOCTL_BASE, DRM_GEM_CLOSE, gem_close)
#define IOCTL_V3D_CREATE_BO _IOWR(DRM_IOCTL_BASE, DRM_V3D_CREATE_BO, drm_v3d_create_bo)
#define IOCTL_V3D_MMAP_BO _IOWR(DRM_IOCTL_BASE, DRM_V3D_MMAP_BO, drm_v3d_mmap_bo)
#define IOCTL_V3D_WAIT_BO _IOWR(DRM_IOCTL_BASE, DRM_V3D_WAIT_BO, drm_v3d_wait_bo)
#define IOCTL_V3D_SUBMIT_CSD _IOW(DRM_IOCTL_BASE, DRM_V3D_SUBMIT_CSD, drm_v3d_submit_csd)
/*
 * Shader program that main copies into the buffer object and dispatches:
 * eight 64-bit instruction words. Going by the per-word mnemonics below,
 * the program performs no work. It is only ever read (sizeof / memcpy
 * source), hence const.
 */
static const uint64_t do_nothing[] = {
    0x3c203186bb800000, // nop; thrsw
    0x3c203186bb800000, // nop; thrsw
    0x3c003186bb800000, // nop
    0x3c003186bb800000, // nop
    0x3c203186bb800000, // nop; thrsw
    0x3c003186bb800000, // nop
    0x3c003186bb800000, // nop
    0x3c003186bb800000, // nop
};
static int submit_csd(int fd, uint32_t phyaddr, uint32_t handle) { | |
const uint32_t wg_x = 1; | |
const uint32_t wg_y = 1; | |
const uint32_t wg_z = 1; | |
const uint32_t wg_size = wg_x * wg_y * wg_z; | |
const uint32_t wgs_per_sg = 1; | |
const uint32_t bo_handles[] = { handle }; | |
drm_v3d_submit_csd csd; | |
csd.cfg[0] = wg_x << 16; | |
csd.cfg[1] = wg_y << 16; | |
csd.cfg[2] = wg_z << 16; | |
csd.cfg[3] = | |
((((wgs_per_sg * wg_size + 16u - 1u) / 16u) - 1u) << 12) | | |
(wgs_per_sg << 8) | | |
(wg_size & 0xff); | |
csd.cfg[4] = 0; | |
csd.cfg[5] = phyaddr; | |
csd.cfg[6] = 0; | |
csd.coef[0] = 0; | |
csd.coef[1] = 0; | |
csd.coef[2] = 0; | |
csd.coef[3] = 0; | |
csd.bo_handles = (uintptr_t)bo_handles; | |
csd.bo_handle_count = sizeof(bo_handles)/sizeof(bo_handles[0]); | |
csd.in_sync = 0; | |
csd.out_sync = 0; | |
return ioctl(fd, IOCTL_V3D_SUBMIT_CSD, &csd); | |
} | |
static int wait_bo(int fd, uint32_t handle) { | |
drm_v3d_wait_bo wait; | |
wait.handle = handle; | |
wait.pad = 0; | |
wait.timeout_ns = 10e9; | |
return ioctl(fd, IOCTL_V3D_WAIT_BO, &wait); | |
} | |
/*
 * Return the current time of day in seconds, as a double with microsecond
 * resolution, taken from gettimeofday().
 */
static double get_time(void) {
    struct timeval now;

    gettimeofday(&now, NULL);
    return (double)now.tv_sec + now.tv_usec * 1e-6;
}
int main() { | |
int fd = open("/dev/dri/card0", O_RDWR); | |
assert(fd > 0); | |
drm_v3d_create_bo create_bo; | |
create_bo.size = sizeof(do_nothing); | |
create_bo.flags = 0; | |
{ | |
int res = ioctl(fd, IOCTL_V3D_CREATE_BO, &create_bo); | |
assert(res == 0); | |
} | |
uint32_t handle = create_bo.handle; | |
uint32_t phyaddr = create_bo.offset; | |
drm_v3d_mmap_bo mmap_bo; | |
mmap_bo.handle = handle; | |
mmap_bo.flags = 0; | |
{ | |
int res = ioctl(fd, IOCTL_V3D_MMAP_BO, &mmap_bo); | |
assert(res == 0); | |
} | |
void* usraddr = mmap(NULL, sizeof(do_nothing), PROT_READ | PROT_WRITE, MAP_SHARED, fd, mmap_bo.offset); | |
assert(usraddr != MAP_FAILED); | |
memcpy(usraddr, do_nothing, sizeof(do_nothing)); | |
for (int submit_times = 1; submit_times < 11; ++submit_times) { | |
printf("[submit x%d]\n", submit_times); | |
for (int try = 0; try < 5; ++try) { | |
double start = get_time(); | |
for (int i = 0; i < submit_times; ++i) { | |
submit_csd(fd, phyaddr, handle); | |
} | |
wait_bo(fd, handle); | |
double end = get_time(); | |
printf(" try %d: %.6lf sec\n", try+1, end - start); | |
} | |
} | |
{ | |
int res = munmap(usraddr, sizeof(do_nothing)); | |
assert(res == 0); | |
} | |
gem_close cl; | |
cl.handle = handle; | |
ioctl(fd, IOCTL_GEM_CLOSE, &cl); | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[submit x1] | |
try 1: 0.021083 sec | |
try 2: 0.029954 sec | |
try 3: 0.029995 sec | |
try 4: 0.029996 sec | |
try 5: 0.039995 sec | |
[submit x2] | |
try 1: 0.060004 sec | |
try 2: 0.059991 sec | |
try 3: 0.059994 sec | |
try 4: 0.059995 sec | |
try 5: 0.059996 sec | |
[submit x3] | |
try 1: 0.089996 sec | |
try 2: 0.089995 sec | |
try 3: 0.089998 sec | |
try 4: 0.089993 sec | |
try 5: 0.089999 sec | |
[submit x4] | |
try 1: 0.119992 sec | |
try 2: 0.119995 sec | |
try 3: 0.119996 sec | |
try 4: 0.089998 sec | |
try 5: 0.119993 sec | |
[submit x5] | |
try 1: 0.149996 sec | |
try 2: 0.149995 sec | |
try 3: 0.149996 sec | |
try 4: 0.149995 sec | |
try 5: 0.149995 sec | |
[submit x6] | |
try 1: 0.180001 sec | |
try 2: 0.179991 sec | |
try 3: 0.179996 sec | |
try 4: 0.179997 sec | |
try 5: 0.179993 sec | |
[submit x7] | |
try 1: 0.209996 sec | |
try 2: 0.209996 sec | |
try 3: 0.209997 sec | |
try 4: 0.209994 sec | |
try 5: 0.209997 sec | |
[submit x8] | |
try 1: 0.239995 sec | |
try 2: 0.239995 sec | |
try 3: 0.239995 sec | |
try 4: 0.240000 sec | |
try 5: 0.239991 sec | |
[submit x9] | |
try 1: 0.269993 sec | |
try 2: 0.269998 sec | |
try 3: 0.269995 sec | |
try 4: 0.269996 sec | |
try 5: 0.269996 sec | |
[submit x10] | |
try 1: 0.299996 sec | |
try 2: 0.299995 sec | |
try 3: 0.299994 sec | |
try 4: 0.299996 sec | |
try 5: 0.299996 sec |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment