Skip to content

Instantly share code, notes, and snippets.

Last active April 26, 2019 09:44
Show Gist options
  • Save roxlu/59a13936f1244de32140 to your computer and use it in GitHub Desktop.
Save roxlu/59a13936f1244de32140 to your computer and use it in GitHub Desktop.
Experimental code to test fast pixel transfers using PBOs.
#include <gfx/AsyncUpload.h>
namespace gfx {
/*
  Destructor. Does nothing: the PBOs are only released by an explicit
  shutdown() call.

  NOTE(review): confirm whether the destructor should call shutdown()
  itself; that would require a current GL context at destruction time.
*/
AsyncUpload::~AsyncUpload() {
}
int AsyncUpload::init(int w, int h, GLenum fmt) {
if (0 == w) {
RX_ERROR("Width is 0.");
return -1;
if (0 == h) {
RX_ERROR("Height is 0.");
return -2;
if (0 != width) {
RX_ERROR("The width is not 0, did you call shutdown?");
return -3;
if (fmt == GL_RGBA || fmt == GL_RGBA8) {
format = GL_RGBA;
channels = 4;
else if (fmt == GL_RGB || fmt == GL_RGB8) {
format = GL_RGB;
channels = 3;
else {
RX_ERROR("Format is not GL_RGBA, GL_RGBA8, GL_RGB, GL_RGB8; for now only GL_RGBA, GL_RGB are supported.");
return -4;
width = w;
height = h;
format = fmt;
dx = 0;
nbytes = width * height * channels;
for (int i = 0; i < ASYNC_UPLOAD_NUM_BUFFERS; ++i) {
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo[i]);
RX_VERBOSE("Created %d pixel unpack buffers that can hold %d bytes", ASYNC_UPLOAD_NUM_BUFFERS, nbytes);
return 0;
/*
  Uploads one full frame of `pixels` into the GL_TEXTURE_2D target via
  glTexSubImage2D(), covering the region (0, 0) - (width, height).

  pixels : source pixels; the mapped-buffer path copies `nbytes` bytes
           from it.

  Returns 0 on success, -1 when width, height or channels is 0, and -2
  when `pixels` is NULL.

  NOTE(review): this function is structurally incomplete as shown:
    - the closing braces of the guards and of the function are missing;
    - `#if 1` has no visible `#else` / `#endif`;
    - the `else {` below has no visible matching `if`;
    - no glUnmapBuffer() follows glMapBuffer();
    - `dx` and `n` are never advanced here.
  Restore the missing lines from the original file before relying on it.
*/
int AsyncUpload::upload(unsigned char* pixels) {
/* Reject calls made before a successful init(). */
if (0 == width || 0 == height || 0 == channels) {
RX_ERROR("Trying to upload pixels; but it looks like we're not yet initialized.");
return -1;
if (NULL == pixels) {
RX_ERROR("Invalid pixels given; NULL");
return -2;
#if 1
/* fast upload */
/* With a pixel unpack buffer bound, the NULL data argument is an offset into that buffer. */
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo[dx]);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, format, GL_UNSIGNED_BYTE, NULL);
else {
/* Map the buffer for writing and copy the caller's pixels into it. */
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo[dx]);
GLubyte* ptr = (GLubyte*)glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY);
if (NULL != ptr) {
memcpy(ptr, pixels, nbytes); /* takes about 0.0039 seconds for 1920 x 1200 rgba buffer */
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, format, GL_UNSIGNED_BYTE, NULL);
/* non-optimal upload */
/* Passes the client-memory pointer directly instead of a buffer offset. */
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, format, GL_UNSIGNED_BYTE, pixels);
return 0;
int AsyncUpload::shutdown() {
if (0 != width) {
RX_ERROR("Cannot shutdown as width is 0. Did you init?");
return -1;
glDeleteBuffers(ASYNC_UPLOAD_NUM_BUFFERS, pbo);
width = 0;
height = 0;
dx = 0;
channels = 0;
n = 0;
return 0;
} /* namespace gfx */
oooo d8b .ooooo. oooo ooo 888 oooo oooo
`888""8P d88' `88b `88b..8P' 888 `888 `888
888 888 888 Y888' 888 888 888
888 888 888 .o8"'88b 888 888 888
d888b `Y8bod8P' o88' 888o o888o `V88V"V8P'
This class uses a couple of pixel buffer objects to overcome
synchronization issues when uploading data to the GPU. It's built for
a specific project where we had to upload the complete pixel buffer.
Make sure that you use GL_BGRA as format with GL_UNSIGNED_INT_8_8_8_8_REV
as type for the texture that you're using (on Mac). When you use only
3-channel images, you won't get the optimal upload path and performance is
severely reduced.
Use something like:
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, width, height, 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, NULL);
#include <stdint.h>
#include <glad/glad.h>
#include <tinylib.h>
namespace gfx {
class AsyncUpload {
int init(int w, int h, GLenum fmt); /* allocates memory; creates GL objects. */
int shutdown(); /* frees all memory; destroys GL objects. */
int upload(unsigned char* pixels); /* upload the given pixels, we assume that you have bound a texture of the same dimensions as our buffers */
GLuint pbo[ASYNC_UPLOAD_NUM_BUFFERS]; /* the pbos that are created in init(), and removed in shutdown(). */
GLenum format; /* what format is used */
int dx; /* current index into the pbo array */
int width; /* width of the texture */
int height; /* height of the texture */
int nbytes; /* number of bytes in the PBOs */
int channels; /* number of color channels that are used */
uint64_t n; /* number of uploads, used to 'schedule' what PBO we should use. */
} /* namespace gfx */
With 3 PBOs, format GL_BGRA, type: GL_UNSIGNED_INT_8_8_8_8_REV
2014.08.28_09.56.55_609: verbose [int main():162] = Took: 0.003976
2014.08.28_09.56.55_623: verbose [int main():162] = Took: 0.003766
2014.08.28_09.56.56_658: verbose [int main():162] = Took: 0.004171
2014.08.28_09.56.56_665: verbose [int main():162] = Took: 0.005319
2014.08.28_09.56.56_673: verbose [int main():162] = Took: 0.003831
2014.08.28_09.56.56_690: verbose [int main():162] = Took: 0.004001
2014.08.28_09.56.56_716: verbose [int main():162] = Took: 0.005537
2014.08.28_09.56.56_726: verbose [int main():162] = Took: 0.003836
2014.08.28_09.56.56_739: verbose [int main():162] = Took: 0.003944
2014.08.28_09.56.56_776: verbose [int main():162] = Took: 0.004286
2014.08.28_09.56.56_783: verbose [int main():162] = Took: 0.005428
2014.08.28_09.56.56_790: verbose [int main():162] = Took: 0.004276
2014.08.28_09.56.56_808: verbose [int main():162] = Took: 0.004208
2014.08.28_09.56.56_833: verbose [int main():162] = Took: 0.005733
2014.08.28_09.56.56_843: verbose [int main():162] = Took: 0.003975
2014.08.28_09.56.56_857: verbose [int main():162] = Took: 0.003847
2014.08.28_09.56.56_892: verbose [int main():162] = Took: 0.004260
2014.08.28_09.56.56_898: verbose [int main():162] = Took: 0.004506
2014.08.28_09.56.56_907: verbose [int main():162] = Took: 0.003997
2014.08.28_09.56.56_924: verbose [int main():162] = Took: 0.003954
2014.08.28_09.56.56_948: verbose [int main():162] = Took: 0.004570
2014.08.28_09.56.56_962: verbose [int main():162] = Took: 0.003556
2014.08.28_09.56.56_975: verbose [int main():162] = Took: 0.004052
2014.08.28_09.56.56_14: verbose [int main():162] = Took: 0.015949
2014.08.28_09.56.56_48: verbose [int main():162] = Took: 0.032679
2014.08.28_09.56.56_61: verbose [int main():162] = Took: 0.005951
2014.08.28_09.56.56_75: verbose [int main():162] = Took: 0.003924
2014.08.28_09.56.56_88: verbose [int main():162] = Took: 0.011031
2014.08.28_09.56.56_100: verbose [int main():162] = Took: 0.004205
2014.08.28_09.56.56_122: verbose [int main():162] = Took: 0.004896
2014.08.28_09.56.56_149: verbose [int main():162] = Took: 0.004883
2014.08.28_09.56.56_173: verbose [int main():162] = Took: 0.004348
2014.08.28_09.56.56_201: verbose [int main():162] = Took: 0.006323
2014.08.28_09.56.56_224: verbose [int main():162] = Took: 0.003962
2014.08.28_09.56.56_270: verbose [int main():162] = Took: 0.004066
2014.08.28_09.56.56_290: verbose [int main():162] = Took: 0.005125
2014.08.28_09.56.56_312: verbose [int main():162] = Took: 0.007200
2014.08.28_09.56.56_346: verbose [int main():162] = Took: 0.003861
2014.08.28_09.56.56_361: verbose [int main():162] = Took: 0.003996
2014.08.28_09.56.56_399: verbose [int main():162] = Took: 0.004490
2014.08.28_09.56.56_411: verbose [int main():162] = Took: 0.003905
2014.08.28_09.56.56_449: verbose [int main():162] = Took: 0.004929
2014.08.28_09.56.56_472: verbose [int main():162] = Took: 0.004919
2014.08.28_09.56.56_504: verbose [int main():162] = Took: 0.004780
2014.08.28_09.56.56_524: verbose [int main():162] = Took: 0.004283
2014.08.28_09.56.56_572: verbose [int main():162] = Took: 0.003920
2014.08.28_09.56.56_606: verbose [int main():162] = Took: 0.004039
2014.08.28_09.56.57_656: verbose [int main():162] = Took: 0.004160
2014.08.28_09.56.57_687: verbose [int main():162] = Took: 0.003985
2014.08.28_09.56.57_720: verbose [int main():162] = Took: 0.005126
2014.08.28_09.56.57_740: verbose [int main():162] = Took: 0.004966
2014.08.28_09.56.57_780: verbose [int main():162] = Took: 0.004602
2014.08.28_09.56.57_796: verbose [int main():162] = Took: 0.003868
2014.08.28_09.56.57_834: verbose [int main():162] = Took: 0.004571
2014.08.28_09.56.57_845: verbose [int main():162] = Took: 0.003883
2014.08.28_09.56.57_876: verbose [int main():162] = Took: 0.007581
2014.08.28_09.56.57_896: verbose [int main():162] = Took: 0.004338
2014.08.28_09.56.57_931: verbose [int main():162] = Took: 0.004243
2014.08.28_09.56.57_946: verbose [int main():162] = Took: 0.003892
2014.08.28_09.56.57_973: verbose [int main():162] = Took: 0.003840
2014.08.28_09.56.57_24: verbose [int main():162] = Took: 0.004088
2014.08.28_09.56.57_55: verbose [int main():162] = Took: 0.003907
2014.08.28_09.56.57_88: verbose [int main():162] = Took: 0.003831
2014.08.28_09.56.57_108: verbose [int main():162] = Took: 0.005098
2014.08.28_09.56.57_152: verbose [int main():162] = Took: 0.004121
2014.08.28_09.56.57_164: verbose [int main():162] = Took: 0.003935
2014.08.28_09.56.57_206: verbose [int main():162] = Took: 0.004132
2014.08.28_09.56.57_216: verbose [int main():162] = Took: 0.004512
2014.08.28_09.56.57_241: verbose [int main():162] = Took: 0.004029
2014.08.28_09.56.57_280: verbose [int main():162] = Took: 0.003882
2014.08.28_09.56.57_296: verbose [int main():162] = Took: 0.003888
2014.08.28_09.56.57_311: verbose [int main():162] = Took: 0.003840
2014.08.28_09.56.57_335: verbose [int main():162] = Took: 0.005672
2014.08.28_09.56.57_362: verbose [int main():162] = Took: 0.004059
2014.08.28_09.56.57_391: verbose [int main():162] = Took: 0.003849
2014.08.28_09.56.57_411: verbose [int main():162] = Took: 0.003838
Using glTexSubImage2D w/o PBOs. Same format as above.
2014.08.28_09.57.33_227: verbose [int main():162] = Took: 0.015712
2014.08.28_09.57.33_259: verbose [int main():162] = Took: 0.020895
2014.08.28_09.57.33_283: verbose [int main():162] = Took: 0.021503
2014.08.28_09.57.33_310: verbose [int main():162] = Took: 0.017689
2014.08.28_09.57.33_328: verbose [int main():162] = Took: 0.017360
2014.08.28_09.57.33_359: verbose [int main():162] = Took: 0.029582
2014.08.28_09.57.33_377: verbose [int main():162] = Took: 0.014137
2014.08.28_09.57.33_411: verbose [int main():162] = Took: 0.030622
2014.08.28_09.57.33_424: verbose [int main():162] = Took: 0.009725
2014.08.28_09.57.33_461: verbose [int main():162] = Took: 0.016294
2014.08.28_09.57.33_474: verbose [int main():162] = Took: 0.012158
2014.08.28_09.57.33_495: verbose [int main():162] = Took: 0.019164
2014.08.28_09.57.33_511: verbose [int main():162] = Took: 0.009864
2014.08.28_09.57.33_528: verbose [int main():162] = Took: 0.015752
2014.08.28_09.57.33_541: verbose [int main():162] = Took: 0.011591
2014.08.28_09.57.33_560: verbose [int main():162] = Took: 0.017273
2014.08.28_09.57.33_578: verbose [int main():162] = Took: 0.016341
2014.08.28_09.57.33_592: verbose [int main():162] = Took: 0.012830
2014.08.28_09.57.33_616: verbose [int main():162] = Took: 0.016965
2014.08.28_09.57.34_640: verbose [int main():162] = Took: 0.021996
2014.08.28_09.57.34_662: verbose [int main():162] = Took: 0.004950
2014.08.28_09.57.34_678: verbose [int main():162] = Took: 0.013562
2014.08.28_09.57.34_700: verbose [int main():162] = Took: 0.020186
2014.08.28_09.57.34_716: verbose [int main():162] = Took: 0.009004
2014.08.28_09.57.34_741: verbose [int main():162] = Took: 0.023935
2014.08.28_09.57.34_778: verbose [int main():162] = Took: 0.030341
2014.08.28_09.57.34_796: verbose [int main():162] = Took: 0.016939
2014.08.28_09.57.34_828: verbose [int main():162] = Took: 0.021799
2014.08.28_09.57.34_842: verbose [int main():162] = Took: 0.009358
2014.08.28_09.57.34_878: verbose [int main():162] = Took: 0.019993
2014.08.28_09.57.34_896: verbose [int main():162] = Took: 0.016433
2014.08.28_09.57.34_926: verbose [int main():162] = Took: 0.028643
2014.08.28_09.57.34_945: verbose [int main():162] = Took: 0.014017
2014.08.28_09.57.34_978: verbose [int main():162] = Took: 0.031791
2014.08.28_09.57.34_997: verbose [int main():162] = Took: 0.016920
2014.08.28_09.57.34_16: verbose [int main():162] = Took: 0.018063
2014.08.28_09.57.34_43: verbose [int main():162] = Took: 0.008069
2014.08.28_09.57.34_62: verbose [int main():162] = Took: 0.016659
2014.08.28_09.57.34_95: verbose [int main():162] = Took: 0.031220
2014.08.28_09.57.34_112: verbose [int main():162] = Took: 0.016076
2014.08.28_09.57.34_145: verbose [int main():162] = Took: 0.025039
2014.08.28_09.57.34_163: verbose [int main():162] = Took: 0.016290
2014.08.28_09.57.34_195: verbose [int main():162] = Took: 0.028162
2014.08.28_09.57.34_213: verbose [int main():162] = Took: 0.016082
2014.08.28_09.57.34_245: verbose [int main():162] = Took: 0.031331
2014.08.28_09.57.34_264: verbose [int main():162] = Took: 0.016281
2014.08.28_09.57.34_296: verbose [int main():162] = Took: 0.020673
2014.08.28_09.57.34_313: verbose [int main():162] = Took: 0.015207
2014.08.28_09.57.34_346: verbose [int main():162] = Took: 0.031821
2014.08.28_09.57.34_368: verbose [int main():162] = Took: 0.020428
2014.08.28_09.57.34_397: verbose [int main():162] = Took: 0.022230
2014.08.28_09.57.34_413: verbose [int main():162] = Took: 0.014551
2014.08.28_09.57.34_446: verbose [int main():162] = Took: 0.032402
2014.08.28_09.57.34_466: verbose [int main():162] = Took: 0.017916
2014.08.28_09.57.34_496: verbose [int main():162] = Took: 0.028936
2014.08.28_09.57.34_514: verbose [int main():162] = Took: 0.013146
2014.08.28_09.57.34_546: verbose [int main():162] = Took: 0.022253
2014.08.28_09.57.34_563: verbose [int main():162] = Took: 0.013557
2014.08.28_09.57.34_594: verbose [int main():162] = Took: 0.029427
2014.08.28_09.57.34_610: verbose [int main():162] = Took: 0.013754
2014.08.28_09.57.35_647: verbose [int main():162] = Took: 0.019401
2014.08.28_09.57.35_667: verbose [int main():162] = Took: 0.018647
2014.08.28_09.57.35_695: verbose [int main():162] = Took: 0.024147
2014.08.28_09.57.35_711: verbose [int main():162] = Took: 0.013650
2014.08.28_09.57.35_747: verbose [int main():162] = Took: 0.019825
2014.08.28_09.57.35_761: verbose [int main():162] = Took: 0.010776
2014.08.28_09.57.35_779: verbose [int main():162] = Took: 0.015481
2014.08.28_09.57.35_797: verbose [int main():162] = Took: 0.016900
Copy link

Can you add the main function you're using? I think you get such differing upload times because you don't wait for the completion of the uploads for your time measurements.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment