Last active
April 26, 2019 09:44
-
-
Save roxlu/59a13936f1244de32140 to your computer and use it in GitHub Desktop.
Experimental code to test fast pixel transfers using PBOs.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <gfx/AsyncUpload.h> | |
namespace gfx { | |
/*
  Puts every member into a known "not initialized" state. No GL calls
  are made here; the GL objects are created in init().

  The initializer list follows the declaration order of the members and
  also covers `format`, `nbytes` and `pbo`, so no member is left with
  an indeterminate value.
*/
AsyncUpload::AsyncUpload()
  :format(0)
  ,dx(0)
  ,width(0)
  ,height(0)
  ,nbytes(0)
  ,channels(0)
  ,n(0)
{
  /* 0 is never returned by glGenBuffers and is silently ignored by glDeleteBuffers. */
  for (int i = 0; i < ASYNC_UPLOAD_NUM_BUFFERS; ++i) {
    pbo[i] = 0;
  }
}
/*
  Only calls shutdown() when init() has stored a width, so destroying a
  never-initialized or already shut down object makes no GL calls.
*/
AsyncUpload::~AsyncUpload() {
  if (0 != width) {
    shutdown();
  }
}
int AsyncUpload::init(int w, int h, GLenum fmt) { | |
if (0 == w) { | |
RX_ERROR("Width is 0."); | |
return -1; | |
} | |
if (0 == h) { | |
RX_ERROR("Height is 0."); | |
return -2; | |
} | |
if (0 != width) { | |
RX_ERROR("The width is not 0, did you call shutdown?"); | |
return -3; | |
} | |
if (fmt == GL_RGBA || fmt == GL_RGBA8) { | |
format = GL_RGBA; | |
channels = 4; | |
} | |
else if (fmt == GL_RGB || fmt == GL_RGB8) { | |
format = GL_RGB; | |
channels = 3; | |
} | |
else { | |
RX_ERROR("Format is not GL_RGBA, GL_RGBA8, GL_RGB, GL_RGB8; for now only GL_RGBA, GL_RGB are supported."); | |
return -4; | |
} | |
width = w; | |
height = h; | |
format = fmt; | |
dx = 0; | |
nbytes = width * height * channels; | |
glGenBuffers(ASYNC_UPLOAD_NUM_BUFFERS, pbo); | |
for (int i = 0; i < ASYNC_UPLOAD_NUM_BUFFERS; ++i) { | |
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo[i]); | |
glBufferData(GL_PIXEL_UNPACK_BUFFER, nbytes, NULL, GL_STREAM_DRAW); | |
} | |
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); | |
RX_VERBOSE("Created %d pixel unpack buffers that can hold %d bytes", ASYNC_UPLOAD_NUM_BUFFERS, nbytes); | |
return 0; | |
} | |
int AsyncUpload::upload(unsigned char* pixels) { | |
if (0 == width || 0 == height || 0 == channels) { | |
RX_ERROR("Trying to upload pixels; but it looks like we're not yet initialized."); | |
return -1; | |
} | |
if (NULL == pixels) { | |
RX_ERROR("Invalid pixels given; NULL"); | |
return -2; | |
} | |
#if 1 | |
/* fast upload */ | |
dx = n % ASYNC_UPLOAD_NUM_BUFFERS; | |
if (n < ASYNC_UPLOAD_NUM_BUFFERS) { | |
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo[dx]); | |
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, format, GL_UNSIGNED_BYTE, NULL); | |
} | |
else { | |
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo[dx]); | |
GLubyte* ptr = (GLubyte*)glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY); | |
if (NULL != ptr) { | |
memcpy(ptr, pixels, nbytes); /* takes about 0.0039 seconds for 1920 x 1200 rgba buffer */ | |
glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER); | |
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, format, GL_UNSIGNED_BYTE, NULL); | |
} | |
} | |
++n; | |
#else | |
/* non-optimal upload */ | |
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, format, GL_UNSIGNED_BYTE, pixels); | |
#endif | |
return 0; | |
} | |
int AsyncUpload::shutdown() { | |
if (0 != width) { | |
RX_ERROR("Cannot shutdown as width is 0. Did you init?"); | |
return -1; | |
} | |
glDeleteBuffers(ASYNC_UPLOAD_NUM_BUFFERS, pbo); | |
width = 0; | |
height = 0; | |
dx = 0; | |
channels = 0; | |
n = 0; | |
return 0; | |
} | |
} /* namespace gfx */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
--------------------------------------------------------------------------------- | |
oooo | |
`888 | |
oooo d8b .ooooo. oooo ooo 888 oooo oooo | |
`888""8P d88' `88b `88b..8P' 888 `888 `888 | |
888 888 888 Y888' 888 888 888 | |
888 888 888 .o8"'88b 888 888 888 | |
d888b `Y8bod8P' o88' 888o o888o `V88V"V8P' | |
www.roxlu.com | |
www.apollomedia.nl | |
www.twitter.com/roxlu | |
--------------------------------------------------------------------------------- | |
AsyncUpload | |
------------ | |
This class uses a couple of pixel buffer objects to overcome | |
synchronization issues when uploading data to the gpu. It's built for
a specific project where we had to upload the complete pixel buffer. | |
Make sure that you use GL_BGRA as format with GL_UNSIGNED_INT_8_8_8_8_REV | |
as type for the texture that you're using (on mac). When you use only 3 | |
channel images, you won't get the optimal upload path and performance is | |
extremely reduced.
Use something like: | |
-- | |
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, width, height, 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, NULL); | |
-- | |
*/ | |
#ifndef ASYNC_UPLOAD_H | |
#define ASYNC_UPLOAD_H | |
#include <stdint.h> | |
#include <glad/glad.h> | |
#define ROXLU_USE_LOG | |
#define ROXLU_USE_OPENGL | |
#define ROXLU_USE_MATH | |
#include <tinylib.h> | |
#define ASYNC_UPLOAD_NUM_BUFFERS 3 | |
namespace gfx { | |
class AsyncUpload { | |
public: | |
AsyncUpload(); | |
~AsyncUpload(); | |
int init(int w, int h, GLenum fmt); /* allocates memory; creates GL objects. */ | |
int shutdown(); /* frees all memory; destroys GL objects. */ | |
int upload(unsigned char* pixels); /* upload the given pixels, we assume that you have bound a texture of the same dimensions as our buffers */ | |
public: | |
GLuint pbo[ASYNC_UPLOAD_NUM_BUFFERS]; /* the pbos that are created in init(), and removed in shutdown(). */ | |
GLenum format; /* what format is used */ | |
int dx; /* current index into the pbo array */ | |
int width; /* width of the texture */ | |
int height; /* height of the texture */ | |
int nbytes; /* number of bytes in the PBOs */ | |
int channels; /* number of color channels that are used */ | |
uint64_t n; /* number of uploads, used to 'schedule' what PBO we should use. */ | |
}; | |
} /* namespace gfx */ | |
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
With 3 PBOs, format GL_BGRA, type: GL_UNSIGNED_INT_8_8_8_8_REV | |
------------------------------------------------------------------- | |
2014.08.28_09.56.55_609: verbose [int main():162] = Took: 0.003976 | |
2014.08.28_09.56.55_623: verbose [int main():162] = Took: 0.003766 | |
2014.08.28_09.56.56_658: verbose [int main():162] = Took: 0.004171 | |
2014.08.28_09.56.56_665: verbose [int main():162] = Took: 0.005319 | |
2014.08.28_09.56.56_673: verbose [int main():162] = Took: 0.003831 | |
2014.08.28_09.56.56_690: verbose [int main():162] = Took: 0.004001 | |
2014.08.28_09.56.56_716: verbose [int main():162] = Took: 0.005537 | |
2014.08.28_09.56.56_726: verbose [int main():162] = Took: 0.003836 | |
2014.08.28_09.56.56_739: verbose [int main():162] = Took: 0.003944 | |
2014.08.28_09.56.56_776: verbose [int main():162] = Took: 0.004286 | |
2014.08.28_09.56.56_783: verbose [int main():162] = Took: 0.005428 | |
2014.08.28_09.56.56_790: verbose [int main():162] = Took: 0.004276 | |
2014.08.28_09.56.56_808: verbose [int main():162] = Took: 0.004208 | |
2014.08.28_09.56.56_833: verbose [int main():162] = Took: 0.005733 | |
2014.08.28_09.56.56_843: verbose [int main():162] = Took: 0.003975 | |
2014.08.28_09.56.56_857: verbose [int main():162] = Took: 0.003847 | |
2014.08.28_09.56.56_892: verbose [int main():162] = Took: 0.004260 | |
2014.08.28_09.56.56_898: verbose [int main():162] = Took: 0.004506 | |
2014.08.28_09.56.56_907: verbose [int main():162] = Took: 0.003997 | |
2014.08.28_09.56.56_924: verbose [int main():162] = Took: 0.003954 | |
2014.08.28_09.56.56_948: verbose [int main():162] = Took: 0.004570 | |
2014.08.28_09.56.56_962: verbose [int main():162] = Took: 0.003556 | |
2014.08.28_09.56.56_975: verbose [int main():162] = Took: 0.004052 | |
2014.08.28_09.56.56_14: verbose [int main():162] = Took: 0.015949 | |
2014.08.28_09.56.56_48: verbose [int main():162] = Took: 0.032679 | |
2014.08.28_09.56.56_61: verbose [int main():162] = Took: 0.005951 | |
2014.08.28_09.56.56_75: verbose [int main():162] = Took: 0.003924 | |
2014.08.28_09.56.56_88: verbose [int main():162] = Took: 0.011031 | |
2014.08.28_09.56.56_100: verbose [int main():162] = Took: 0.004205 | |
2014.08.28_09.56.56_122: verbose [int main():162] = Took: 0.004896 | |
2014.08.28_09.56.56_149: verbose [int main():162] = Took: 0.004883 | |
2014.08.28_09.56.56_173: verbose [int main():162] = Took: 0.004348 | |
2014.08.28_09.56.56_201: verbose [int main():162] = Took: 0.006323 | |
2014.08.28_09.56.56_224: verbose [int main():162] = Took: 0.003962 | |
2014.08.28_09.56.56_270: verbose [int main():162] = Took: 0.004066 | |
2014.08.28_09.56.56_290: verbose [int main():162] = Took: 0.005125 | |
2014.08.28_09.56.56_312: verbose [int main():162] = Took: 0.007200 | |
2014.08.28_09.56.56_346: verbose [int main():162] = Took: 0.003861 | |
2014.08.28_09.56.56_361: verbose [int main():162] = Took: 0.003996 | |
2014.08.28_09.56.56_399: verbose [int main():162] = Took: 0.004490 | |
2014.08.28_09.56.56_411: verbose [int main():162] = Took: 0.003905 | |
2014.08.28_09.56.56_449: verbose [int main():162] = Took: 0.004929 | |
2014.08.28_09.56.56_472: verbose [int main():162] = Took: 0.004919 | |
2014.08.28_09.56.56_504: verbose [int main():162] = Took: 0.004780 | |
2014.08.28_09.56.56_524: verbose [int main():162] = Took: 0.004283 | |
2014.08.28_09.56.56_572: verbose [int main():162] = Took: 0.003920 | |
2014.08.28_09.56.56_606: verbose [int main():162] = Took: 0.004039 | |
2014.08.28_09.56.57_656: verbose [int main():162] = Took: 0.004160 | |
2014.08.28_09.56.57_687: verbose [int main():162] = Took: 0.003985 | |
2014.08.28_09.56.57_720: verbose [int main():162] = Took: 0.005126 | |
2014.08.28_09.56.57_740: verbose [int main():162] = Took: 0.004966 | |
2014.08.28_09.56.57_780: verbose [int main():162] = Took: 0.004602 | |
2014.08.28_09.56.57_796: verbose [int main():162] = Took: 0.003868 | |
2014.08.28_09.56.57_834: verbose [int main():162] = Took: 0.004571 | |
2014.08.28_09.56.57_845: verbose [int main():162] = Took: 0.003883 | |
2014.08.28_09.56.57_876: verbose [int main():162] = Took: 0.007581 | |
2014.08.28_09.56.57_896: verbose [int main():162] = Took: 0.004338 | |
2014.08.28_09.56.57_931: verbose [int main():162] = Took: 0.004243 | |
2014.08.28_09.56.57_946: verbose [int main():162] = Took: 0.003892 | |
2014.08.28_09.56.57_973: verbose [int main():162] = Took: 0.003840 | |
2014.08.28_09.56.57_24: verbose [int main():162] = Took: 0.004088 | |
2014.08.28_09.56.57_55: verbose [int main():162] = Took: 0.003907 | |
2014.08.28_09.56.57_88: verbose [int main():162] = Took: 0.003831 | |
2014.08.28_09.56.57_108: verbose [int main():162] = Took: 0.005098 | |
2014.08.28_09.56.57_152: verbose [int main():162] = Took: 0.004121 | |
2014.08.28_09.56.57_164: verbose [int main():162] = Took: 0.003935 | |
2014.08.28_09.56.57_206: verbose [int main():162] = Took: 0.004132 | |
2014.08.28_09.56.57_216: verbose [int main():162] = Took: 0.004512 | |
2014.08.28_09.56.57_241: verbose [int main():162] = Took: 0.004029 | |
2014.08.28_09.56.57_280: verbose [int main():162] = Took: 0.003882 | |
2014.08.28_09.56.57_296: verbose [int main():162] = Took: 0.003888 | |
2014.08.28_09.56.57_311: verbose [int main():162] = Took: 0.003840 | |
2014.08.28_09.56.57_335: verbose [int main():162] = Took: 0.005672 | |
2014.08.28_09.56.57_362: verbose [int main():162] = Took: 0.004059 | |
2014.08.28_09.56.57_391: verbose [int main():162] = Took: 0.003849 | |
2014.08.28_09.56.57_411: verbose [int main():162] = Took: 0.003838 | |
Using glTexSubImage2D w/o PBOs. Same format as above. | |
------------------------------------------------------------------- | |
2014.08.28_09.57.33_227: verbose [int main():162] = Took: 0.015712 | |
2014.08.28_09.57.33_259: verbose [int main():162] = Took: 0.020895 | |
2014.08.28_09.57.33_283: verbose [int main():162] = Took: 0.021503 | |
2014.08.28_09.57.33_310: verbose [int main():162] = Took: 0.017689 | |
2014.08.28_09.57.33_328: verbose [int main():162] = Took: 0.017360 | |
2014.08.28_09.57.33_359: verbose [int main():162] = Took: 0.029582 | |
2014.08.28_09.57.33_377: verbose [int main():162] = Took: 0.014137 | |
2014.08.28_09.57.33_411: verbose [int main():162] = Took: 0.030622 | |
2014.08.28_09.57.33_424: verbose [int main():162] = Took: 0.009725 | |
2014.08.28_09.57.33_461: verbose [int main():162] = Took: 0.016294 | |
2014.08.28_09.57.33_474: verbose [int main():162] = Took: 0.012158 | |
2014.08.28_09.57.33_495: verbose [int main():162] = Took: 0.019164 | |
2014.08.28_09.57.33_511: verbose [int main():162] = Took: 0.009864 | |
2014.08.28_09.57.33_528: verbose [int main():162] = Took: 0.015752 | |
2014.08.28_09.57.33_541: verbose [int main():162] = Took: 0.011591 | |
2014.08.28_09.57.33_560: verbose [int main():162] = Took: 0.017273 | |
2014.08.28_09.57.33_578: verbose [int main():162] = Took: 0.016341 | |
2014.08.28_09.57.33_592: verbose [int main():162] = Took: 0.012830 | |
2014.08.28_09.57.33_616: verbose [int main():162] = Took: 0.016965 | |
2014.08.28_09.57.34_640: verbose [int main():162] = Took: 0.021996 | |
2014.08.28_09.57.34_662: verbose [int main():162] = Took: 0.004950 | |
2014.08.28_09.57.34_678: verbose [int main():162] = Took: 0.013562 | |
2014.08.28_09.57.34_700: verbose [int main():162] = Took: 0.020186 | |
2014.08.28_09.57.34_716: verbose [int main():162] = Took: 0.009004 | |
2014.08.28_09.57.34_741: verbose [int main():162] = Took: 0.023935 | |
2014.08.28_09.57.34_778: verbose [int main():162] = Took: 0.030341 | |
2014.08.28_09.57.34_796: verbose [int main():162] = Took: 0.016939 | |
2014.08.28_09.57.34_828: verbose [int main():162] = Took: 0.021799 | |
2014.08.28_09.57.34_842: verbose [int main():162] = Took: 0.009358 | |
2014.08.28_09.57.34_878: verbose [int main():162] = Took: 0.019993 | |
2014.08.28_09.57.34_896: verbose [int main():162] = Took: 0.016433 | |
2014.08.28_09.57.34_926: verbose [int main():162] = Took: 0.028643 | |
2014.08.28_09.57.34_945: verbose [int main():162] = Took: 0.014017 | |
2014.08.28_09.57.34_978: verbose [int main():162] = Took: 0.031791 | |
2014.08.28_09.57.34_997: verbose [int main():162] = Took: 0.016920 | |
2014.08.28_09.57.34_16: verbose [int main():162] = Took: 0.018063 | |
2014.08.28_09.57.34_43: verbose [int main():162] = Took: 0.008069 | |
2014.08.28_09.57.34_62: verbose [int main():162] = Took: 0.016659 | |
2014.08.28_09.57.34_95: verbose [int main():162] = Took: 0.031220 | |
2014.08.28_09.57.34_112: verbose [int main():162] = Took: 0.016076 | |
2014.08.28_09.57.34_145: verbose [int main():162] = Took: 0.025039 | |
2014.08.28_09.57.34_163: verbose [int main():162] = Took: 0.016290 | |
2014.08.28_09.57.34_195: verbose [int main():162] = Took: 0.028162 | |
2014.08.28_09.57.34_213: verbose [int main():162] = Took: 0.016082 | |
2014.08.28_09.57.34_245: verbose [int main():162] = Took: 0.031331 | |
2014.08.28_09.57.34_264: verbose [int main():162] = Took: 0.016281 | |
2014.08.28_09.57.34_296: verbose [int main():162] = Took: 0.020673 | |
2014.08.28_09.57.34_313: verbose [int main():162] = Took: 0.015207 | |
2014.08.28_09.57.34_346: verbose [int main():162] = Took: 0.031821 | |
2014.08.28_09.57.34_368: verbose [int main():162] = Took: 0.020428 | |
2014.08.28_09.57.34_397: verbose [int main():162] = Took: 0.022230 | |
2014.08.28_09.57.34_413: verbose [int main():162] = Took: 0.014551 | |
2014.08.28_09.57.34_446: verbose [int main():162] = Took: 0.032402 | |
2014.08.28_09.57.34_466: verbose [int main():162] = Took: 0.017916 | |
2014.08.28_09.57.34_496: verbose [int main():162] = Took: 0.028936 | |
2014.08.28_09.57.34_514: verbose [int main():162] = Took: 0.013146 | |
2014.08.28_09.57.34_546: verbose [int main():162] = Took: 0.022253 | |
2014.08.28_09.57.34_563: verbose [int main():162] = Took: 0.013557 | |
2014.08.28_09.57.34_594: verbose [int main():162] = Took: 0.029427 | |
2014.08.28_09.57.34_610: verbose [int main():162] = Took: 0.013754 | |
2014.08.28_09.57.35_647: verbose [int main():162] = Took: 0.019401 | |
2014.08.28_09.57.35_667: verbose [int main():162] = Took: 0.018647 | |
2014.08.28_09.57.35_695: verbose [int main():162] = Took: 0.024147 | |
2014.08.28_09.57.35_711: verbose [int main():162] = Took: 0.013650 | |
2014.08.28_09.57.35_747: verbose [int main():162] = Took: 0.019825 | |
2014.08.28_09.57.35_761: verbose [int main():162] = Took: 0.010776 | |
2014.08.28_09.57.35_779: verbose [int main():162] = Took: 0.015481 | |
2014.08.28_09.57.35_797: verbose [int main():162] = Took: 0.016900 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Can you add the main function you're using? I think you get such differing upload times because you don't wait for the completion of the uploads for your time measurements.