Skip to content

Commit 4c699a2

Browse files
Thiemo Wiedemeyer authored and xlz committed
opencl: Use pinned memory buffers and frames
1 parent 540d369 commit 4c699a2

2 files changed

Lines changed: 129 additions & 23 deletions

File tree

include/internal/libfreenect2/depth_packet_processor.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -177,6 +177,8 @@ class OpenCLDepthPacketProcessor : public DepthPacketProcessor
177177

178178
virtual const char *name() { return "OpenCL"; }
179179
virtual void process(const DepthPacket &packet);
180+
protected:
181+
virtual Allocator *getAllocator();
180182
private:
181183
OpenCLDepthPacketProcessorImpl *impl_;
182184
};

src/opencl_depth_packet_processor.cpp

Lines changed: 127 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -55,6 +55,9 @@
5555

5656
#include <cstdlib>
5757

58+
// CHECK_CL_ERROR(err, str): if err is not CL_SUCCESS, log "<str> failed: <err>"
// through LOG_ERROR and make the *enclosing function* return false. Only usable
// inside functions whose return type can be initialized from `false`.
// The do { ... } while(0) wrapper makes the macro behave as a single statement.
#define CHECK_CL_ERROR(err, str) do {if ((err) != CL_SUCCESS) {LOG_ERROR << (str) << " failed: " << (err); return false; } } while(0)
// LOG_CL_ERROR(err, str): if err is not CL_SUCCESS, log "<str> failed: <err>"
// through LOG_ERROR; control flow of the caller is not altered.
// Written as `if (ok) {} else <log>` so the macro already owns an `else`: an
// `else` written after the macro at the call site can no longer attach to the
// macro's hidden `if` (dangling-else hazard), and the stream expression stays
// open for a trailing `<< ...` exactly as before.
#define LOG_CL_ERROR(err, str) if ((err) == CL_SUCCESS) {} else LOG_ERROR << (str) << " failed: " << (err)
60+
5861
namespace libfreenect2
5962
{
6063

@@ -72,13 +75,75 @@ std::string loadCLSource(const std::string &filename)
7275
return std::string(reinterpret_cast<const char *>(data), length);
7376
}
7477

78+
class OpenCLDepthPacketProcessorImpl;
79+
80+
class OpenCLBuffer: public Buffer
81+
{
82+
public:
83+
cl::Buffer buffer;
84+
};
85+
86+
class OpenCLAllocator: public Allocator
87+
{
88+
private:
89+
OpenCLDepthPacketProcessorImpl *impl_;
90+
cl::Buffer buffer;
91+
92+
bool allocate_opencl(OpenCLBuffer *b, size_t size);
93+
94+
public:
95+
OpenCLAllocator(OpenCLDepthPacketProcessorImpl *impl_) : impl_(impl_)
96+
{
97+
}
98+
99+
virtual Buffer *allocate(size_t size)
100+
{
101+
OpenCLBuffer *b = new OpenCLBuffer();
102+
if (!allocate_opencl(b, size)) {
103+
delete b;
104+
b = NULL;
105+
}
106+
return b;
107+
}
108+
109+
virtual void free(Buffer *b)
110+
{
111+
if (b == NULL || b->data == NULL)
112+
return;
113+
delete b;
114+
}
115+
};
116+
117+
class OpenCLFrame: public Frame
118+
{
119+
bool allocate_opencl(size_t size, OpenCLDepthPacketProcessorImpl *impl_);
120+
121+
public:
122+
cl::Buffer frameBuffer;
123+
124+
OpenCLFrame(size_t width, size_t height, size_t bytes_per_pixel, OpenCLDepthPacketProcessorImpl *impl_):
125+
Frame(width, height, bytes_per_pixel, (unsigned char*)-1)
126+
{
127+
data = NULL;
128+
129+
size_t size = width*height*bytes_per_pixel;
130+
allocate_opencl(size, impl_);
131+
}
132+
133+
virtual ~OpenCLFrame()
134+
{
135+
data = NULL;
136+
}
137+
};
138+
75139
class OpenCLDepthPacketProcessorImpl: public WithPerfLogging
76140
{
77141
public:
78142
libfreenect2::DepthPacketProcessor::Config config;
79143
DepthPacketProcessor::Parameters params;
80144

81145
Frame *ir_frame, *depth_frame;
146+
Allocator *allocator;
82147

83148
cl::Context context;
84149
cl::Device device;
@@ -148,10 +213,12 @@ class OpenCLDepthPacketProcessorImpl: public WithPerfLogging
148213
setenv("OCL_STRICT_CONFORMANCE", "0", 0);
149214
#endif
150215

216+
deviceInitialized = initDevice(deviceId);
217+
151218
newIrFrame();
152219
newDepthFrame();
153220

154-
deviceInitialized = initDevice(deviceId);
221+
allocator = new PoolAllocator(new OpenCLAllocator(this));
155222

156223
const int CL_ICDL_VERSION = 2;
157224
typedef cl_int (*icdloader_func)(int, size_t, void*, size_t*);
@@ -181,6 +248,7 @@ class OpenCLDepthPacketProcessorImpl: public WithPerfLogging
181248

182249
/** Releases the allocator and both frames held by this object. */
~OpenCLDepthPacketProcessorImpl()
{
  // The allocator goes first, then the frames (same order as before).
  delete allocator;

  delete ir_frame;
  delete depth_frame;
}
@@ -238,6 +306,8 @@ class OpenCLDepthPacketProcessorImpl: public WithPerfLogging
238306

239307
oss << " -D MIN_DEPTH=" << config.MinDepth * 1000.0f << "f";
240308
oss << " -D MAX_DEPTH=" << config.MaxDepth * 1000.0f << "f";
309+
310+
oss << " -cl-mad-enable -cl-no-signed-zeros -cl-fast-relaxed-math";
241311
options = oss.str();
242312
}
243313

@@ -320,9 +390,6 @@ class OpenCLDepthPacketProcessorImpl: public WithPerfLogging
320390
return selected;
321391
}
322392

323-
#define CHECK_CL_ERROR(err, str) do {if (err != CL_SUCCESS) {LOG_ERROR << str << " failed: " << err; return false; } } while(0)
324-
#define LOG_CL_ERROR(err, str) if (err != CL_SUCCESS) LOG_ERROR << str << " failed: " << err
325-
326393
bool initDevice(const int deviceId)
327394
{
328395
if(!readProgram(sourceCode))
@@ -373,17 +440,17 @@ class OpenCLDepthPacketProcessorImpl: public WithPerfLogging
373440
buf_z_table_size = image_size * sizeof(cl_float);
374441
buf_packet_size = ((image_size * 11) / 16) * 10 * sizeof(cl_ushort);
375442

376-
buf_lut11to16 = cl::Buffer(context, CL_READ_ONLY_CACHE, buf_lut11to16_size, NULL, &err);
443+
buf_lut11to16 = cl::Buffer(context, CL_MEM_READ_ONLY, buf_lut11to16_size, NULL, &err);
377444
CHECK_CL_ERROR(err, "cl::Buffer");
378-
buf_p0_sin_table = cl::Buffer(context, CL_READ_ONLY_CACHE, buf_p0_table_size, NULL, &err);
445+
buf_p0_sin_table = cl::Buffer(context, CL_MEM_READ_ONLY, buf_p0_table_size, NULL, &err);
379446
CHECK_CL_ERROR(err, "cl::Buffer");
380-
buf_p0_cos_table = cl::Buffer(context, CL_READ_ONLY_CACHE, buf_p0_table_size, NULL, &err);
447+
buf_p0_cos_table = cl::Buffer(context, CL_MEM_READ_ONLY, buf_p0_table_size, NULL, &err);
381448
CHECK_CL_ERROR(err, "cl::Buffer");
382-
buf_x_table = cl::Buffer(context, CL_READ_ONLY_CACHE, buf_x_table_size, NULL, &err);
449+
buf_x_table = cl::Buffer(context, CL_MEM_READ_ONLY, buf_x_table_size, NULL, &err);
383450
CHECK_CL_ERROR(err, "cl::Buffer");
384-
buf_z_table = cl::Buffer(context, CL_READ_ONLY_CACHE, buf_z_table_size, NULL, &err);
451+
buf_z_table = cl::Buffer(context, CL_MEM_READ_ONLY, buf_z_table_size, NULL, &err);
385452
CHECK_CL_ERROR(err, "cl::Buffer");
386-
buf_packet = cl::Buffer(context, CL_READ_ONLY_CACHE, buf_packet_size, NULL, &err);
453+
buf_packet = cl::Buffer(context, CL_MEM_READ_ONLY, buf_packet_size, NULL, &err);
387454
CHECK_CL_ERROR(err, "cl::Buffer");
388455

389456
//Read-Write
@@ -398,25 +465,25 @@ class OpenCLDepthPacketProcessorImpl: public WithPerfLogging
398465
buf_ir_sum_size = image_size * sizeof(cl_float);
399466
buf_filtered_size = image_size * sizeof(cl_float);
400467

401-
buf_a = cl::Buffer(context, CL_READ_WRITE_CACHE, buf_a_size, NULL, &err);
468+
buf_a = cl::Buffer(context, CL_MEM_READ_WRITE, buf_a_size, NULL, &err);
402469
CHECK_CL_ERROR(err, "cl::Buffer");
403-
buf_b = cl::Buffer(context, CL_READ_WRITE_CACHE, buf_b_size, NULL, &err);
470+
buf_b = cl::Buffer(context, CL_MEM_READ_WRITE, buf_b_size, NULL, &err);
404471
CHECK_CL_ERROR(err, "cl::Buffer");
405-
buf_n = cl::Buffer(context, CL_READ_WRITE_CACHE, buf_n_size, NULL, &err);
472+
buf_n = cl::Buffer(context, CL_MEM_READ_WRITE, buf_n_size, NULL, &err);
406473
CHECK_CL_ERROR(err, "cl::Buffer");
407-
buf_ir = cl::Buffer(context, CL_READ_WRITE_CACHE, buf_ir_size, NULL, &err);
474+
buf_ir = cl::Buffer(context, CL_MEM_READ_WRITE, buf_ir_size, NULL, &err);
408475
CHECK_CL_ERROR(err, "cl::Buffer");
409-
buf_a_filtered = cl::Buffer(context, CL_READ_WRITE_CACHE, buf_a_filtered_size, NULL, &err);
476+
buf_a_filtered = cl::Buffer(context, CL_MEM_READ_WRITE, buf_a_filtered_size, NULL, &err);
410477
CHECK_CL_ERROR(err, "cl::Buffer");
411-
buf_b_filtered = cl::Buffer(context, CL_READ_WRITE_CACHE, buf_b_filtered_size, NULL, &err);
478+
buf_b_filtered = cl::Buffer(context, CL_MEM_READ_WRITE, buf_b_filtered_size, NULL, &err);
412479
CHECK_CL_ERROR(err, "cl::Buffer");
413-
buf_edge_test = cl::Buffer(context, CL_READ_WRITE_CACHE, buf_edge_test_size, NULL, &err);
480+
buf_edge_test = cl::Buffer(context, CL_MEM_READ_WRITE, buf_edge_test_size, NULL, &err);
414481
CHECK_CL_ERROR(err, "cl::Buffer");
415-
buf_depth = cl::Buffer(context, CL_READ_WRITE_CACHE, buf_depth_size, NULL, &err);
482+
buf_depth = cl::Buffer(context, CL_MEM_READ_WRITE, buf_depth_size, NULL, &err);
416483
CHECK_CL_ERROR(err, "cl::Buffer");
417-
buf_ir_sum = cl::Buffer(context, CL_READ_WRITE_CACHE, buf_ir_sum_size, NULL, &err);
484+
buf_ir_sum = cl::Buffer(context, CL_MEM_READ_WRITE, buf_ir_sum_size, NULL, &err);
418485
CHECK_CL_ERROR(err, "cl::Buffer");
419-
buf_filtered = cl::Buffer(context, CL_READ_WRITE_CACHE, buf_filtered_size, NULL, &err);
486+
buf_filtered = cl::Buffer(context, CL_MEM_WRITE_ONLY, buf_filtered_size, NULL, &err);
420487
CHECK_CL_ERROR(err, "cl::Buffer");
421488

422489
return true;
@@ -507,7 +574,7 @@ class OpenCLDepthPacketProcessorImpl: public WithPerfLogging
507574
cl::Event event0, event1;
508575

509576
err = queue.enqueueWriteBuffer(buf_packet, CL_FALSE, 0, buf_packet_size, packet.buffer, NULL, &eventWrite[0]);
510-
CHECK_CL_ERROR(err, "enqueueWriteBuffer");
577+
CHECK_CL_ERROR(err, "enqueueMapBuffer");
511578

512579
err = queue.enqueueNDRangeKernel(kernel_processPixelStage1, cl::NullRange, cl::NDRange(image_size), cl::NullRange, &eventWrite, &eventPPS1[0]);
513580
CHECK_CL_ERROR(err, "enqueueNDRangeKernel");
@@ -582,12 +649,12 @@ class OpenCLDepthPacketProcessorImpl: public WithPerfLogging
582649

583650
void newIrFrame()
584651
{
585-
ir_frame = new Frame(512, 424, 4);
652+
ir_frame = new OpenCLFrame(512, 424, 4, this);
586653
}
587654

588655
void newDepthFrame()
589656
{
590-
depth_frame = new Frame(512, 424, 4);
657+
depth_frame = new OpenCLFrame(512, 424, 4, this);
591658
}
592659

593660
void fill_trig_table(const libfreenect2::protocol::P0TablesResponse *p0table)
@@ -679,6 +746,39 @@ class OpenCLDepthPacketProcessorImpl: public WithPerfLogging
679746
}
680747
};
681748

749+
bool OpenCLFrame::allocate_opencl(size_t size, OpenCLDepthPacketProcessorImpl *impl_)
750+
{
751+
if(!impl_->deviceInitialized)
752+
return false;
753+
754+
cl_int err = CL_SUCCESS;
755+
756+
frameBuffer = cl::Buffer(impl_->context, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, size, NULL, &err);
757+
CHECK_CL_ERROR(err, "cl::Buffer");
758+
759+
data = (unsigned char*)impl_->queue.enqueueMapBuffer(frameBuffer, CL_TRUE, CL_MAP_READ, 0, size, NULL, NULL, &err);
760+
CHECK_CL_ERROR(err, "cl::Buffer");
761+
return true;
762+
}
763+
764+
/**
 * Create the OpenCL buffer for `b` and map it for host writes.
 *
 * The buffer is created with CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR and then
 * mapped with a blocking CL_MAP_WRITE map; the mapped address becomes b->data.
 *
 * @param b Buffer object to fill in (buffer, data, length, capacity).
 * @param size Number of bytes to allocate and map.
 * @return false if the device is not initialized or an OpenCL call failed
 *         (the failing call is logged by CHECK_CL_ERROR), true otherwise.
 */
bool OpenCLAllocator::allocate_opencl(OpenCLBuffer *b, size_t size)
{
  if (impl_->deviceInitialized)
  {
    cl_int status = CL_SUCCESS;

    // Step 1: create the OpenCL buffer object.
    b->buffer = cl::Buffer(impl_->context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, size, NULL, &status);
    CHECK_CL_ERROR(status, "cl::Buffer");

    // Step 2: map it (blocking) so the host can write through b->data.
    void *mapped = impl_->queue.enqueueMapBuffer(b->buffer, CL_TRUE, CL_MAP_WRITE, 0, size, NULL, NULL, &status);
    b->data = static_cast<unsigned char *>(mapped);
    CHECK_CL_ERROR(status, "enqueueMapBuffer");

    // Step 3: a fresh buffer holds no payload yet.
    b->capacity = size;
    b->length = 0;
    return true;
  }

  return false;
}
781+
682782
OpenCLDepthPacketProcessor::OpenCLDepthPacketProcessor(const int deviceId) :
683783
impl_(new OpenCLDepthPacketProcessorImpl(deviceId))
684784
{
@@ -770,5 +870,9 @@ void OpenCLDepthPacketProcessor::process(const DepthPacket &packet)
770870
}
771871
}
772872

873+
/**
 * @return The allocator stored in the implementation object; the pointer is
 *         handed out as-is, no new allocator is created here.
 */
Allocator *OpenCLDepthPacketProcessor::getAllocator()
{
  Allocator *const current = impl_->allocator;
  return current;
}
773877
} /* namespace libfreenect2 */
774878

0 commit comments

Comments
 (0)