5555
5656#include < cstdlib>
5757
// Logs "<str> failed: <err>" via LOG_ERROR and returns false from the enclosing
// function when err is not CL_SUCCESS. Only usable inside functions whose
// return type can be initialized from `false`.
#define CHECK_CL_ERROR(err, str) do {if ((err) != CL_SUCCESS) {LOG_ERROR << (str) << " failed: " << (err); return false; } } while(0)
// Logs "<str> failed: <err>" via LOG_ERROR when err is not CL_SUCCESS, without
// returning. Written as if/else so that an `else` following an unbraced use
// binds to the caller's `if` and not to the one hidden inside this macro.
#define LOG_CL_ERROR(err, str) if ((err) == CL_SUCCESS) {} else LOG_ERROR << (str) << " failed: " << (err)
60+
5861namespace libfreenect2
5962{
6063
@@ -72,13 +75,75 @@ std::string loadCLSource(const std::string &filename)
7275 return std::string (reinterpret_cast <const char *>(data), length);
7376}
7477
78+ class OpenCLDepthPacketProcessorImpl ;
79+
80+ class OpenCLBuffer : public Buffer
81+ {
82+ public:
83+ cl::Buffer buffer;
84+ };
85+
86+ class OpenCLAllocator : public Allocator
87+ {
88+ private:
89+ OpenCLDepthPacketProcessorImpl *impl_;
90+ cl::Buffer buffer;
91+
92+ bool allocate_opencl (OpenCLBuffer *b, size_t size);
93+
94+ public:
95+ OpenCLAllocator (OpenCLDepthPacketProcessorImpl *impl_) : impl_(impl_)
96+ {
97+ }
98+
99+ virtual Buffer *allocate (size_t size)
100+ {
101+ OpenCLBuffer *b = new OpenCLBuffer ();
102+ if (!allocate_opencl (b, size)) {
103+ delete b;
104+ b = NULL ;
105+ }
106+ return b;
107+ }
108+
109+ virtual void free (Buffer *b)
110+ {
111+ if (b == NULL || b->data == NULL )
112+ return ;
113+ delete b;
114+ }
115+ };
116+
117+ class OpenCLFrame : public Frame
118+ {
119+ bool allocate_opencl (size_t size, OpenCLDepthPacketProcessorImpl *impl_);
120+
121+ public:
122+ cl::Buffer frameBuffer;
123+
124+ OpenCLFrame (size_t width, size_t height, size_t bytes_per_pixel, OpenCLDepthPacketProcessorImpl *impl_):
125+ Frame (width, height, bytes_per_pixel, (unsigned char *)-1 )
126+ {
127+ data = NULL ;
128+
129+ size_t size = width*height*bytes_per_pixel;
130+ allocate_opencl (size, impl_);
131+ }
132+
133+ virtual ~OpenCLFrame ()
134+ {
135+ data = NULL ;
136+ }
137+ };
138+
75139class OpenCLDepthPacketProcessorImpl : public WithPerfLogging
76140{
77141public:
78142 libfreenect2::DepthPacketProcessor::Config config;
79143 DepthPacketProcessor::Parameters params;
80144
81145 Frame *ir_frame, *depth_frame;
146+ Allocator *allocator;
82147
83148 cl::Context context;
84149 cl::Device device;
@@ -148,10 +213,12 @@ class OpenCLDepthPacketProcessorImpl: public WithPerfLogging
148213 setenv (" OCL_STRICT_CONFORMANCE" , " 0" , 0 );
149214#endif
150215
216+ deviceInitialized = initDevice (deviceId);
217+
151218 newIrFrame ();
152219 newDepthFrame ();
153220
154- deviceInitialized = initDevice (deviceId );
221+ allocator = new PoolAllocator ( new OpenCLAllocator ( this ) );
155222
156223 const int CL_ICDL_VERSION = 2 ;
157224 typedef cl_int (*icdloader_func)(int , size_t , void *, size_t *);
@@ -181,6 +248,7 @@ class OpenCLDepthPacketProcessorImpl: public WithPerfLogging
181248
182249 ~OpenCLDepthPacketProcessorImpl ()
183250 {
251+ delete allocator;
184252 delete ir_frame;
185253 delete depth_frame;
186254 }
@@ -238,6 +306,8 @@ class OpenCLDepthPacketProcessorImpl: public WithPerfLogging
238306
239307 oss << " -D MIN_DEPTH=" << config.MinDepth * 1000 .0f << " f" ;
240308 oss << " -D MAX_DEPTH=" << config.MaxDepth * 1000 .0f << " f" ;
309+
310+ oss << " -cl-mad-enable -cl-no-signed-zeros -cl-fast-relaxed-math" ;
241311 options = oss.str ();
242312 }
243313
@@ -320,9 +390,6 @@ class OpenCLDepthPacketProcessorImpl: public WithPerfLogging
320390 return selected;
321391 }
322392
323- #define CHECK_CL_ERROR (err, str ) do {if (err != CL_SUCCESS) {LOG_ERROR << str << " failed: " << err; return false ; } } while (0 )
324- #define LOG_CL_ERROR (err, str ) if (err != CL_SUCCESS) LOG_ERROR << str << " failed: " << err
325-
326393 bool initDevice (const int deviceId)
327394 {
328395 if (!readProgram (sourceCode))
@@ -373,17 +440,17 @@ class OpenCLDepthPacketProcessorImpl: public WithPerfLogging
373440 buf_z_table_size = image_size * sizeof (cl_float);
374441 buf_packet_size = ((image_size * 11 ) / 16 ) * 10 * sizeof (cl_ushort);
375442
376- buf_lut11to16 = cl::Buffer (context, CL_READ_ONLY_CACHE , buf_lut11to16_size, NULL , &err);
443+ buf_lut11to16 = cl::Buffer (context, CL_MEM_READ_ONLY , buf_lut11to16_size, NULL , &err);
377444 CHECK_CL_ERROR (err, " cl::Buffer" );
378- buf_p0_sin_table = cl::Buffer (context, CL_READ_ONLY_CACHE , buf_p0_table_size, NULL , &err);
445+ buf_p0_sin_table = cl::Buffer (context, CL_MEM_READ_ONLY , buf_p0_table_size, NULL , &err);
379446 CHECK_CL_ERROR (err, " cl::Buffer" );
380- buf_p0_cos_table = cl::Buffer (context, CL_READ_ONLY_CACHE , buf_p0_table_size, NULL , &err);
447+ buf_p0_cos_table = cl::Buffer (context, CL_MEM_READ_ONLY , buf_p0_table_size, NULL , &err);
381448 CHECK_CL_ERROR (err, " cl::Buffer" );
382- buf_x_table = cl::Buffer (context, CL_READ_ONLY_CACHE , buf_x_table_size, NULL , &err);
449+ buf_x_table = cl::Buffer (context, CL_MEM_READ_ONLY , buf_x_table_size, NULL , &err);
383450 CHECK_CL_ERROR (err, " cl::Buffer" );
384- buf_z_table = cl::Buffer (context, CL_READ_ONLY_CACHE , buf_z_table_size, NULL , &err);
451+ buf_z_table = cl::Buffer (context, CL_MEM_READ_ONLY , buf_z_table_size, NULL , &err);
385452 CHECK_CL_ERROR (err, " cl::Buffer" );
386- buf_packet = cl::Buffer (context, CL_READ_ONLY_CACHE , buf_packet_size, NULL , &err);
453+ buf_packet = cl::Buffer (context, CL_MEM_READ_ONLY , buf_packet_size, NULL , &err);
387454 CHECK_CL_ERROR (err, " cl::Buffer" );
388455
389456 // Read-Write
@@ -398,25 +465,25 @@ class OpenCLDepthPacketProcessorImpl: public WithPerfLogging
398465 buf_ir_sum_size = image_size * sizeof (cl_float);
399466 buf_filtered_size = image_size * sizeof (cl_float);
400467
401- buf_a = cl::Buffer (context, CL_READ_WRITE_CACHE , buf_a_size, NULL , &err);
468+ buf_a = cl::Buffer (context, CL_MEM_READ_WRITE , buf_a_size, NULL , &err);
402469 CHECK_CL_ERROR (err, " cl::Buffer" );
403- buf_b = cl::Buffer (context, CL_READ_WRITE_CACHE , buf_b_size, NULL , &err);
470+ buf_b = cl::Buffer (context, CL_MEM_READ_WRITE , buf_b_size, NULL , &err);
404471 CHECK_CL_ERROR (err, " cl::Buffer" );
405- buf_n = cl::Buffer (context, CL_READ_WRITE_CACHE , buf_n_size, NULL , &err);
472+ buf_n = cl::Buffer (context, CL_MEM_READ_WRITE , buf_n_size, NULL , &err);
406473 CHECK_CL_ERROR (err, " cl::Buffer" );
407- buf_ir = cl::Buffer (context, CL_READ_WRITE_CACHE , buf_ir_size, NULL , &err);
474+ buf_ir = cl::Buffer (context, CL_MEM_READ_WRITE , buf_ir_size, NULL , &err);
408475 CHECK_CL_ERROR (err, " cl::Buffer" );
409- buf_a_filtered = cl::Buffer (context, CL_READ_WRITE_CACHE , buf_a_filtered_size, NULL , &err);
476+ buf_a_filtered = cl::Buffer (context, CL_MEM_READ_WRITE , buf_a_filtered_size, NULL , &err);
410477 CHECK_CL_ERROR (err, " cl::Buffer" );
411- buf_b_filtered = cl::Buffer (context, CL_READ_WRITE_CACHE , buf_b_filtered_size, NULL , &err);
478+ buf_b_filtered = cl::Buffer (context, CL_MEM_READ_WRITE , buf_b_filtered_size, NULL , &err);
412479 CHECK_CL_ERROR (err, " cl::Buffer" );
413- buf_edge_test = cl::Buffer (context, CL_READ_WRITE_CACHE , buf_edge_test_size, NULL , &err);
480+ buf_edge_test = cl::Buffer (context, CL_MEM_READ_WRITE , buf_edge_test_size, NULL , &err);
414481 CHECK_CL_ERROR (err, " cl::Buffer" );
415- buf_depth = cl::Buffer (context, CL_READ_WRITE_CACHE , buf_depth_size, NULL , &err);
482+ buf_depth = cl::Buffer (context, CL_MEM_READ_WRITE , buf_depth_size, NULL , &err);
416483 CHECK_CL_ERROR (err, " cl::Buffer" );
417- buf_ir_sum = cl::Buffer (context, CL_READ_WRITE_CACHE , buf_ir_sum_size, NULL , &err);
484+ buf_ir_sum = cl::Buffer (context, CL_MEM_READ_WRITE , buf_ir_sum_size, NULL , &err);
418485 CHECK_CL_ERROR (err, " cl::Buffer" );
419- buf_filtered = cl::Buffer (context, CL_READ_WRITE_CACHE , buf_filtered_size, NULL , &err);
486+ buf_filtered = cl::Buffer (context, CL_MEM_WRITE_ONLY , buf_filtered_size, NULL , &err);
420487 CHECK_CL_ERROR (err, " cl::Buffer" );
421488
422489 return true ;
@@ -507,7 +574,7 @@ class OpenCLDepthPacketProcessorImpl: public WithPerfLogging
507574 cl::Event event0, event1;
508575
509576 err = queue.enqueueWriteBuffer (buf_packet, CL_FALSE, 0 , buf_packet_size, packet.buffer , NULL , &eventWrite[0 ]);
510- CHECK_CL_ERROR (err, " enqueueWriteBuffer " );
577+ CHECK_CL_ERROR (err, " enqueueMapBuffer " );
511578
512579 err = queue.enqueueNDRangeKernel (kernel_processPixelStage1, cl::NullRange, cl::NDRange (image_size), cl::NullRange, &eventWrite, &eventPPS1[0 ]);
513580 CHECK_CL_ERROR (err, " enqueueNDRangeKernel" );
@@ -582,12 +649,12 @@ class OpenCLDepthPacketProcessorImpl: public WithPerfLogging
582649
583650 void newIrFrame ()
584651 {
585- ir_frame = new Frame (512 , 424 , 4 );
652+ ir_frame = new OpenCLFrame (512 , 424 , 4 , this );
586653 }
587654
588655 void newDepthFrame ()
589656 {
590- depth_frame = new Frame (512 , 424 , 4 );
657+ depth_frame = new OpenCLFrame (512 , 424 , 4 , this );
591658 }
592659
593660 void fill_trig_table (const libfreenect2::protocol::P0TablesResponse *p0table)
@@ -679,6 +746,39 @@ class OpenCLDepthPacketProcessorImpl: public WithPerfLogging
679746 }
680747};
681748
749+ bool OpenCLFrame::allocate_opencl (size_t size, OpenCLDepthPacketProcessorImpl *impl_)
750+ {
751+ if (!impl_->deviceInitialized )
752+ return false ;
753+
754+ cl_int err = CL_SUCCESS;
755+
756+ frameBuffer = cl::Buffer (impl_->context , CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, size, NULL , &err);
757+ CHECK_CL_ERROR (err, " cl::Buffer" );
758+
759+ data = (unsigned char *)impl_->queue .enqueueMapBuffer (frameBuffer, CL_TRUE, CL_MAP_READ, 0 , size, NULL , NULL , &err);
760+ CHECK_CL_ERROR (err, " cl::Buffer" );
761+ return true ;
762+ }
763+
764+ bool OpenCLAllocator::allocate_opencl (OpenCLBuffer *b, size_t size)
765+ {
766+ if (!impl_->deviceInitialized )
767+ return false ;
768+
769+ cl_int err = CL_SUCCESS;
770+
771+ b->buffer = cl::Buffer (impl_->context , CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, size, NULL , &err);
772+ CHECK_CL_ERROR (err, " cl::Buffer" );
773+
774+ b->data = (unsigned char *)impl_->queue .enqueueMapBuffer (b->buffer , CL_TRUE, CL_MAP_WRITE, 0 , size, NULL , NULL , &err);
775+ CHECK_CL_ERROR (err, " enqueueMapBuffer" );
776+
777+ b->length = 0 ;
778+ b->capacity = size;
779+ return true ;
780+ }
781+
682782OpenCLDepthPacketProcessor::OpenCLDepthPacketProcessor (const int deviceId) :
683783 impl_ (new OpenCLDepthPacketProcessorImpl(deviceId))
684784{
@@ -770,5 +870,9 @@ void OpenCLDepthPacketProcessor::process(const DepthPacket &packet)
770870 }
771871}
772872
873+ Allocator *OpenCLDepthPacketProcessor::getAllocator ()
874+ {
875+ return impl_->allocator ;
876+ }
773877} /* namespace libfreenect2 */
774878
0 commit comments