Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 22 additions & 43 deletions src/common/opencl.c
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,7 @@ gboolean dt_opencl_use_pinned_memory(const int devid)

void dt_opencl_write_device_config(const int devid)
{
if(devid < 0) return;
if(devid <= DT_DEVICE_CPU) return;

/* As we have floats as per-device parameters we keep track of current locale
and do conversions via "C" here and while reading device config
Expand Down Expand Up @@ -366,7 +366,7 @@ void dt_opencl_write_device_config(const int devid)

gboolean dt_opencl_read_device_config(const int devid)
{
if(devid < 0) return FALSE;
if(devid <= DT_DEVICE_CPU) return FALSE;

gchar *locale = g_strdup(setlocale(LC_ALL, NULL));
setlocale(LC_NUMERIC, "C");
Expand Down Expand Up @@ -1698,7 +1698,7 @@ static const char *_opencl_get_vendor_by_id(const unsigned int id)
gboolean dt_opencl_finish(const int devid)
{
dt_opencl_t *cl = darktable.opencl;
if(!cl->inited || devid < 0) return FALSE;
if(!cl->inited || devid <= DT_DEVICE_CPU) return FALSE;

const cl_int err = (cl->dlocl->symbols->dt_clFinish)(cl->dev[devid].cmd_queue);

Expand All @@ -1713,7 +1713,7 @@ gboolean dt_opencl_finish_sync_pipe(const int devid,
const int pipetype)
{
dt_opencl_t *cl = darktable.opencl;
if(!cl->inited || devid < 0) return FALSE;
if(!cl->inited || devid <= DT_DEVICE_CPU) return FALSE;

const gboolean exporting = pipetype & DT_DEV_PIXELPIPE_EXPORT;
const gboolean asyncmode = cl->dev[devid].asyncmode;
Expand Down Expand Up @@ -2596,7 +2596,7 @@ static gboolean _check_kernel(const int dev,
{
dt_opencl_t *cl = darktable.opencl;

if(!cl->inited || dev < 0) return FALSE;
if(!cl->inited || dev <= DT_DEVICE_CPU) return FALSE;
if(kernel < 0 || kernel >= DT_OPENCL_MAX_KERNELS) return FALSE;

if(cl->dev[dev].kernel_used[kernel]) return TRUE;
Expand Down Expand Up @@ -2646,7 +2646,7 @@ int dt_opencl_get_max_work_item_sizes(const int dev,
size_t *sizes)
{
dt_opencl_t *cl = darktable.opencl;
if(!cl->inited || dev < 0) return CL_DEVICE_NOT_AVAILABLE;
if(!cl->inited || dev <= DT_DEVICE_CPU) return CL_DEVICE_NOT_AVAILABLE;
return (cl->dlocl->symbols->dt_clGetDeviceInfo)(cl->dev[dev].devid,
CL_DEVICE_MAX_WORK_ITEM_SIZES,
sizeof(size_t) * 3, sizes, NULL);
Expand All @@ -2659,7 +2659,7 @@ int dt_opencl_get_work_group_limits(const int dev,
unsigned long *localmemsize)
{
dt_opencl_t *cl = darktable.opencl;
if(!cl->inited || dev < 0) return CL_DEVICE_NOT_AVAILABLE;
if(!cl->inited || dev <= DT_DEVICE_CPU) return CL_DEVICE_NOT_AVAILABLE;

if(cl->dev[dev].local_size == 0) return CL_INVALID_WORK_DIMENSION;
*localmemsize = cl->dev[dev].local_size;
Expand Down Expand Up @@ -2870,24 +2870,14 @@ int dt_opencl_copy_device_to_host(const int devid,
const int width,
const int height,
const int bpp)
{
return dt_opencl_read_host_from_device_rowpitch(devid, host, device,
width, height, bpp * width);
}

int dt_opencl_read_host_from_device_rowpitch(const int devid,
void *host,
void *device,
const int width,
const int height,
const int rowpitch)
{
if(!_cldev_running(devid))
return DT_OPENCL_NODEVICE;

const size_t region[2] = { width, height };
// blocking.
return dt_opencl_read_host_from_device_raw(devid, host, device, CLIMG_ORIGIN,
region, rowpitch, TRUE);
region, (size_t)width * bpp, TRUE);
}

int dt_opencl_read_host_from_device_raw(const int devid,
Expand Down Expand Up @@ -2931,25 +2921,14 @@ int dt_opencl_write_host_to_device(const int devid,
const int width,
const int height,
const int bpp)
{
return dt_opencl_write_host_to_device_rowpitch(devid, host, device,
width, height, width * bpp);
}

int dt_opencl_write_host_to_device_rowpitch(const int devid,
const void *host,
void *device,
const int width,
const int height,
const int rowpitch)
{
if(!_cldev_running(devid))
return DT_OPENCL_NODEVICE;

const size_t region[2] = { width, height };
// blocking.
return dt_opencl_write_host_to_device_raw(devid, host, device, CLIMG_ORIGIN,
region, rowpitch, TRUE);
region, (size_t)width * bpp, TRUE);
}

int dt_opencl_write_host_to_device_raw(const int devid,
Expand Down Expand Up @@ -3276,8 +3255,8 @@ void *dt_opencl_map_buffer(const int devid,
cl_mem buffer,
const gboolean blocking,
const int flags,
size_t offset,
size_t size)
const size_t offset,
const size_t size)
{
if(!_cldev_running(devid))
return NULL;
Expand Down Expand Up @@ -3545,10 +3524,10 @@ void dt_opencl_memory_statistics(int devid,
if(!((darktable.unmuted & DT_DEBUG_MEMORY) && (darktable.unmuted & DT_DEBUG_OPENCL)))
return;

if(devid < 0)
if(devid <= DT_DEVICE_CPU)
devid = _opencl_get_mem_context_id(mem);

if(devid < 0)
if(devid <= DT_DEVICE_CPU)
return;

dt_opencl_t *cl = darktable.opencl;
Expand Down Expand Up @@ -3622,7 +3601,7 @@ void dt_opencl_check_tuning(const int devid)

/* Return the `used_available` value stored for OpenCL device `devid`.
 *
 * devid: index into darktable.opencl->dev[].
 *
 * Returns 0 without touching dev[] when OpenCL is not initialised
 * (darktable.opencl->inited is false) or when `devid` is rejected by the
 * guards below; otherwise returns darktable.opencl->dev[devid].used_available.
 *
 * NOTE(review): the two guards look like the before/after lines of a diff
 * hunk (`devid < 0` replaced by `devid <= DT_DEVICE_CPU`) rather than two
 * intentionally sequential checks -- confirm against the upstream file.
 * Presumably DT_DEVICE_CPU is the "no OpenCL device" id; its value is not
 * visible here -- TODO confirm.
 */
cl_ulong dt_opencl_get_device_available(const int devid)
{
if(!darktable.opencl->inited || devid < 0) return 0;
if(!darktable.opencl->inited || devid <= DT_DEVICE_CPU) return 0;
return darktable.opencl->dev[devid].used_available;
}

Expand All @@ -3633,7 +3612,7 @@ static cl_ulong _opencl_get_device_memalloc(const int devid)

/* Public wrapper around the file-local _opencl_get_device_memalloc().
 *
 * devid: device id handed on to _opencl_get_device_memalloc().
 *
 * Returns 0 when OpenCL is not initialised (darktable.opencl->inited is
 * false) or when `devid` is rejected by the guards below; otherwise returns
 * whatever _opencl_get_device_memalloc(devid) reports.
 *
 * NOTE(review): the two guards look like the before/after lines of a diff
 * hunk (`devid < 0` replaced by `devid <= DT_DEVICE_CPU`) rather than two
 * intentionally sequential checks -- confirm against the upstream file.
 */
cl_ulong dt_opencl_get_device_memalloc(const int devid)
{
if(!darktable.opencl->inited || devid < 0) return 0;
if(!darktable.opencl->inited || devid <= DT_DEVICE_CPU) return 0;
return _opencl_get_device_memalloc(devid);
}

Expand Down Expand Up @@ -3784,7 +3763,7 @@ static cl_event *_opencl_events_get_slot(const int devid,
const char *tag)
{
dt_opencl_t *cl = darktable.opencl;
if(!cl->inited || devid < 0) return NULL;
if(!cl->inited || devid <= DT_DEVICE_CPU) return NULL;
if(!cl->dev[devid].use_events) return NULL;

static const cl_event zeroevent[1]; // implicitly initialized to zero
Expand Down Expand Up @@ -3886,7 +3865,7 @@ static cl_event *_opencl_events_get_slot(const int devid,
void dt_opencl_events_reset(const int devid)
{
dt_opencl_t *cl = darktable.opencl;
if(!cl->inited || devid < 0) return;
if(!cl->inited || devid <= DT_DEVICE_CPU) return;
if(!cl->dev[devid].use_events) return;

cl_event **eventlist = &(cl->dev[devid].eventlist);
Expand Down Expand Up @@ -3920,7 +3899,7 @@ void dt_opencl_events_reset(const int devid)
static void _opencl_events_wait_for(const int devid)
{
dt_opencl_t *cl = darktable.opencl;
if(!cl->inited || devid < 0) return;
if(!cl->inited || devid <= DT_DEVICE_CPU) return;
if(!cl->dev[devid].use_events) return;

static const cl_event zeroevent[1]; // implicitly initialized to zero
Expand Down Expand Up @@ -3961,7 +3940,7 @@ static void _opencl_events_profiling(const int devid,
const gboolean aggregated)
{
dt_opencl_t *cl = darktable.opencl;
if(!cl->inited || devid < 0) return;
if(!cl->inited || devid <= DT_DEVICE_CPU) return;
if(!cl->dev[devid].use_events) return;

cl_event **eventlist = &(cl->dev[devid].eventlist);
Expand Down Expand Up @@ -4061,7 +4040,7 @@ cl_int dt_opencl_events_flush(const int devid,
const gboolean reset)
{
dt_opencl_t *cl = darktable.opencl;
if(!cl->inited || devid < 0) return CL_SUCCESS;
if(!cl->inited || devid <= DT_DEVICE_CPU) return CL_SUCCESS;
if(!cl->dev[devid].use_events) return CL_SUCCESS;

cl_event **eventlist = &(cl->dev[devid].eventlist);
Expand Down Expand Up @@ -4168,7 +4147,7 @@ cl_int dt_opencl_local_buffer_opt(const int devid,
dt_opencl_local_buffer_t *factors)
{
dt_opencl_t *cl = darktable.opencl;
if(!cl->inited || devid < 0) return DT_OPENCL_NODEVICE;
if(!cl->inited || devid <= DT_DEVICE_CPU) return DT_OPENCL_NODEVICE;

size_t maxsizes[3] = { 0 }; // the maximum dimensions for a work group
size_t workgroupsize = 0; // the maximum number of items in a work group
Expand Down
18 changes: 2 additions & 16 deletions src/common/opencl.h
Original file line number Diff line number Diff line change
Expand Up @@ -442,13 +442,6 @@ int dt_opencl_copy_device_to_host(const int devid,
const int height,
const int bpp);

int dt_opencl_read_host_from_device_rowpitch(const int devid,
void *host,
void *device,
const int width,
const int height,
const int rowpitch);

int dt_opencl_read_host_from_device_raw(const int devid,
void *host,
void *device,
Expand All @@ -464,13 +457,6 @@ int dt_opencl_write_host_to_device(const int devid,
const int height,
const int bpp);

int dt_opencl_write_host_to_device_rowpitch(const int devid,
const void *host,
void *device,
const int width,
const int height,
const int rowpitch);

int dt_opencl_write_host_to_device_raw(const int devid,
const void *host,
void *device,
Expand Down Expand Up @@ -549,8 +535,8 @@ void *dt_opencl_map_buffer(const int devid,
cl_mem buffer,
const gboolean blocking,
const int flags,
size_t offset,
size_t size);
const size_t offset,
const size_t size);

int dt_opencl_unmap_mem_object(const int devid,
cl_mem mem_object,
Expand Down
52 changes: 30 additions & 22 deletions src/develop/blend.c
Original file line number Diff line number Diff line change
Expand Up @@ -1461,40 +1461,48 @@ void tiling_callback_blendop(dt_iop_module_t *self,
dt_develop_tiling_t *tiling)
{
tiling->factor = 0.0f;
tiling->factor_cl = 0.0f;
tiling->maxbuf = 1.0f;
tiling->maxbuf_cl = 1.0f;
tiling->overhead = 0;
tiling->overlap = 0;
tiling->align = 1;

dt_develop_blend_params_t *const bldata = piece->blendop_data;
if(bldata)
Comment thread
TurboGit marked this conversation as resolved.
if(bldata == NULL)
return;

if(bldata->details != 0.0f)
{
if(bldata->details != 0.0f)
// details mask requires 2 additional quarter buffers of details data size
// so normalize to roi_size
dt_dev_detail_mask_t *details = &piece->pipe->scharr;
if(details->data)
{
// details mask requires 2 additional quarter buffers of details data size
// so normalize to roi_size
dt_dev_detail_mask_t *details = &piece->pipe->scharr;
if(details->data)
tiling->factor = 0.5f * (float)(details->roi.width * details->roi.height) / (roi_in->width * roi_in->height);
}

if(bldata->feathering_radius > 0.1f) // we don't feather below that
tiling->factor = 0.5f * (float)(details->roi.width * details->roi.height) / (roi_in->width * roi_in->height);
tiling->factor_cl = tiling->factor;
}
}

if(bldata->feathering_radius > 0.1f) // we don't feather below that
{
const int devid = piece->pipe->devid;
if(devid > DT_DEVICE_CPU)
{
const int devid = piece->pipe->devid;
if(devid > DT_DEVICE_CPU)
{
/* OpenCL feathering does simple internal tiling for less mem pressure,
we still need some mem here for this.
*/
tiling->factor_cl = MAX(tiling->factor, 1.0f);
}
tiling->factor = MAX(tiling->factor, 18.0f * 0.25f); // we need all 18 intermediate guided filter mask buffers
/* OpenCL feathering does simple internal tiling for less mem pressure,
we still need some mem here for this.
*/
tiling->factor_cl = MAX(tiling->factor_cl, 1.0f);
}
tiling->factor = MAX(tiling->factor, 18.0f * 0.25f); // we need all 18 intermediate guided filter mask buffers

tiling->factor += 1.5f; // in + (guide, tmp) + two quarter buffers for the mask
tiling->factor_cl += 1.5f;
}

const float outnorm = (float)(roi_out->width * roi_out->height) / (roi_in->width * roi_in->height);
const float basic = 2.5f + outnorm; // in + out + (guide, tmp) + two quarter buffers for the mask
tiling->factor += basic;
tiling->factor_cl += basic;
tiling->factor += outnorm;
tiling->factor_cl += outnorm;
}

/** check if content of params is all zero, indicating a
Expand Down
9 changes: 5 additions & 4 deletions src/develop/pixelpipe_hb.c
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,8 @@ void dt_print_pipe_ext(const char *title,

if(pipe)
{
snprintf(pname, sizeof(pname), "[%s%s]",
snprintf(pname, sizeof(pname), "[%s%s%s]",
pipe->tiling ? "T" : "",
dt_dev_pixelpipe_type_to_str(pipe->type),
dt_pipe_is_canvas(pipe) && darktable.develop->late_scaling.enabled ? " HQ" : "");
if(pipe->mask_display == DT_DEV_PIXELPIPE_DISPLAY_PASSTHRU)
Expand Down Expand Up @@ -2028,11 +2029,11 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe,
/* get tiling requirement of module */
dt_develop_tiling_t tiling = { 0 };
// set sentinel value to detect whether callback set sizes
tiling.factor_cl = tiling.maxbuf_cl = -1;
tiling.factor_cl = tiling.maxbuf_cl = -1.0f;
module->tiling_callback(module, piece, &roi_in, roi_out, &tiling);
// default to CPU size if callback didn't set GPU
if(tiling.factor_cl < 0) tiling.factor_cl = tiling.factor;
if(tiling.maxbuf_cl < 0) tiling.maxbuf_cl = tiling.maxbuf;
if(tiling.factor_cl < 0.0f) tiling.factor_cl = tiling.factor;
if(tiling.maxbuf_cl < 0.0f) tiling.maxbuf_cl = tiling.maxbuf;

/* does this module involve blending? */
if(piece->blendop_data
Expand Down
Loading
Loading