Skip to content

Commit 4d270cf

Browse files
EdCaunt authored and mloubout committed
compiler: Initial investigations to deviceid handling when visible devices specified
1 parent c207ded commit 4d270cf

2 files changed

Lines changed: 24 additions & 1 deletion

File tree

devito/operator/operator.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1403,6 +1403,9 @@ def nbytes_avail_mapper(self):
14031403
# The amount of space available on the device
14041404
if isinstance(self.platform, Device):
14051405
deviceid = max(self.get('deviceid', 0), 0)
1406+
# FIXME: This may pick the wrong device when CUDA_VISIBLE_DEVICES is set.
1407+
# Looks like it uses the physical device ID, not the logical one due to dependence
1408+
# on Nvidia SMI -> remote into Timewarp and check this
14061409
mapper[device_layer] = self.platform.memavail(deviceid=deviceid)
14071410

14081411
# The amount of space available on the host

devito/types/parallel.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -275,17 +275,37 @@ class DeviceID(DeviceSymbol):
275275

276276
name = 'deviceid'
277277

278-
@property
278+
@cached_property
def default_value(self):
    """
    Sentinel device ID, always -1.

    `_arg_values` returns this value when the device ID is neither passed
    explicitly nor set to something different in `configuration['deviceid']`.
    """
    return -1
281281

282+
@cached_property
def visible_devices(self):
    """
    Device indices exposed through the ``*_VISIBLE_DEVICES`` environment.

    The variables ``CUDA_VISIBLE_DEVICES``, ``ROCR_VISIBLE_DEVICES`` and
    ``HIP_VISIBLE_DEVICES`` are inspected in that order; the first one that
    is set and contains only integers determines the result.

    Returns
    -------
    tuple of int or None
        The visible device indices, in the order they are listed. An empty
        tuple means a variable is set but exposes no device (e.g. ``-1``).
        None means no variable is set, or every variable that is set uses
        non-integer identifiers such as UUIDs.

    Notes
    -----
    The value is computed on first access and then cached by
    ``cached_property``.
    """
    device_vars = (
        'CUDA_VISIBLE_DEVICES',
        'ROCR_VISIBLE_DEVICES',
        'HIP_VISIBLE_DEVICES',
    )
    for var in device_vars:
        raw = os.environ.get(var)
        if raw is None:
            continue

        try:
            indices = [int(entry) for entry in raw.split(',')]
        except ValueError:
            # Visible devices set via UUIDs or other non-integer identifiers
            continue

        # CUDA documents that an invalid index hides itself and every entry
        # after it, which is why ``-1`` is commonly used to hide all devices.
        # NOTE(review): the same rule is assumed for the ROCR/HIP variables
        # -- TODO confirm against the ROCm documentation.
        visible = []
        for index in indices:
            if index < 0:
                break
            visible.append(index)
        return tuple(visible)

    return None
298+
282299
def _arg_values(self, **kwargs):
    """
    Build the runtime argument mapping for this symbol.

    The value is resolved in this order:

    1. an explicit override passed under this symbol's name in ``kwargs``;
    2. ``configuration['deviceid']``, if it differs from ``default_value``;
    3. ``default_value``.

    Returns a single-entry dict keyed by the symbol's name.
    """
    # An explicit override always wins
    if self.name in kwargs:
        override = kwargs.pop(self.name)
        return {self.name: override}

    # Otherwise honour a non-default global configuration
    if configuration['deviceid'] != self.default_value:
        return {self.name: configuration['deviceid']}

    # FIXME: Before falling back to the default, this should first check
    # whether CUDA_VISIBLE_DEVICES is set.
    # Open question: what should this use for multi-GPU runs?
    # Possibly this belongs in `default_value` instead.
    return {self.name: self.default_value}
289309

290310

291311
class DeviceRM(DeviceSymbol):

0 commit comments

Comments
 (0)