File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -1403,6 +1403,9 @@ def nbytes_avail_mapper(self):
14031403 # The amount of space available on the device
14041404 if isinstance (self .platform , Device ):
14051405 deviceid = max (self .get ('deviceid' , 0 ), 0 )
1406+ # FIXME: I think this may pick the wrong device when CUDA_VISIBLE_DEVICES is set.
1407+ # It looks like it uses the physical device ID, not the logical one, because of the
1408+ # dependence on nvidia-smi -> remote into Timewarp and check this
14061409 mapper [device_layer ] = self .platform .memavail (deviceid = deviceid )
14071410
14081411 # The amount of space available on the host
Original file line number Diff line number Diff line change @@ -275,17 +275,37 @@ class DeviceID(DeviceSymbol):
275275
276276 name = 'deviceid'
277277
278- @property
278+ @cached_property
279279 def default_value (self ):
280280 return - 1
281281
282+ @cached_property
283+ def visible_devices (self ):
284+ device_vars = (
285+ 'CUDA_VISIBLE_DEVICES' ,
286+ 'ROCR_VISIBLE_DEVICES' ,
287+ 'HIP_VISIBLE_DEVICES'
288+ )
289+ for v in device_vars :
290+ if v in os .environ :
291+ try :
292+ return tuple (int (i ) for i in os .environ [v ].split (',' ))
293+ except ValueError :
294+ # Visible devices set via UUIDs or other non-integer identifiers
295+ continue
296+
297+ return None
298+
282299 def _arg_values (self , ** kwargs ):
283300 if self .name in kwargs :
284301 return {self .name : kwargs .pop (self .name )}
285302 elif configuration ['deviceid' ] != self .default_value :
286303 return {self .name : configuration ['deviceid' ]}
287304 else :
288305 return {self .name : self .default_value }
306+ # FIXME: This should first check whether CUDA_VISIBLE_DEVICES is set, before the final else
307+ # What should this use for multi-GPU runs?
308+ # Possibly handle this in default_value instead
289309
290310
291311class DeviceRM (DeviceSymbol ):
You can’t perform that action at this time.
0 commit comments