Skip to content

Commit 616b3f8

Browse files
Add multipath serial and size checks; enhance unstaging to ensure a correct mpath device is removed
This change adds multiple verifications in the staging and unstaging paths of iSCSI volumes. This ensures, multipath devices are not influenced by any existing Ghost devices, at the same time these checks ensure ghost devices can be safely removed without influencing good devices.
1 parent 5680f76 commit 616b3f8

10 files changed

Lines changed: 683 additions & 76 deletions

File tree

core/orchestrator_core.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3441,8 +3441,9 @@ func (o *TridentOrchestrator) AttachVolume(
34413441
return utils.MountDevice(ctx, loopDeviceName, mountpoint, publishInfo.SubvolumeMountOptions, isRawBlock)
34423442
}
34433443
} else {
3444-
return utils.AttachISCSIVolumeRetry(ctx, volumeName, mountpoint, publishInfo, map[string]string{},
3444+
_, err := utils.AttachISCSIVolumeRetry(ctx, volumeName, mountpoint, publishInfo, map[string]string{},
34453445
AttachISCSIVolumeTimeoutLong)
3446+
return err
34463447
}
34473448
}
34483449

frontend/csi/node_server.go

Lines changed: 93 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -422,7 +422,8 @@ func (p *Plugin) NodeExpandVolume(
422422
}).Warn("Received something other than the expected stagingTargetPath.")
423423
}
424424

425-
err = p.nodeExpandVolume(ctx, &trackingInfo.VolumePublishInfo, requiredBytes, stagingTargetPath, volumeId, req.GetSecrets())
425+
err = p.nodeExpandVolume(ctx, &trackingInfo.VolumePublishInfo, requiredBytes, stagingTargetPath, volumeId,
426+
req.GetSecrets())
426427
if err != nil {
427428
return nil, err
428429
}
@@ -1017,11 +1018,12 @@ func (p *Plugin) populatePublishedSessions(ctx context.Context) {
10171018
volumeIDs := utils.GetAllVolumeIDs(ctx, tridentDeviceInfoPath)
10181019
for _, volumeID := range volumeIDs {
10191020
trackingInfo, err := p.nodeHelper.ReadTrackingInfo(ctx, volumeID)
1020-
if err != nil {
1021+
if err != nil || trackingInfo == nil {
10211022
Logc(ctx).WithFields(LogFields{
1022-
"VolumeID": volumeID,
1023-
"Error": err.Error(),
1024-
}).Error("Volume tracking file info not found.")
1023+
"volumeID": volumeID,
1024+
"error": err.Error(),
1025+
"isEmpty": trackingInfo == nil,
1026+
}).Error("Volume tracking file info not found or is empty.")
10251027

10261028
continue
10271029
}
@@ -1037,6 +1039,26 @@ func (p *Plugin) populatePublishedSessions(ctx context.Context) {
10371039
}
10381040
}
10391041

1042+
func (p *Plugin) readAllTrackingFiles(ctx context.Context) []utils.VolumePublishInfo {
1043+
publishInfos := make([]utils.VolumePublishInfo, 0)
1044+
volumeIDs := utils.GetAllVolumeIDs(ctx, tridentDeviceInfoPath)
1045+
for _, volumeID := range volumeIDs {
1046+
trackingInfo, err := p.nodeHelper.ReadTrackingInfo(ctx, volumeID)
1047+
if err != nil || trackingInfo == nil {
1048+
Logc(ctx).WithError(err).WithFields(LogFields{
1049+
"volumeID": volumeID,
1050+
"isEmpty": trackingInfo == nil,
1051+
}).Error("Volume tracking file info not found or is empty.")
1052+
1053+
continue
1054+
}
1055+
1056+
publishInfos = append(publishInfos, trackingInfo.VolumePublishInfo)
1057+
}
1058+
1059+
return publishInfos
1060+
}
1061+
10401062
func (p *Plugin) nodeStageISCSIVolume(
10411063
ctx context.Context, req *csi.NodeStageVolumeRequest, publishInfo *utils.VolumePublishInfo,
10421064
) error {
@@ -1098,7 +1120,8 @@ func (p *Plugin) nodeStageISCSIVolume(
10981120
}
10991121
}
11001122

1101-
if err = p.ensureAttachISCSIVolume(ctx, req, "", publishInfo, AttachISCSIVolumeTimeoutShort); err != nil {
1123+
mpathSize, err := p.ensureAttachISCSIVolume(ctx, req, "", publishInfo, AttachISCSIVolumeTimeoutShort)
1124+
if err != nil {
11021125
return err
11031126
}
11041127

@@ -1107,7 +1130,8 @@ func (p *Plugin) nodeStageISCSIVolume(
11071130
return err
11081131
}
11091132
if isLUKS {
1110-
luksDevice, err := utils.NewLUKSDeviceFromMappingPath(ctx, publishInfo.DevicePath, req.VolumeContext["internalName"])
1133+
luksDevice, err := utils.NewLUKSDeviceFromMappingPath(ctx, publishInfo.DevicePath,
1134+
req.VolumeContext["internalName"])
11111135
if err != nil {
11121136
return err
11131137
}
@@ -1118,6 +1142,20 @@ func (p *Plugin) nodeStageISCSIVolume(
11181142
}
11191143
}
11201144

1145+
if mpathSize > 0 {
1146+
Logc(ctx).Warn("Multipath device size may not be correct, performing gratuitous resize.")
1147+
1148+
err = p.nodeExpandVolume(ctx, publishInfo, mpathSize, stagingTargetPath, volumeId, req.GetSecrets())
1149+
if err != nil {
1150+
Logc(ctx).WithFields(LogFields{
1151+
"multipathDevice": publishInfo.DevicePath,
1152+
"volumeID": volumeId,
1153+
"size": mpathSize,
1154+
"err": err,
1155+
}).Warn("Attempt to perform gratuitous resize failed.")
1156+
}
1157+
}
1158+
11211159
volTrackingInfo := &utils.VolumeTrackingInfo{
11221160
VolumePublishInfo: *publishInfo,
11231161
StagingTargetPath: stagingTargetPath,
@@ -1139,28 +1177,32 @@ func (p *Plugin) nodeStageISCSIVolume(
11391177
func (p *Plugin) ensureAttachISCSIVolume(
11401178
ctx context.Context, req *csi.NodeStageVolumeRequest, mountpoint string,
11411179
publishInfo *utils.VolumePublishInfo, attachTimeout time.Duration,
1142-
) error {
1180+
) (int64, error) {
1181+
var err error
1182+
var mpathSize int64
1183+
11431184
// Perform the login/rescan/discovery/(optionally)format, mount & get the device back in the publish info
1144-
if err := utils.AttachISCSIVolumeRetry(ctx, req.VolumeContext["internalName"], mountpoint, publishInfo,
1185+
if mpathSize, err = utils.AttachISCSIVolumeRetry(ctx, req.VolumeContext["internalName"], mountpoint, publishInfo,
11451186
req.GetSecrets(), attachTimeout); err != nil {
11461187
// Did we fail to log in?
11471188
if errors.IsAuthError(err) {
11481189
// Update CHAP info from the controller and try one more time.
11491190
Logc(ctx).Warn("iSCSI login failed; will retrieve CHAP credentials from Trident controller and try again.")
11501191
if err = p.updateChapInfoFromController(ctx, req, publishInfo); err != nil {
1151-
return status.Error(codes.Internal, err.Error())
1192+
return mpathSize, status.Error(codes.Internal, err.Error())
11521193
}
1153-
if err = utils.AttachISCSIVolumeRetry(ctx, req.VolumeContext["internalName"], mountpoint, publishInfo,
1194+
if mpathSize, err = utils.AttachISCSIVolumeRetry(ctx, req.VolumeContext["internalName"], mountpoint,
1195+
publishInfo,
11541196
req.GetSecrets(), attachTimeout); err != nil {
11551197
// Bail out no matter what as we've now tried with updated credentials
1156-
return status.Error(codes.Internal, err.Error())
1198+
return mpathSize, status.Error(codes.Internal, err.Error())
11571199
}
11581200
} else {
1159-
return status.Error(codes.Internal, fmt.Sprintf("failed to stage volume: %v", err))
1201+
return mpathSize, status.Error(codes.Internal, fmt.Sprintf("failed to stage volume: %v", err))
11601202
}
11611203
}
11621204

1163-
return nil
1205+
return mpathSize, nil
11641206
}
11651207

11661208
func (p *Plugin) updateChapInfoFromController(
@@ -1192,18 +1234,41 @@ func (p *Plugin) nodeUnstageISCSIVolume(
11921234
return fmt.Errorf("could not parse LUKSEncryption into a bool, got %v", publishInfo.LUKSEncryption)
11931235
}
11941236
if isLUKS {
1195-
err := utils.EnsureLUKSDeviceClosed(ctx, publishInfo.DevicePath)
1237+
// Before closing the device, get the corresponding DM device.
1238+
publishedLUKsDevice, err := utils.GetUnderlyingDevicePathForLUKSDevice(ctx, publishInfo.DevicePath)
1239+
if err != nil {
1240+
// No need to return an error
1241+
Logc(ctx).WithFields(LogFields{
1242+
"devicePath": publishInfo.DevicePath,
1243+
"LUN": publishInfo.IscsiLunNumber,
1244+
"err": err,
1245+
}).Error("Failed to verify the multipath device, could not determine" +
1246+
" underlying device for LUKS mapping.")
1247+
}
1248+
1249+
err = utils.EnsureLUKSDeviceClosed(ctx, publishInfo.DevicePath)
11961250
if err != nil {
11971251
return err
11981252
}
1253+
1254+
// For the future steps LUKs device path is not really useful, either it should be
1255+
// DM device or empty.
1256+
publishInfo.DevicePath = publishedLUKsDevice
11991257
}
12001258
}
12011259

12021260
// Delete the device from the host.
1203-
unmappedMpathDevice, err := utils.PrepareDeviceForRemoval(ctx, int(publishInfo.IscsiLunNumber),
1204-
publishInfo.IscsiTargetIQN, p.unsafeDetach, force)
1205-
if nil != err && !p.unsafeDetach {
1206-
return status.Error(codes.Internal, err.Error())
1261+
unmappedMpathDevice, err := utils.PrepareDeviceForRemoval(ctx, publishInfo, nil, p.unsafeDetach, force)
1262+
if err != nil {
1263+
if errors.IsISCSISameLunNumberError(err) {
1264+
// There is a need to pass all the publish infos this time
1265+
unmappedMpathDevice, err = utils.PrepareDeviceForRemoval(ctx, publishInfo, p.readAllTrackingFiles(ctx),
1266+
p.unsafeDetach, force)
1267+
}
1268+
1269+
if err != nil && !p.unsafeDetach {
1270+
return status.Error(codes.Internal, err.Error())
1271+
}
12071272
}
12081273

12091274
// Get map of hosts and sessions for given Target IQN.
@@ -1325,7 +1390,8 @@ func (p *Plugin) nodePublishISCSIVolume(
13251390
}
13261391
if isLUKS {
13271392
// Rotate the LUKS passphrase if needed, on failure, log and continue to publish
1328-
luksDevice, err := utils.NewLUKSDeviceFromMappingPath(ctx, publishInfo.DevicePath, req.VolumeContext["internalName"])
1393+
luksDevice, err := utils.NewLUKSDeviceFromMappingPath(ctx, publishInfo.DevicePath,
1394+
req.VolumeContext["internalName"])
13291395
if err != nil {
13301396
return nil, status.Error(codes.Internal, err.Error())
13311397
}
@@ -1889,7 +1955,7 @@ func (p *Plugin) selfHealingRectifySession(ctx context.Context, portal string, a
18891955

18901956
publishedCHAPCredentials := publishInfo.IscsiChapInfo
18911957

1892-
if err = p.ensureAttachISCSIVolume(ctx, req, "", &publishInfo, iSCSILoginTimeout); err != nil {
1958+
if _, err = p.ensureAttachISCSIVolume(ctx, req, "", &publishInfo, iSCSILoginTimeout); err != nil {
18931959
return fmt.Errorf("failed to login to the target")
18941960
}
18951961

@@ -2123,7 +2189,8 @@ func (p *Plugin) nodeStageNVMeVolume(
21232189
publishInfo.NVMeTargetIPs = strings.Split(req.PublishContext["nvmeTargetIPs"], ",")
21242190
publishInfo.SANType = req.PublishContext["SANType"]
21252191

2126-
if err := utils.AttachNVMeVolumeRetry(ctx, req.VolumeContext["internalName"], "", publishInfo, nil, nvmeAttachTimeout); err != nil {
2192+
if err := utils.AttachNVMeVolumeRetry(ctx, req.VolumeContext["internalName"], "", publishInfo, nil,
2193+
nvmeAttachTimeout); err != nil {
21272194
return err
21282195
}
21292196

@@ -2133,7 +2200,8 @@ func (p *Plugin) nodeStageNVMeVolume(
21332200
}
21342201

21352202
if isLUKS {
2136-
luksDevice, err := utils.NewLUKSDeviceFromMappingPath(ctx, publishInfo.DevicePath, req.VolumeContext["internalName"])
2203+
luksDevice, err := utils.NewLUKSDeviceFromMappingPath(ctx, publishInfo.DevicePath,
2204+
req.VolumeContext["internalName"])
21372205
if err != nil {
21382206
return err
21392207
}
@@ -2245,7 +2313,8 @@ func (p *Plugin) nodePublishNVMeVolume(
22452313
}
22462314
if isLUKS {
22472315
// Rotate the LUKS passphrase if needed, on failure, log and continue to publish
2248-
luksDevice, err := utils.NewLUKSDeviceFromMappingPath(ctx, publishInfo.DevicePath, req.VolumeContext["internalName"])
2316+
luksDevice, err := utils.NewLUKSDeviceFromMappingPath(ctx, publishInfo.DevicePath,
2317+
req.VolumeContext["internalName"])
22492318
if err != nil {
22502319
return nil, status.Error(codes.Internal, err.Error())
22512320
}

mocks/mock_utils/mock_iscsi_utils.go

Lines changed: 45 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

storage_drivers/ontap/ontap_san.go

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -719,8 +719,18 @@ func (d *SANStorageDriver) Destroy(ctx context.Context, volConfig *storage.Volum
719719
return fmt.Errorf("error reading LUN maps for volume %s: %v", name, err)
720720
}
721721
if lunID >= 0 {
722+
publishInfo := utils.VolumePublishInfo{
723+
DevicePath: "",
724+
VolumeAccessInfo: utils.VolumeAccessInfo{
725+
IscsiAccessInfo: utils.IscsiAccessInfo{
726+
IscsiTargetIQN: iSCSINodeName,
727+
IscsiLunNumber: int32(lunID),
728+
},
729+
},
730+
}
731+
722732
// Inform the host about the device removal
723-
if _, err := utils.PrepareDeviceForRemoval(ctx, lunID, iSCSINodeName, true, false); err != nil {
733+
if _, err := utils.PrepareDeviceForRemoval(ctx, &publishInfo, nil, true, false); err != nil {
724734
Logc(ctx).Error(err)
725735
}
726736
}

storage_drivers/ontap/ontap_san_economy.go

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -884,8 +884,18 @@ func (d *SANEconomyStorageDriver) Destroy(ctx context.Context, volConfig *storag
884884
return fmt.Errorf("error reading LUN maps for volume %s: %v", name, err)
885885
}
886886
if lunID >= 0 {
887+
publishInfo := utils.VolumePublishInfo{
888+
DevicePath: "",
889+
VolumeAccessInfo: utils.VolumeAccessInfo{
890+
IscsiAccessInfo: utils.IscsiAccessInfo{
891+
IscsiTargetIQN: iSCSINodeName,
892+
IscsiLunNumber: int32(lunID),
893+
},
894+
},
895+
}
896+
887897
// Inform the host about the device removal
888-
if _, err := utils.PrepareDeviceForRemoval(ctx, lunID, iSCSINodeName, true, false); err != nil {
898+
if _, err := utils.PrepareDeviceForRemoval(ctx, &publishInfo, nil, true, false); err != nil {
889899
Logc(ctx).Error(err)
890900
}
891901
}

storage_drivers/solidfire/solidfire_san.go

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1134,9 +1134,18 @@ func (d *SANStorageDriver) Destroy(ctx context.Context, volConfig *storage.Volum
11341134
}
11351135

11361136
if d.Config.DriverContext == tridentconfig.ContextDocker {
1137+
publishInfo := utils.VolumePublishInfo{
1138+
DevicePath: "",
1139+
VolumeAccessInfo: utils.VolumeAccessInfo{
1140+
IscsiAccessInfo: utils.IscsiAccessInfo{
1141+
IscsiTargetIQN: v.Iqn,
1142+
IscsiLunNumber: 0,
1143+
},
1144+
},
1145+
}
11371146

11381147
// Inform the host about the device removal
1139-
if _, err = utils.PrepareDeviceForRemoval(ctx, 0, v.Iqn, true, false); err != nil {
1148+
if _, err = utils.PrepareDeviceForRemoval(ctx, &publishInfo, nil, true, false); err != nil {
11401149
Logc(ctx).Warningf("Unable to prepare device for removal, attempting to detach anyway: %v", err)
11411150
}
11421151

0 commit comments

Comments
 (0)