Skip to content

Commit 1734662

Browse files
committed
<feature>[vm]: add metadata compute core and config
Resolves: ZSV-11559 Change-Id: I4ad67f16ba0a37ec0f5add571f4f74752bcfbee5
1 parent 986d802 commit 1734662

12 files changed

Lines changed: 641 additions & 0 deletions

File tree

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
package org.zstack.compute.vm;
2+
3+
import org.springframework.beans.factory.annotation.Autowired;
4+
import org.zstack.core.cloudbus.CloudBusCallBack;
5+
import org.zstack.core.componentloader.PluginRegistry;
6+
import org.zstack.core.db.Q;
7+
import org.zstack.core.gc.GC;
8+
import org.zstack.core.gc.GCCompletion;
9+
import org.zstack.core.gc.TimeBasedGarbageCollector;
10+
import org.zstack.header.host.HostVO;
11+
import org.zstack.header.message.MessageReply;
12+
import org.zstack.header.storage.primary.CleanupVmInstanceMetadataOnPrimaryStorageMsg;
13+
import org.zstack.header.storage.primary.PrimaryStorageConstant;
14+
import org.zstack.header.storage.primary.PrimaryStorageVO;
15+
import org.zstack.header.storage.primary.PrimaryStorageVO_;
16+
import org.zstack.header.vm.metadata.VmMetadataPathBuildExtensionPoint;
17+
import org.zstack.utils.Utils;
18+
import org.zstack.utils.logging.CLogger;
19+
20+
public class CleanupVmInstanceMetadataOnPrimaryStorageGC extends TimeBasedGarbageCollector {
21+
private static final CLogger logger = Utils.getLogger(CleanupVmInstanceMetadataOnPrimaryStorageGC.class);
22+
23+
@Autowired
24+
private PluginRegistry pluginRgty;
25+
26+
@GC
27+
public String primaryStorageUuid;
28+
@GC
29+
public String vmUuid;
30+
@GC
31+
public String rootVolumeUuid;
32+
@GC
33+
public String metadataPath;
34+
@GC
35+
public String hostUuid;
36+
37+
public static String getGCName(String vmUuid) {
38+
return String.format("gc-cleanup-vm-metadata-%s", vmUuid);
39+
}
40+
41+
@Override
42+
protected void triggerNow(GCCompletion completion) {
43+
if (!dbf.isExist(primaryStorageUuid, PrimaryStorageVO.class)) {
44+
logger.debug(String.format("[MetadataCleanupGC] primary storage[uuid:%s] no longer exists, " +
45+
"cancel gc for vm[uuid:%s]", primaryStorageUuid, vmUuid));
46+
completion.cancel();
47+
return;
48+
}
49+
50+
String psType = Q.New(PrimaryStorageVO.class).select(PrimaryStorageVO_.type).eq(PrimaryStorageVO_.uuid, primaryStorageUuid).findValue();
51+
if (psType == null) {
52+
logger.debug(String.format("[MetadataCleanupGC] primary storage[uuid:%s] type not found, " +
53+
"cancel gc for vm[uuid:%s]", primaryStorageUuid, vmUuid));
54+
completion.cancel();
55+
return;
56+
}
57+
58+
VmMetadataPathBuildExtensionPoint ext = pluginRgty.getExtensionFromMap(psType, VmMetadataPathBuildExtensionPoint.class);
59+
boolean requireHost = ext != null && ext.requireHostForCleanup();
60+
61+
// Determine effective hostUuid based on whether the PS type requires a host for cleanup.
62+
String effectiveHostUuid = hostUuid;
63+
if (!requireHost) {
64+
effectiveHostUuid = null;
65+
} else {
66+
if (effectiveHostUuid == null) {
67+
logger.debug(String.format("[MetadataCleanupGC] hostUuid is null and ps[uuid:%s, type:%s] " +
68+
"requires host for cleanup, cancel gc for vm[uuid:%s]",
69+
primaryStorageUuid, psType, vmUuid));
70+
completion.cancel();
71+
return;
72+
}
73+
if (!dbf.isExist(effectiveHostUuid, HostVO.class)) {
74+
logger.debug(String.format("[MetadataCleanupGC] host[uuid:%s] no longer exists " +
75+
"and ps[uuid:%s, type:%s] requires host for cleanup, " +
76+
"metadata is unreachable, cancel gc for vm[uuid:%s]",
77+
effectiveHostUuid, primaryStorageUuid, psType, vmUuid));
78+
completion.cancel();
79+
return;
80+
}
81+
}
82+
83+
CleanupVmInstanceMetadataOnPrimaryStorageMsg msg = new CleanupVmInstanceMetadataOnPrimaryStorageMsg();
84+
msg.setPrimaryStorageUuid(primaryStorageUuid);
85+
msg.setVmInstanceUuid(vmUuid);
86+
msg.setRootVolumeUuid(rootVolumeUuid);
87+
msg.setMetadataPath(metadataPath);
88+
msg.setHostUuid(effectiveHostUuid);
89+
90+
bus.makeTargetServiceIdByResourceUuid(msg, PrimaryStorageConstant.SERVICE_ID, primaryStorageUuid);
91+
bus.send(msg, new CloudBusCallBack(completion) {
92+
@Override
93+
public void run(MessageReply reply) {
94+
if (reply.isSuccess()) {
95+
logger.info(String.format("[MetadataCleanupGC] successfully cleaned up metadata " +
96+
"for vm[uuid:%s] on ps[uuid:%s]", vmUuid, primaryStorageUuid));
97+
completion.success();
98+
} else {
99+
logger.warn(String.format("[MetadataCleanupGC] failed to clean up metadata " +
100+
"for vm[uuid:%s] on ps[uuid:%s]: %s", vmUuid, primaryStorageUuid, reply.getError()));
101+
completion.fail(reply.getError());
102+
}
103+
}
104+
});
105+
}
106+
}
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
package org.zstack.compute.vm;
2+
3+
import org.springframework.beans.factory.annotation.Autowire;
4+
import org.springframework.beans.factory.annotation.Autowired;
5+
import org.springframework.beans.factory.annotation.Configurable;
6+
import org.zstack.core.cloudbus.CloudBus;
7+
import org.zstack.core.cloudbus.CloudBusCallBack;
8+
import org.zstack.core.componentloader.PluginRegistry;
9+
import org.zstack.core.db.Q;
10+
import org.zstack.header.core.workflow.FlowTrigger;
11+
import org.zstack.header.core.workflow.NoRollbackFlow;
12+
import org.zstack.header.message.MessageReply;
13+
import org.zstack.header.storage.primary.CleanupVmInstanceMetadataOnPrimaryStorageMsg;
14+
import org.zstack.header.storage.primary.PrimaryStorageConstant;
15+
import org.zstack.header.storage.primary.PrimaryStorageVO;
16+
import org.zstack.header.storage.primary.PrimaryStorageVO_;
17+
import org.zstack.header.vm.VmInstanceConstant;
18+
import org.zstack.header.vm.VmInstanceSpec;
19+
import org.zstack.header.vm.metadata.VmMetadataPathBuildExtensionPoint;
20+
import org.zstack.header.volume.VolumeInventory;
21+
import org.zstack.utils.Utils;
22+
import org.zstack.utils.logging.CLogger;
23+
24+
import java.util.Map;
25+
import java.util.concurrent.TimeUnit;
26+
27+
@Configurable(preConstruction = true, autowire = Autowire.BY_TYPE)
28+
public class VmExpungeMetadataFlow extends NoRollbackFlow {
29+
private static final CLogger logger = Utils.getLogger(VmExpungeMetadataFlow.class);
30+
31+
@Autowired
32+
private CloudBus bus;
33+
@Autowired
34+
private PluginRegistry pluginRgty;
35+
36+
@Override
37+
public void run(FlowTrigger trigger, Map data) {
38+
final VmInstanceSpec spec = (VmInstanceSpec) data.get(VmInstanceConstant.Params.VmInstanceSpec.toString());
39+
if (spec == null || spec.getVmInventory() == null) {
40+
logger.warn("[MetadataExpunge] missing VmInstanceSpec or VmInventory, skip metadata cleanup");
41+
trigger.next();
42+
return;
43+
}
44+
45+
final String vmUuid = spec.getVmInventory().getUuid();
46+
47+
VolumeInventory rootVolume = spec.getVmInventory().getRootVolume();
48+
String psUuid = rootVolume != null ? rootVolume.getPrimaryStorageUuid() : null;
49+
if (psUuid == null) {
50+
logger.debug(String.format("[MetadataExpunge] vm[uuid:%s] root volume has no primaryStorageUuid, " +
51+
"skipping metadata cleanup", vmUuid));
52+
trigger.next();
53+
return;
54+
}
55+
56+
57+
String psType = Q.New(PrimaryStorageVO.class).select(PrimaryStorageVO_.type).eq(PrimaryStorageVO_.uuid, psUuid).findValue();
58+
if (psType == null) {
59+
logger.warn(String.format("[MetadataExpunge] primary storage[uuid:%s] not found for vm[uuid:%s], " +
60+
"skip metadata cleanup", psUuid, vmUuid));
61+
trigger.next();
62+
return;
63+
}
64+
65+
VmMetadataPathBuildExtensionPoint ext = pluginRgty.getExtensionFromMap(psType, VmMetadataPathBuildExtensionPoint.class);
66+
if (ext == null) {
67+
logger.warn(String.format("[MetadataExpunge] no VmMetadataPathBuildExtensionPoint found for ps[uuid:%s, type:%s], " +
68+
"skip metadata cleanup", psUuid, psType));
69+
trigger.next();
70+
return;
71+
}
72+
final String metadataPath;
73+
try {
74+
metadataPath = ext.buildVmMetadataPath(psUuid, vmUuid);
75+
} catch (Exception e) {
76+
logger.warn(String.format("[MetadataExpunge] failed to build metadata path for vm[uuid:%s] on ps[uuid:%s], " +
77+
"skip metadata cleanup: %s", vmUuid, psUuid, e.getMessage()));
78+
trigger.next();
79+
return;
80+
}
81+
82+
String hostUuid = null;
83+
if (ext.requireHostForCleanup()) {
84+
hostUuid = spec.getVmInventory().getHostUuid();
85+
if (hostUuid == null) {
86+
hostUuid = spec.getVmInventory().getLastHostUuid();
87+
}
88+
89+
if (hostUuid == null) {
90+
logger.warn(String.format("[MetadataExpunge] vm[uuid:%s] hostUuid is null, " +
91+
"ps[uuid:%s, type:%s] requires host for cleanup, skip without submitting GC",
92+
vmUuid, psUuid, psType));
93+
trigger.next();
94+
return;
95+
}
96+
}
97+
98+
String rootVolumeUuid = rootVolume.getUuid();
99+
CleanupVmInstanceMetadataOnPrimaryStorageMsg cmsg = new CleanupVmInstanceMetadataOnPrimaryStorageMsg();
100+
cmsg.setPrimaryStorageUuid(psUuid);
101+
cmsg.setVmInstanceUuid(vmUuid);
102+
cmsg.setMetadataPath(metadataPath);
103+
cmsg.setRootVolumeUuid(rootVolumeUuid);
104+
cmsg.setHostUuid(hostUuid);
105+
final String finalPsUuid = psUuid;
106+
final String finalHostUuid = hostUuid;
107+
108+
bus.makeTargetServiceIdByResourceUuid(cmsg, PrimaryStorageConstant.SERVICE_ID, psUuid);
109+
bus.send(cmsg, new CloudBusCallBack(trigger) {
110+
@Override
111+
public void run(MessageReply reply) {
112+
if (reply.isSuccess()) {
113+
logger.info(String.format("[MetadataExpunge] successfully deleted metadata for vm[uuid:%s] on ps[uuid:%s]",
114+
vmUuid, finalPsUuid));
115+
} else {
116+
logger.warn(String.format("[MetadataExpunge] failed to delete metadata for vm[uuid:%s] on ps[uuid:%s]: %s, " +
117+
"submitting GC job for retry", vmUuid, finalPsUuid, reply.getError()));
118+
submitGC(finalPsUuid, vmUuid, rootVolumeUuid, metadataPath, finalHostUuid);
119+
}
120+
trigger.next();
121+
}
122+
});
123+
}
124+
125+
private void submitGC(String psUuid, String vmUuid, String rootVolumeUuid, String metadataPath, String hostUuid) {
126+
CleanupVmInstanceMetadataOnPrimaryStorageGC gc = new CleanupVmInstanceMetadataOnPrimaryStorageGC();
127+
gc.NAME = CleanupVmInstanceMetadataOnPrimaryStorageGC.getGCName(vmUuid);
128+
gc.primaryStorageUuid = psUuid;
129+
gc.vmUuid = vmUuid;
130+
gc.rootVolumeUuid = rootVolumeUuid;
131+
gc.metadataPath = metadataPath;
132+
gc.hostUuid = hostUuid;
133+
long gcIntervalSec = TimeUnit.HOURS.toSeconds(VmGlobalConfig.VM_METADATA_CLEANUP_GC_INTERVAL.value(Long.class));
134+
gc.deduplicateSubmit(gcIntervalSec, TimeUnit.SECONDS);
135+
136+
logger.info(String.format("[MetadataExpunge] submitted GC job [%s] for vm[uuid:%s] on ps[uuid:%s]", gc.NAME, vmUuid, psUuid));
137+
}
138+
}

compute/src/main/java/org/zstack/compute/vm/VmGlobalConfig.java

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,4 +143,52 @@ public class VmGlobalConfig {
143143
@GlobalConfigDef(defaultValue = "false", type = Boolean.class, description = "allowed TPM VM start without KMS")
144144
@GlobalConfigValidation(validValues = {"true", "false"})
145145
public static GlobalConfig ALLOWED_TPM_VM_WITHOUT_KMS = new GlobalConfig(CATEGORY, "allowed.tpm.vm.without.kms");
146+
147+
@GlobalConfigValidation(validValues = {"true", "false"})
148+
public static GlobalConfig VM_METADATA_ENABLED = new GlobalConfig(CATEGORY, "vm.metadata.enabled");
149+
150+
@GlobalConfigValidation()
151+
public static GlobalConfig VM_METADATA_LAST_REFRESH_VERSION = new GlobalConfig(CATEGORY, "vm.metadata.lastRefreshVersion");
152+
153+
@GlobalConfigValidation(numberGreaterThan = 0, numberLessThan = 100)
154+
public static GlobalConfig VM_METADATA_FLUSH_CONCURRENCY = new GlobalConfig(CATEGORY, "vm.metadata.flush.concurrency");
155+
156+
@GlobalConfigValidation(numberGreaterThan = 0, numberLessThan = 300)
157+
public static GlobalConfig VM_METADATA_FLUSH_POLL_INTERVAL = new GlobalConfig(CATEGORY, "vm.metadata.flush.pollInterval");
158+
159+
@GlobalConfigValidation(numberGreaterThan = 0, numberLessThan = 1000)
160+
public static GlobalConfig VM_METADATA_FLUSH_BATCH_SIZE = new GlobalConfig(CATEGORY, "vm.metadata.flush.batchSize");
161+
162+
@GlobalConfigValidation(numberGreaterThan = 0, numberLessThan = 168)
163+
public static GlobalConfig VM_METADATA_CLEANUP_GC_INTERVAL = new GlobalConfig(CATEGORY, "vm.metadata.cleanup.gc.interval");
164+
165+
@GlobalConfigValidation(numberGreaterThan = 0, numberLessThan = 10)
166+
public static GlobalConfig VM_METADATA_FLUSH_MAX_RETRY = new GlobalConfig(CATEGORY, "vm.metadata.flush.maxRetry");
167+
168+
@GlobalConfigValidation(numberGreaterThan = 0, numberLessThan = 120)
169+
public static GlobalConfig VM_METADATA_FLUSH_ZOMBIE_CLAIM_THRESHOLD = new GlobalConfig(CATEGORY, "vm.metadata.flush.zombieClaimThreshold");
170+
171+
@GlobalConfigValidation(numberGreaterThan = 21599, numberLessThan = 172801)
172+
public static GlobalConfig VM_METADATA_MAINTENANCE_CONTENT_DRIFT_INTERVAL = new GlobalConfig(CATEGORY, "vm.metadata.maintenance.contentDriftInterval");
173+
174+
@GlobalConfigValidation(numberGreaterThan = 0, numberLessThan = 86400)
175+
public static GlobalConfig VM_METADATA_MAINTENANCE_STALE_RECOVERY_INTERVAL = new GlobalConfig(CATEGORY, "vm.metadata.maintenance.staleRecoveryInterval");
176+
177+
@GlobalConfigValidation(numberGreaterThan = 0, numberLessThan = 1000)
178+
public static GlobalConfig VM_METADATA_MAINTENANCE_STALE_RECOVERY_MAX_CYCLES = new GlobalConfig(CATEGORY, "vm.metadata.maintenance.staleRecoveryMaxCycles");
179+
180+
@GlobalConfigValidation(numberGreaterThan = 0)
181+
public static GlobalConfig VM_METADATA_PAYLOAD_REJECT_THRESHOLD = new GlobalConfig(CATEGORY, "vm.metadata.payload.rejectThreshold");
182+
183+
@GlobalConfigValidation(numberGreaterThan = 0, numberLessThan = 86400)
184+
public static GlobalConfig VM_METADATA_MAINTENANCE_ORPHAN_CHECK_INTERVAL = new GlobalConfig(CATEGORY, "vm.metadata.maintenance.orphanCheckInterval");
185+
186+
@GlobalConfigValidation(numberGreaterThan = 0, numberLessThan = 20)
187+
public static GlobalConfig VM_METADATA_MAINTENANCE_STALE_RECOVERY_BATCH_SIZE = new GlobalConfig(CATEGORY, "vm.metadata.maintenance.staleRecoveryBatchSize");
188+
189+
@GlobalConfigValidation(numberGreaterThan = 9, numberLessThan = 201)
190+
public static GlobalConfig VM_METADATA_MAINTENANCE_CONTENT_DRIFT_BATCH_SIZE = new GlobalConfig(CATEGORY, "vm.metadata.maintenance.contentDriftBatchSize");
191+
192+
@GlobalConfigValidation(numberGreaterThan = 0, numberLessThan = 31)
193+
public static GlobalConfig VM_METADATA_MAINTENANCE_CONTENT_DRIFT_BATCH_SLEEP_SEC = new GlobalConfig(CATEGORY, "vm.metadata.maintenance.contentDriftBatchSleepSec");
146194
}

compute/src/main/java/org/zstack/compute/vm/VmInstanceApiInterceptor.java

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,14 +23,22 @@
2323
import org.zstack.header.network.l3.*;
2424
import org.zstack.header.storage.primary.PrimaryStorageClusterRefVO;
2525
import org.zstack.header.storage.primary.PrimaryStorageClusterRefVO_;
26+
import org.zstack.header.storage.primary.PrimaryStorageVO;
27+
import org.zstack.header.storage.primary.PrimaryStorageVO_;
2628
import org.zstack.header.storage.snapshot.VolumeSnapshotVO;
2729
import org.zstack.header.storage.snapshot.VolumeSnapshotVO_;
2830
import org.zstack.header.storage.snapshot.group.VolumeSnapshotGroupVO;
2931
import org.zstack.header.storage.snapshot.group.VolumeSnapshotGroupVO_;
32+
import org.zstack.header.host.HostState;
33+
import org.zstack.header.host.HostStatus;
34+
import org.zstack.header.host.HostVO;
35+
import org.zstack.header.host.HostVO_;
3036
import org.zstack.header.vm.*;
3137
import org.zstack.header.vm.cdrom.*;
38+
import org.zstack.header.vm.APIRegisterVmInstanceFromMetadataMsg;
3239
import org.zstack.header.vm.devices.VmInstanceResourceMetadataGroupVO;
3340
import org.zstack.header.vm.devices.VmInstanceResourceMetadataGroupVO_;
41+
import org.zstack.header.vm.metadata.VmMetadataPathBuildExtensionPoint;
3442
import org.zstack.header.volume.*;
3543
import org.zstack.network.l2.L2NetworkHostUtils;
3644
import org.zstack.resourceconfig.ResourceConfigFacade;
@@ -166,6 +174,8 @@ else if (msg instanceof APIAttachVmNicToVmMsg) {
166174
validate((APIConvertTemplatedVmInstanceToVmInstanceMsg) msg);
167175
} else if (msg instanceof APIDeleteTemplatedVmInstanceMsg) {
168176
validate((APIDeleteTemplatedVmInstanceMsg) msg);
177+
} else if (msg instanceof APIRegisterVmInstanceFromMetadataMsg) {
178+
validate((APIRegisterVmInstanceFromMetadataMsg) msg);
169179
}
170180

171181
if (msg instanceof NewVmInstanceMessage2) {
@@ -1319,4 +1329,65 @@ private void validate(APIFstrimVmMsg msg) {
13191329
}
13201330
msg.setHostUuid(t.get(1, String.class));
13211331
}
1332+
1333+
private void validate(APIRegisterVmInstanceFromMetadataMsg msg) {
1334+
String path = msg.getMetadataPath();
1335+
if (StringUtils.isEmpty(path)) {
1336+
throw new ApiMessageInterceptionException(argerr("metadataPath cannot be empty or null"));
1337+
}
1338+
1339+
// Delegate path validation to the storage-type-specific extension
1340+
String psUuid = msg.getPrimaryStorageUuid();
1341+
String psType = Q.New(PrimaryStorageVO.class).select(PrimaryStorageVO_.type).eq(PrimaryStorageVO_.uuid, psUuid).findValue();
1342+
if (psType == null) {
1343+
throw new ApiMessageInterceptionException(argerr(
1344+
"primary storage[uuid:%s] not found", psUuid));
1345+
}
1346+
1347+
VmMetadataPathBuildExtensionPoint ext = pluginRgty.getExtensionFromMap(psType, VmMetadataPathBuildExtensionPoint.class);
1348+
if (ext == null) {
1349+
throw new ApiMessageInterceptionException(argerr(
1350+
"primary storage[uuid:%s, type:%s] does not support vm metadata", psUuid, psType));
1351+
}
1352+
1353+
String error = ext.validateMetadataPath(psUuid, path);
1354+
if (error != null) {
1355+
throw new ApiMessageInterceptionException(argerr("%s", error));
1356+
}
1357+
1358+
boolean psAttachedToCluster = Q.New(PrimaryStorageClusterRefVO.class)
1359+
.eq(PrimaryStorageClusterRefVO_.primaryStorageUuid, psUuid)
1360+
.eq(PrimaryStorageClusterRefVO_.clusterUuid, msg.getClusterUuid())
1361+
.isExists();
1362+
if (!psAttachedToCluster) {
1363+
throw new ApiMessageInterceptionException(argerr(
1364+
"primary storage[uuid:%s] is not attached to cluster[uuid:%s]",
1365+
psUuid, msg.getClusterUuid()));
1366+
}
1367+
1368+
if (msg.getHostUuid() != null) {
1369+
boolean hostAvailable = Q.New(HostVO.class)
1370+
.eq(HostVO_.uuid, msg.getHostUuid())
1371+
.eq(HostVO_.clusterUuid, msg.getClusterUuid())
1372+
.eq(HostVO_.state, HostState.Enabled)
1373+
.eq(HostVO_.status, HostStatus.Connected)
1374+
.isExists();
1375+
if (!hostAvailable) {
1376+
throw new ApiMessageInterceptionException(argerr(
1377+
"host[uuid:%s] is not in cluster[uuid:%s] or not Enabled/Connected", msg.getHostUuid(), msg.getClusterUuid()));
1378+
}
1379+
} else {
1380+
boolean hasHost = Q.New(HostVO.class)
1381+
.eq(HostVO_.clusterUuid, msg.getClusterUuid())
1382+
.eq(HostVO_.state, HostState.Enabled)
1383+
.eq(HostVO_.status, HostStatus.Connected)
1384+
.isExists();
1385+
if (!hasHost) {
1386+
throw new ApiMessageInterceptionException(argerr(
1387+
"no available host found in cluster[uuid:%s], " +
1388+
"please specify hostUuid or ensure there is at least one connected host in the cluster",
1389+
msg.getClusterUuid()));
1390+
}
1391+
}
1392+
}
13221393
}

0 commit comments

Comments
 (0)