Skip to content

Commit 70b634f

Browse files
authored
Linstor: add HA support and small cleanups (#8407)
* linstor: Outline get storagepools from resourcegroup into function * linstor: move getHostname() to kvm/Pool and reimplement * linstor: implement CloudStack HA support
1 parent d7362dd commit 70b634f

File tree

6 files changed

+178
-90
lines changed

6 files changed

+178
-90
lines changed

plugins/storage/volume/linstor/src/main/java/com/cloud/hypervisor/kvm/storage/LinstorStorageAdaptor.java

Lines changed: 5 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -16,27 +16,23 @@
1616
// under the License.
1717
package com.cloud.hypervisor.kvm.storage;
1818

19-
import java.io.BufferedReader;
20-
import java.io.IOException;
21-
import java.io.InputStreamReader;
2219
import java.util.Collections;
2320
import java.util.HashMap;
2421
import java.util.List;
2522
import java.util.Map;
2623
import java.util.Optional;
27-
import java.util.StringJoiner;
2824

2925
import javax.annotation.Nonnull;
3026

27+
import com.cloud.storage.Storage;
28+
import com.cloud.utils.exception.CloudRuntimeException;
3129
import org.apache.cloudstack.storage.datastore.util.LinstorUtil;
3230
import org.apache.cloudstack.utils.qemu.QemuImg;
3331
import org.apache.cloudstack.utils.qemu.QemuImgException;
3432
import org.apache.cloudstack.utils.qemu.QemuImgFile;
3533
import org.apache.log4j.Logger;
3634
import org.libvirt.LibvirtException;
3735

38-
import com.cloud.storage.Storage;
39-
import com.cloud.utils.exception.CloudRuntimeException;
4036
import com.linbit.linstor.api.ApiClient;
4137
import com.linbit.linstor.api.ApiException;
4238
import com.linbit.linstor.api.Configuration;
@@ -47,7 +43,6 @@
4743
import com.linbit.linstor.api.model.ProviderKind;
4844
import com.linbit.linstor.api.model.ResourceDefinition;
4945
import com.linbit.linstor.api.model.ResourceDefinitionModify;
50-
import com.linbit.linstor.api.model.ResourceGroup;
5146
import com.linbit.linstor.api.model.ResourceGroupSpawn;
5247
import com.linbit.linstor.api.model.ResourceMakeAvailable;
5348
import com.linbit.linstor.api.model.ResourceWithVolumes;
@@ -70,28 +65,6 @@ private static String getLinstorRscName(String name) {
7065
return LinstorUtil.RSC_PREFIX + name;
7166
}
7267

73-
private String getHostname() {
74-
// either there is already some function for that in the agent or a better way.
75-
ProcessBuilder pb = new ProcessBuilder("hostname");
76-
try
77-
{
78-
String result;
79-
Process p = pb.start();
80-
final BufferedReader reader = new BufferedReader(new InputStreamReader(p.getInputStream()));
81-
82-
StringJoiner sj = new StringJoiner(System.getProperty("line.separator"));
83-
reader.lines().iterator().forEachRemaining(sj::add);
84-
result = sj.toString();
85-
86-
p.waitFor();
87-
p.destroy();
88-
return result.trim();
89-
} catch (IOException | InterruptedException exc) {
90-
Thread.currentThread().interrupt();
91-
throw new CloudRuntimeException("Unable to run 'hostname' command.");
92-
}
93-
}
94-
9568
private void logLinstorAnswer(@Nonnull ApiCallRc answer) {
9669
if (answer.isError()) {
9770
s_logger.error(answer.getMessage());
@@ -122,7 +95,7 @@ private void handleLinstorApiAnswers(ApiCallRcList answers, String excMessage) {
12295
}
12396

12497
public LinstorStorageAdaptor() {
125-
localNodeName = getHostname();
98+
localNodeName = LinstorStoragePool.getHostname();
12699
}
127100

128101
@Override
@@ -511,25 +484,7 @@ public long getAvailable(LinstorStoragePool pool) {
511484
DevelopersApi linstorApi = getLinstorAPI(pool);
512485
final String rscGroupName = pool.getResourceGroup();
513486
try {
514-
List<ResourceGroup> rscGrps = linstorApi.resourceGroupList(
515-
Collections.singletonList(rscGroupName),
516-
null,
517-
null,
518-
null);
519-
520-
if (rscGrps.isEmpty()) {
521-
final String errMsg = String.format("Linstor: Resource group '%s' not found", rscGroupName);
522-
s_logger.error(errMsg);
523-
throw new CloudRuntimeException(errMsg);
524-
}
525-
526-
List<StoragePool> storagePools = linstorApi.viewStoragePools(
527-
Collections.emptyList(),
528-
rscGrps.get(0).getSelectFilter().getStoragePoolList(),
529-
null,
530-
null,
531-
null
532-
);
487+
List<StoragePool> storagePools = LinstorUtil.getRscGroupStoragePools(linstorApi, rscGroupName);
533488

534489
final long free = storagePools.stream()
535490
.filter(sp -> sp.getProviderKind() != ProviderKind.DISKLESS)
@@ -547,25 +502,7 @@ public long getUsed(LinstorStoragePool pool) {
547502
DevelopersApi linstorApi = getLinstorAPI(pool);
548503
final String rscGroupName = pool.getResourceGroup();
549504
try {
550-
List<ResourceGroup> rscGrps = linstorApi.resourceGroupList(
551-
Collections.singletonList(rscGroupName),
552-
null,
553-
null,
554-
null);
555-
556-
if (rscGrps.isEmpty()) {
557-
final String errMsg = String.format("Linstor: Resource group '%s' not found", rscGroupName);
558-
s_logger.error(errMsg);
559-
throw new CloudRuntimeException(errMsg);
560-
}
561-
562-
List<StoragePool> storagePools = linstorApi.viewStoragePools(
563-
Collections.emptyList(),
564-
rscGrps.get(0).getSelectFilter().getStoragePoolList(),
565-
null,
566-
null,
567-
null
568-
);
505+
List<StoragePool> storagePools = LinstorUtil.getRscGroupStoragePools(linstorApi, rscGroupName);
569506

570507
final long used = storagePools.stream()
571508
.filter(sp -> sp.getProviderKind() != ProviderKind.DISKLESS)

plugins/storage/volume/linstor/src/main/java/com/cloud/hypervisor/kvm/storage/LinstorStoragePool.java

Lines changed: 123 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,20 +19,33 @@
1919
import java.util.List;
2020
import java.util.Map;
2121

22-
import org.apache.cloudstack.utils.qemu.QemuImg;
23-
import org.joda.time.Duration;
24-
2522
import com.cloud.agent.api.to.HostTO;
23+
import com.cloud.agent.properties.AgentProperties;
24+
import com.cloud.agent.properties.AgentPropertiesFileHandler;
2625
import com.cloud.hypervisor.kvm.resource.KVMHABase.HAStoragePool;
2726
import com.cloud.storage.Storage;
27+
import com.cloud.utils.exception.CloudRuntimeException;
28+
import com.cloud.utils.script.OutputInterpreter;
29+
import com.cloud.utils.script.Script;
30+
import com.google.gson.JsonArray;
31+
import com.google.gson.JsonElement;
32+
import com.google.gson.JsonIOException;
33+
import com.google.gson.JsonObject;
34+
import com.google.gson.JsonParser;
35+
import com.google.gson.JsonSyntaxException;
36+
import org.apache.cloudstack.utils.qemu.QemuImg;
37+
import org.apache.log4j.Logger;
38+
import org.joda.time.Duration;
2839

2940
public class LinstorStoragePool implements KVMStoragePool {
41+
private static final Logger s_logger = Logger.getLogger(LinstorStoragePool.class);
3042
private final String _uuid;
3143
private final String _sourceHost;
3244
private final int _sourcePort;
3345
private final Storage.StoragePoolType _storagePoolType;
3446
private final StorageAdaptor _storageAdaptor;
3547
private final String _resourceGroup;
48+
private final String localNodeName;
3649

3750
public LinstorStoragePool(String uuid, String host, int port, String resourceGroup,
3851
Storage.StoragePoolType storagePoolType, StorageAdaptor storageAdaptor) {
@@ -42,6 +55,7 @@ public LinstorStoragePool(String uuid, String host, int port, String resourceGro
4255
_storagePoolType = storagePoolType;
4356
_storageAdaptor = storageAdaptor;
4457
_resourceGroup = resourceGroup;
58+
localNodeName = getHostname();
4559
}
4660

4761
@Override
@@ -200,32 +214,132 @@ public String getResourceGroup() {
200214

201215
@Override
202216
public boolean isPoolSupportHA() {
203-
return false;
217+
return true;
204218
}
205219

206220
@Override
207221
public String getHearthBeatPath() {
208-
return null;
222+
String kvmScriptsDir = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.KVM_SCRIPTS_DIR);
223+
return Script.findScript(kvmScriptsDir, "kvmspheartbeat.sh");
209224
}
210225

211226
@Override
212-
public String createHeartBeatCommand(HAStoragePool primaryStoragePool, String hostPrivateIp,
227+
public String createHeartBeatCommand(HAStoragePool pool, String hostPrivateIp,
213228
boolean hostValidation) {
214-
return null;
229+
s_logger.trace(String.format("Linstor.createHeartBeatCommand: %s, %s, %b", pool.getPoolIp(), hostPrivateIp, hostValidation));
230+
boolean isStorageNodeUp = checkingHeartBeat(pool, null);
231+
if (!isStorageNodeUp && !hostValidation) {
232+
//restart the host
233+
s_logger.debug(String.format("The host [%s] will be restarted because the health check failed for the storage pool [%s]", hostPrivateIp, pool.getPool().getType()));
234+
Script cmd = new Script(pool.getPool().getHearthBeatPath(), Duration.millis(HeartBeatUpdateTimeout), s_logger);
235+
cmd.add("-c");
236+
cmd.execute();
237+
return "Down";
238+
}
239+
return isStorageNodeUp ? null : "Down";
215240
}
216241

217242
@Override
218243
public String getStorageNodeId() {
244+
// only called by storpool
219245
return null;
220246
}
221247

248+
static String getHostname() {
249+
OutputInterpreter.AllLinesParser parser = new OutputInterpreter.AllLinesParser();
250+
Script sc = new Script("hostname", Duration.millis(10000L), s_logger);
251+
String res = sc.execute(parser);
252+
if (res != null) {
253+
throw new CloudRuntimeException(String.format("Unable to run 'hostname' command: %s", res));
254+
}
255+
String response = parser.getLines();
256+
return response.trim();
257+
}
258+
222259
@Override
223260
public Boolean checkingHeartBeat(HAStoragePool pool, HostTO host) {
224-
return null;
261+
String hostName;
262+
if (host == null) {
263+
hostName = localNodeName;
264+
} else {
265+
hostName = host.getParent();
266+
if (hostName == null) {
267+
s_logger.error("No hostname set in host.getParent()");
268+
return false;
269+
}
270+
}
271+
272+
return checkHostUpToDateAndConnected(hostName);
273+
}
274+
275+
private String executeDrbdSetupStatus(OutputInterpreter.AllLinesParser parser) {
276+
Script sc = new Script("drbdsetup", Duration.millis(HeartBeatUpdateTimeout), s_logger);
277+
sc.add("status");
278+
sc.add("--json");
279+
return sc.execute(parser);
280+
}
281+
282+
private boolean checkDrbdSetupStatusOutput(String output, String otherNodeName) {
283+
JsonParser jsonParser = new JsonParser();
284+
JsonArray jResources = (JsonArray) jsonParser.parse(output);
285+
for (JsonElement jElem : jResources) {
286+
JsonObject jRes = (JsonObject) jElem;
287+
JsonArray jConnections = jRes.getAsJsonArray("connections");
288+
for (JsonElement jConElem : jConnections) {
289+
JsonObject jConn = (JsonObject) jConElem;
290+
if (jConn.getAsJsonPrimitive("name").getAsString().equals(otherNodeName)
291+
&& jConn.getAsJsonPrimitive("connection-state").getAsString().equalsIgnoreCase("Connected")) {
292+
return true;
293+
}
294+
}
295+
}
296+
s_logger.warn(String.format("checkDrbdSetupStatusOutput: no resource connected to %s.", otherNodeName));
297+
return false;
298+
}
299+
300+
private String executeDrbdEventsNow(OutputInterpreter.AllLinesParser parser) {
301+
Script sc = new Script("drbdsetup", Duration.millis(HeartBeatUpdateTimeout), s_logger);
302+
sc.add("events2");
303+
sc.add("--now");
304+
return sc.execute(parser);
305+
}
306+
307+
private boolean checkDrbdEventsNowOutput(String output) {
308+
boolean healthy = output.lines().noneMatch(line -> line.matches(".*role:Primary .* promotion_score:0.*"));
309+
if (!healthy) {
310+
s_logger.warn("checkDrbdEventsNowOutput: primary resource with promotion score==0; HA false");
311+
}
312+
return healthy;
313+
}
314+
315+
private boolean checkHostUpToDateAndConnected(String hostName) {
316+
s_logger.trace(String.format("checkHostUpToDateAndConnected: %s/%s", localNodeName, hostName));
317+
OutputInterpreter.AllLinesParser parser = new OutputInterpreter.AllLinesParser();
318+
319+
if (localNodeName.equalsIgnoreCase(hostName)) {
320+
String res = executeDrbdEventsNow(parser);
321+
if (res != null) {
322+
return false;
323+
}
324+
return checkDrbdEventsNowOutput(parser.getLines());
325+
} else {
326+
// check drbd connections
327+
String res = executeDrbdSetupStatus(parser);
328+
if (res != null) {
329+
return false;
330+
}
331+
try {
332+
return checkDrbdSetupStatusOutput(parser.getLines(), hostName);
333+
} catch (JsonIOException | JsonSyntaxException e) {
334+
s_logger.error("Error parsing drbdsetup status --json", e);
335+
}
336+
}
337+
return false;
225338
}
226339

227340
@Override
228341
public Boolean vmActivityCheck(HAStoragePool pool, HostTO host, Duration activityScriptTimeout, String volumeUUIDListString, String vmActivityCheckPath, long duration) {
229-
return null;
342+
s_logger.trace(String.format("Linstor.vmActivityCheck: %s, %s", pool.getPoolIp(), host.getPrivateNetwork().getIp()));
343+
return checkingHeartBeat(pool, host);
230344
}
231345
}

plugins/storage/volume/linstor/src/main/java/org/apache/cloudstack/storage/datastore/driver/LinstorPrimaryDataStoreDriverImpl.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1241,7 +1241,7 @@ public void provideVmTags(long vmId, long volumeId, String tagValue) {
12411241

12421242
@Override
12431243
public boolean isStorageSupportHA(StoragePoolType type) {
1244-
return false;
1244+
return true;
12451245
}
12461246

12471247
@Override
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
package org.apache.cloudstack.storage.datastore.provider;
18+
19+
import com.cloud.exception.StorageConflictException;
20+
import com.cloud.host.HostVO;
21+
22+
public class LinstorHostListener extends DefaultHostListener {
23+
@Override
24+
public boolean hostConnect(long hostId, long poolId) throws StorageConflictException {
25+
HostVO host = hostDao.findById(hostId);
26+
if (host.getParent() == null) {
27+
host.setParent(host.getName());
28+
hostDao.update(host.getId(), host);
29+
}
30+
return super.hostConnect(hostId, poolId);
31+
}
32+
}

plugins/storage/volume/linstor/src/main/java/org/apache/cloudstack/storage/datastore/provider/LinstorPrimaryDatastoreProviderImpl.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ public DataStoreLifeCycle getDataStoreLifeCycle() {
4848
public boolean configure(Map<String, Object> params) {
4949
lifecycle = ComponentContext.inject(LinstorPrimaryDataStoreLifeCycleImpl.class);
5050
driver = ComponentContext.inject(LinstorPrimaryDataStoreDriverImpl.class);
51-
listener = ComponentContext.inject(DefaultHostListener.class);
51+
listener = ComponentContext.inject(LinstorHostListener.class);
5252
return true;
5353
}
5454

0 commit comments

Comments
 (0)