Skip to content

Commit 02e79b6

Browse files
mayastor-borstiagolobocastro
mayastor-bors
andcommitted
Merge #1788
1788: fix(self-shutdown): abort frozen ios when unsharing shutdown nexus r=tiagolobocastro a=tiagolobocastro These frozen IOs prevent the nexus from shutting down. We don't have any hooks today to do this whilsts the target is stopping so we add a simple loop which tries a number of times. Ideally we should get some sort of callback to trigger this. Co-authored-by: Tiago Castro <tiagolobocastro@gmail.com>
2 parents 88d1cc3 + e77a824 commit 02e79b6

File tree

5 files changed

+142
-17
lines changed

5 files changed

+142
-17
lines changed

io-engine/src/bdev/nexus/nexus_bdev.rs

+12
Original file line numberDiff line numberDiff line change
@@ -1114,6 +1114,18 @@ impl<'n> Nexus<'n> {
11141114
Ok(())
11151115
}
11161116

1117+
/// Aborts all frozen IOs of a shutdown Nexus.
1118+
/// # Warning
1119+
/// These aborts may translate into I/O errors for the initiator.
1120+
pub async fn abort_shutdown_frozen_ios(&self) {
1121+
if self.status() == NexusStatus::Shutdown {
1122+
self.traverse_io_channels_async((), |channel, _| {
1123+
channel.abort_frozen();
1124+
})
1125+
.await;
1126+
}
1127+
}
1128+
11171129
/// Suspend any incoming IO to the bdev pausing the controller allows us to
11181130
/// handle internal events and which is a protocol feature.
11191131
/// In case concurrent pause requests take place, the other callers

io-engine/src/bdev/nexus/nexus_channel.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ impl<'n> Debug for NexusChannel<'n> {
3232
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
3333
write!(
3434
f,
35-
"{io} chan '{nex}' core:{core}({cur}) [R:{r} W:{w} D:{d} L:{l} C:{c}]",
35+
"{io} chan '{nex}' core:{core}({cur}) [R:{r} W:{w} D:{d} L:{l} C:{c} F:{f}]",
3636
io = if self.is_io_chan { "I/O" } else { "Aux" },
3737
nex = self.nexus.nexus_name(),
3838
core = self.core,
@@ -42,6 +42,7 @@ impl<'n> Debug for NexusChannel<'n> {
4242
d = self.detached.len(),
4343
l = self.io_logs.len(),
4444
c = self.nexus.child_count(),
45+
f = self.frozen_ios.len()
4546
)
4647
}
4748
}

io-engine/src/bdev/nexus/nexus_share.rs

+33-6
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
1-
use crate::bdev::PtplFileOps;
1+
use super::{nexus_err, nexus_lookup, Error, NbdDisk, Nexus, NexusTarget};
2+
use crate::{
3+
bdev::PtplFileOps,
4+
core::{NvmfShareProps, Protocol, PtplProps, Reactors, Share, UpdateProps},
5+
sleep::mayastor_sleep,
6+
};
27
use async_trait::async_trait;
8+
use futures::{channel::oneshot, future::FusedFuture};
39
use snafu::ResultExt;
4-
use std::pin::Pin;
5-
6-
use super::{nexus_err, Error, NbdDisk, Nexus, NexusTarget};
7-
8-
use crate::core::{NvmfShareProps, Protocol, PtplProps, Share, UpdateProps};
10+
use std::{pin::Pin, time::Duration};
911

1012
///
1113
/// The sharing of the nexus is different compared to regular bdevs
@@ -78,6 +80,31 @@ impl<'n> Share for Nexus<'n> {
7880
async fn unshare(mut self: Pin<&mut Self>) -> Result<(), Self::Error> {
7981
info!("{:?}: unsharing nexus bdev...", self);
8082

83+
// TODO: we should not allow new initiator connections for a shutdown
84+
// nexus!
85+
86+
// TODO: we may want to disable the freeze at this point instead,
87+
// allowing new I/Os to fail "normally"
88+
let name = self.name.clone();
89+
let (_s, r) = oneshot::channel::<()>();
90+
Reactors::master().send_future(async move {
91+
for _ in 0 ..= 10 {
92+
mayastor_sleep(Duration::from_secs(2)).await.ok();
93+
if r.is_terminated() {
94+
// This means the unshare is complete, so nothing to do here
95+
return;
96+
}
97+
// If we're not unshared, then abort any I/Os that may have
98+
// reached
99+
if let Some(nexus) = nexus_lookup(&name) {
100+
nexus.abort_shutdown_frozen_ios().await;
101+
}
102+
}
103+
});
104+
105+
// Aborts frozen I/Os a priori
106+
self.abort_shutdown_frozen_ios().await;
107+
81108
let name = self.name.clone();
82109
self.as_mut().pin_bdev_mut().unshare().await.context(
83110
nexus_err::UnshareNexus {

io-engine/tests/persistence.rs

+74-8
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use crate::common::fio_run_verify;
1+
use crate::common::{dd_random_file, fio_run_verify};
22
use common::compose::{
33
rpc::v0::{
44
mayastor::{
@@ -347,11 +347,80 @@ async fn persistent_store_connection() {
347347
assert!(get_nexus(ms1, nexus_uuid).await.is_some());
348348
}
349349

350+
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
351+
async fn self_shutdown_destroy() {
352+
let test = start_infrastructure_("self_shutdown_destroy", Some("1")).await;
353+
let grpc = GrpcConnect::new(&test);
354+
let ms1 = &mut grpc.grpc_handle("ms1").await.unwrap();
355+
let ms2 = &mut grpc.grpc_handle("ms2").await.unwrap();
356+
let ms3 = &mut grpc.grpc_handle("ms3").await.unwrap();
357+
358+
// Create bdevs and share over nvmf.
359+
let child1 = create_and_share_bdevs(ms2, CHILD1_UUID).await;
360+
let child2 = create_and_share_bdevs(ms3, CHILD2_UUID).await;
361+
362+
// Create and publish a nexus.
363+
let nexus_uuid = "8272e9d3-3738-4e33-b8c3-769d8eed5771";
364+
create_nexus(ms1, nexus_uuid, vec![child1.clone(), child2.clone()]).await;
365+
let nexus_uri = publish_nexus(ms1, nexus_uuid).await;
366+
367+
// Create and connect NVMF target.
368+
let target = libnvme_rs::NvmeTarget::try_from(nexus_uri.clone())
369+
.unwrap()
370+
.with_reconnect_delay(Some(1))
371+
.ctrl_loss_timeout(Some(1))
372+
.with_rand_hostnqn(true);
373+
target.connect().unwrap();
374+
375+
// simulate node with child and etcd going down
376+
test.stop("etcd").await.unwrap();
377+
test.stop("ms3").await.unwrap();
378+
379+
// allow pstor to timeout and self shutdown
380+
// todo: use wait loop
381+
tokio::time::sleep(Duration::from_secs(2)).await;
382+
383+
let nexus = get_nexus(ms1, nexus_uuid).await.unwrap();
384+
assert_eq!(nexus.state, NexusState::NexusShutdown as i32);
385+
386+
let devices = target.block_devices(2).unwrap();
387+
let fio_hdl = tokio::spawn(async move {
388+
dd_random_file(&devices[0].to_string(), 4096, 1)
389+
});
390+
391+
test.start("etcd").await.unwrap();
392+
393+
ms1.mayastor
394+
.destroy_nexus(DestroyNexusRequest {
395+
uuid: nexus_uuid.to_string(),
396+
})
397+
.await
398+
.expect("Failed to destroy nexus");
399+
400+
// Disconnect NVMF target
401+
target.disconnect().unwrap();
402+
403+
fio_hdl.await.unwrap();
404+
}
405+
350406
/// Start the containers for the tests.
351407
async fn start_infrastructure(test_name: &str) -> ComposeTest {
408+
start_infrastructure_(test_name, None).await
409+
}
410+
411+
/// Start the containers for the tests.
412+
async fn start_infrastructure_(
413+
test_name: &str,
414+
ps_retries: Option<&str>,
415+
) -> ComposeTest {
352416
common::composer_init();
353417

354418
let etcd_endpoint = format!("http://etcd.{test_name}:2379");
419+
let mut args = vec!["-p", &etcd_endpoint];
420+
if let Some(retries) = ps_retries {
421+
args.extend(["--ps-retries", retries]);
422+
}
423+
355424
Builder::new()
356425
.name(test_name)
357426
.add_container_spec(
@@ -371,20 +440,17 @@ async fn start_infrastructure(test_name: &str) -> ComposeTest {
371440
)
372441
.add_container_bin(
373442
"ms1",
374-
Binary::from_dbg("io-engine").with_args(vec!["-p", &etcd_endpoint]),
443+
Binary::from_dbg("io-engine").with_args(args.clone()),
375444
)
376445
.add_container_bin(
377446
"ms2",
378-
Binary::from_dbg("io-engine").with_args(vec!["-p", &etcd_endpoint]),
447+
Binary::from_dbg("io-engine").with_args(args.clone()),
379448
)
380449
.add_container_bin(
381450
"ms3",
382-
Binary::from_dbg("io-engine").with_args(vec!["-p", &etcd_endpoint]),
383-
)
384-
.add_container_bin(
385-
"ms4",
386-
Binary::from_dbg("io-engine").with_args(vec!["-p", &etcd_endpoint]),
451+
Binary::from_dbg("io-engine").with_args(args.clone()),
387452
)
453+
.add_container_bin("ms4", Binary::from_dbg("io-engine").with_args(args))
388454
.build()
389455
.await
390456
.unwrap()

libnvme-rs/src/nvme_uri.rs

+21-2
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,10 @@ pub struct NvmeTarget {
6464
trtype: NvmeTransportType,
6565
/// Auto-Generate random HostNqn.
6666
hostnqn_autogen: bool,
67+
/// The Reconnect Delay.
68+
reconnect_delay: Option<u8>,
69+
/// The Controller Loss Timeout.
70+
ctrl_loss_timeout: Option<u32>,
6771
}
6872

6973
impl TryFrom<String> for NvmeTarget {
@@ -117,6 +121,8 @@ impl TryFrom<&str> for NvmeTarget {
117121
trsvcid: url.port().unwrap_or(4420),
118122
subsysnqn: subnqn,
119123
hostnqn_autogen: false,
124+
reconnect_delay: None,
125+
ctrl_loss_timeout: None,
120126
})
121127
}
122128
}
@@ -128,6 +134,16 @@ impl NvmeTarget {
128134
self.hostnqn_autogen = random;
129135
self
130136
}
137+
/// With the reconnect delay.
138+
pub fn with_reconnect_delay(mut self, delay: Option<u8>) -> Self {
139+
self.reconnect_delay = delay;
140+
self
141+
}
142+
/// With the ctrl loss timeout.
143+
pub fn ctrl_loss_timeout(mut self, timeout: Option<u32>) -> Self {
144+
self.ctrl_loss_timeout = timeout;
145+
self
146+
}
131147
/// Connect to NVMe target
132148
/// Returns Ok on successful connect
133149
pub fn connect(&self) -> Result<(), NvmeError> {
@@ -184,8 +200,11 @@ impl NvmeTarget {
184200
host_iface,
185201
queue_size: 0,
186202
nr_io_queues: 0,
187-
reconnect_delay: 0,
188-
ctrl_loss_tmo: crate::NVMF_DEF_CTRL_LOSS_TMO as i32,
203+
reconnect_delay: self.reconnect_delay.unwrap_or(0) as i32,
204+
ctrl_loss_tmo: self
205+
.ctrl_loss_timeout
206+
.unwrap_or(crate::NVMF_DEF_CTRL_LOSS_TMO)
207+
as i32,
189208
fast_io_fail_tmo: 0,
190209
keep_alive_tmo: 0,
191210
nr_write_queues: 0,

0 commit comments

Comments
 (0)