Skip to content

Commit 88d1cc3

Browse files
mayastor-bors and Abhinandan-Purkait committed
Merge #1786
1786: chore: increase ps_retries and silence log once logged r=Abhinandan-Purkait a=Abhinandan-Purkait

- Increase the ps_retries to 300.
- Silently retry.

Co-authored-by: Abhinandan Purkait <purkaitabhinandan@gmail.com>
2 parents d6fcd63 + b64d617 commit 88d1cc3

File tree

3 files changed: +20 -13 lines changed

io-engine/src/bdev/nexus/nexus_persistence.rs

+14 -7
Original file line number | Diff line number | Diff line change
@@ -104,9 +104,12 @@ impl<'n> Nexus<'n> {
104104
};
105105
nexus_info.children.push(child_info);
106106
});
107-
// We started with this child because it was healthy in etcd, or isn't there at all.
108-
// Being unhealthy here means it is undergoing a fault/retire before nexus is open.
109-
if nexus_info.children.len() == 1 && !nexus_info.children[0].healthy {
107+
// We started with this child because it was healthy in etcd, or
108+
// isn't there at all. Being unhealthy here
109+
// means it is undergoing a fault/retire before nexus is open.
110+
if nexus_info.children.len() == 1
111+
&& !nexus_info.children[0].healthy
112+
{
110113
warn!("{self:?} Not persisting: the only child went unhealthy during nexus creation");
111114
return Err(Error::NexusCreate {
112115
name: self.name.clone(),
@@ -224,6 +227,7 @@ impl<'n> Nexus<'n> {
224227
};
225228

226229
let mut retry = PersistentStore::retries();
230+
let mut logged = false;
227231
loop {
228232
let Err(err) = PersistentStore::put(&key, &info.inner).await else {
229233
trace!(?key, "{self:?}: the state was saved successfully");
@@ -238,10 +242,13 @@ impl<'n> Nexus<'n> {
238242
});
239243
}
240244

241-
error!(
242-
"{self:?}: failed to persist nexus information, \
243-
will retry ({retry} left): {err}"
244-
);
245+
if !logged {
246+
error!(
247+
"{self:?}: failed to persist nexus information, \
248+
will silently retry ({retry} left): {err}"
249+
);
250+
logged = true;
251+
}
245252

246253
// Allow some time for the connection to the persistent
247254
// store to be re-established before retrying the operation.

io-engine/src/core/env.rs

+2 -2
Original file line number | Diff line number | Diff line change
@@ -185,7 +185,7 @@ pub struct MayastorCliArgs {
185185
pub ps_timeout: Duration,
186186
#[clap(long = "ps-retries", default_value = "30")]
187187
/// Persistent store operation retries.
188-
pub ps_retries: u8,
188+
pub ps_retries: u16,
189189
#[clap(long = "bdev-pool-size", default_value = "65535")]
190190
/// Number of entries in memory pool for bdev I/O contexts
191191
pub bdev_io_ctx_pool_size: u64,
@@ -387,7 +387,7 @@ pub struct MayastorEnvironment {
387387
pub registration_endpoint: Option<Uri>,
388388
ps_endpoint: Option<String>,
389389
ps_timeout: Duration,
390-
ps_retries: u8,
390+
ps_retries: u16,
391391
mayastor_config: Option<String>,
392392
ptpl_dir: Option<String>,
393393
pool_config: Option<String>,

io-engine/src/persistent_store.rs

+4 -4
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@ pub struct PersistentStoreBuilder {
3636
/// Operation timeout.
3737
timeout: Duration,
3838
/// Number of operation retries.
39-
retries: u8,
39+
retries: u16,
4040
}
4141

4242
impl Default for PersistentStoreBuilder {
@@ -79,7 +79,7 @@ impl PersistentStoreBuilder {
7979
}
8080

8181
/// Sets number of operation retries.
82-
pub fn with_retries(mut self, retries: u8) -> Self {
82+
pub fn with_retries(mut self, retries: u16) -> Self {
8383
self.retries = retries;
8484
self
8585
}
@@ -101,7 +101,7 @@ pub struct PersistentStore {
101101
/// Operation timeout.
102102
timeout: Duration,
103103
/// Number of operation retries.
104-
retries: u8,
104+
retries: u16,
105105
}
106106

107107
/// Persistent store global instance.
@@ -304,7 +304,7 @@ impl PersistentStore {
304304
}
305305

306306
/// Gets the number of operation retries.
307-
pub fn retries() -> u8 {
307+
pub fn retries() -> u16 {
308308
Self::instance().lock().retries
309309
}
310310

0 commit comments

Comments
 (0)