mirror of https://github.com/railgun-rs/actix-raft
Prep for 0.5.1 release.
Painfully giving in to rustfmt ... Update changelog. Added some CI release automation.
This commit is contained in:
parent
4a7be31abd
commit
d9e4691811
|
@ -0,0 +1,42 @@
|
|||
# Release automation for the `memstore` crate.
#
# Pushing a tag matching `memstore-v*` publishes the crate from the `memstore`
# directory to crates.io, then creates a GitHub pre-release whose body is
# extracted from CHANGELOG.md.
name: Release

on:
  push:
    tags:
      - "memstore-v*"

jobs:
  publish_raft:
    runs-on: ubuntu-latest
    steps:
      - name: Setup | Checkout
        uses: actions/checkout@v2

      - name: Setup | Rust
        uses: actions-rs/toolchain@v1
        with:
          toolchain: stable
          profile: minimal
          override: true

      # This workflow publishes the crate located in `memstore/`.
      - name: Publish | Memstore
        run: cd memstore && cargo publish --token ${{ secrets.CRATES_IO_TOKEN }}

  release:
    # Run only after the crate has been published. `needs` must name a job
    # defined in this workflow; `publish_raft` is the only other job here.
    needs: publish_raft
    runs-on: ubuntu-latest
    steps:
      - name: Setup | Checkout
        uses: actions/checkout@v2
        with:
          fetch-depth: 0

      # Skip the first 6 lines of CHANGELOG.md and keep at most the next 25
      # lines as the release body.
      - name: Setup | Create Release Log
        run: cat CHANGELOG.md | tail -n +7 | head -n 25 > RELEASE_LOG.md

      - name: Build | Publish Pre-Release
        uses: softprops/action-gh-release@v1
        with:
          body_path: RELEASE_LOG.md
          prerelease: true
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
@ -0,0 +1,42 @@
|
|||
# Release automation for the `async-raft` crate.
#
# Pushing a tag matching `async-raft-v*` publishes the crate from the
# `async-raft` directory to crates.io, then creates a GitHub pre-release whose
# body is extracted from CHANGELOG.md.
name: Release

on:
  push:
    tags:
      - "async-raft-v*"

jobs:
  publish_raft:
    runs-on: ubuntu-latest
    steps:
      - name: Setup | Checkout
        uses: actions/checkout@v2

      - name: Setup | Rust
        uses: actions-rs/toolchain@v1
        with:
          toolchain: stable
          profile: minimal
          override: true

      - name: Publish | Async Raft
        run: cd async-raft && cargo publish --token ${{ secrets.CRATES_IO_TOKEN }}

  release:
    # Run only after the crate has been published. `needs` must name a job
    # defined in this workflow; `publish_raft` is the only other job here.
    needs: publish_raft
    runs-on: ubuntu-latest
    steps:
      - name: Setup | Checkout
        uses: actions/checkout@v2
        with:
          fetch-depth: 0

      # Skip the first 6 lines of CHANGELOG.md and keep at most the next 25
      # lines as the release body.
      - name: Setup | Create Release Log
        run: cat CHANGELOG.md | tail -n +7 | head -n 25 > RELEASE_LOG.md

      - name: Build | Publish Pre-Release
        uses: softprops/action-gh-release@v1
        with:
          body_path: RELEASE_LOG.md
          prerelease: true
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
@ -1,10 +1,14 @@
|
|||
changelog
|
||||
=========
|
||||
This changelog follows the patterns described here: https://keepachangelog.com/en/1.0.0/.
|
||||
|
||||
## [unreleased]
|
||||
|
||||
## 0.5.1
|
||||
### changed
|
||||
- `ChangeConfigError::NodeNotLeader` now returns the ID of the current cluster leader if known.
|
||||
- Fix off-by-one error in `get_log_entries` during the replication process.
|
||||
- Added `#[derive(Serialize, Deserialize)]` to `Config`, `ConfigBuilder` & `SnapshotPolicy`.
|
||||
|
||||
## 0.5.0
|
||||
### changed
|
||||
|
|
|
@ -2,11 +2,6 @@ CONTRIBUTING
|
|||
============
|
||||
This is a Rust project, so [rustup](https://rustup.rs/) is the best place to start.
|
||||
|
||||
Check out the `.travis.yml` file to get an idea on how to run tests and the like.
|
||||
|
||||
### clippy
|
||||
Haven't added clippy integration yet, but I am definitely planning on doing so. Don't run rustfmt ...
|
||||
|
||||
### the guide
|
||||
The guide for this project is built using [mdBook](https://rust-lang-nursery.github.io/mdBook/index.html). Review their guide for more details on how to work with mdBook. Here are a few of the pertinent commands:
|
||||
|
||||
|
@ -23,3 +18,7 @@ mdbook watch
|
|||
|
||||
### release checklist
|
||||
- Any documentation updates should also be reflected in the guide.
|
||||
- Ensure the changelog is up-to-date.
|
||||
- Ensure the Cargo.toml version for async-raft or memstore has been updated, depending on which is being released.
|
||||
- Once the repo is in the desired state, push a tag matching the following pattern: `(async-raft|memstore)-v.+`.
|
||||
- Once the release CI has finished, navigate to the release page, update the release info and publish the release.
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
[package]
|
||||
name = "async-raft"
|
||||
version = "0.5.0"
|
||||
version = "0.5.1"
|
||||
edition = "2018"
|
||||
authors = ["Anthony Dodd <Dodd.AnthonyJosiah@gmail.com>"]
|
||||
categories = ["algorithms", "asynchronous", "data-structures"]
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
//! Raft runtime configuration.
|
||||
|
||||
use rand::{thread_rng, Rng};
|
||||
use serde::{Serialize, Deserialize};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::error::ConfigError;
|
||||
|
||||
|
@ -111,7 +111,7 @@ impl Config {
|
|||
/// The application specific name of the Raft cluster is required and must
|
||||
/// be specified to start the config builder process.
|
||||
pub fn build(cluster_name: String) -> ConfigBuilder {
|
||||
ConfigBuilder{
|
||||
ConfigBuilder {
|
||||
cluster_name,
|
||||
election_timeout_min: None,
|
||||
election_timeout_max: None,
|
||||
|
@ -135,7 +135,8 @@ impl Config {
|
|||
/// the Raft spec is considered in order to set the appropriate values.
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct ConfigBuilder {
|
||||
cluster_name: String,
|
||||
/// The application specific name of this Raft cluster.
|
||||
pub cluster_name: String,
|
||||
/// The minimum election timeout, in milliseconds.
|
||||
pub election_timeout_min: Option<u64>,
|
||||
/// The maximum election timeout, in milliseconds.
|
||||
|
@ -212,7 +213,7 @@ impl ConfigBuilder {
|
|||
let replication_lag_threshold = self.replication_lag_threshold.unwrap_or(DEFAULT_REPLICATION_LAG_THRESHOLD);
|
||||
let snapshot_policy = self.snapshot_policy.unwrap_or_else(SnapshotPolicy::default);
|
||||
let snapshot_max_chunk_size = self.snapshot_max_chunk_size.unwrap_or(DEFAULT_SNAPSHOT_CHUNKSIZE);
|
||||
Ok(Config{
|
||||
Ok(Config {
|
||||
cluster_name: self.cluster_name,
|
||||
election_timeout_min,
|
||||
election_timeout_max,
|
||||
|
@ -255,7 +256,8 @@ mod tests {
|
|||
.replication_lag_threshold(100)
|
||||
.snapshot_max_chunk_size(200)
|
||||
.snapshot_policy(SnapshotPolicy::LogsSinceLast(10000))
|
||||
.validate().unwrap();
|
||||
.validate()
|
||||
.unwrap();
|
||||
|
||||
assert!(cfg.election_timeout_min >= 100);
|
||||
assert!(cfg.election_timeout_max <= 200);
|
||||
|
@ -269,7 +271,9 @@ mod tests {
|
|||
#[test]
|
||||
fn test_invalid_election_timeout_config_produces_expected_error() {
|
||||
let res = Config::build("cluster0".into())
|
||||
.election_timeout_min(1000).election_timeout_max(700).validate();
|
||||
.election_timeout_min(1000)
|
||||
.election_timeout_max(700)
|
||||
.validate();
|
||||
assert!(res.is_err());
|
||||
let err = res.unwrap_err();
|
||||
assert_eq!(err, ConfigError::InvalidElectionTimeoutMinMax);
|
||||
|
|
|
@ -3,16 +3,16 @@ use std::collections::HashSet;
|
|||
use futures::future::{FutureExt, TryFutureExt};
|
||||
use tokio::sync::oneshot;
|
||||
|
||||
use crate::{AppData, AppDataResponse, NodeId, RaftNetwork, RaftStorage};
|
||||
use crate::error::{InitializeError, ChangeConfigError, RaftError};
|
||||
use crate::raft::{ChangeMembershipTx, ClientWriteRequest, MembershipConfig};
|
||||
use crate::core::{ConsensusState, LeaderState, NonVoterReplicationState, NonVoterState, State, UpdateCurrentLeader};
|
||||
use crate::core::client::ClientRequestEntry;
|
||||
use crate::core::{ConsensusState, LeaderState, NonVoterReplicationState, NonVoterState, State, UpdateCurrentLeader};
|
||||
use crate::error::{ChangeConfigError, InitializeError, RaftError};
|
||||
use crate::raft::{ChangeMembershipTx, ClientWriteRequest, MembershipConfig};
|
||||
use crate::replication::RaftEvent;
|
||||
use crate::{AppData, AppDataResponse, NodeId, RaftNetwork, RaftStorage};
|
||||
|
||||
impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> NonVoterState<'a, D, R, N, S> {
|
||||
/// Handle the admin `init_with_config` command.
|
||||
#[tracing::instrument(level="trace", skip(self))]
|
||||
#[tracing::instrument(level = "trace", skip(self))]
|
||||
pub(super) async fn handle_init_with_config(&mut self, mut members: HashSet<NodeId>) -> Result<(), InitializeError> {
|
||||
if self.core.last_log_index != 0 || self.core.current_term != 0 {
|
||||
tracing::error!({self.core.last_log_index, self.core.current_term}, "rejecting init_with_config request as last_log_index or current_term is 0");
|
||||
|
@ -26,7 +26,10 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
|
||||
// Build a new membership config from given init data & assign it as the new cluster
|
||||
// membership config in memory only.
|
||||
self.core.membership = MembershipConfig{members, members_after_consensus: None};
|
||||
self.core.membership = MembershipConfig {
|
||||
members,
|
||||
members_after_consensus: None,
|
||||
};
|
||||
|
||||
// Become a candidate and start campaigning for leadership. If this node is the only node
|
||||
// in the cluster, then become leader without holding an election. If members len == 1, we
|
||||
|
@ -47,13 +50,20 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> LeaderState<'a, D, R, N, S> {
|
||||
/// Add a new node to the cluster as a non-voter, bringing it up-to-speed, and then responding
|
||||
/// on the given channel.
|
||||
#[tracing::instrument(level="trace", skip(self, tx))]
|
||||
#[tracing::instrument(level = "trace", skip(self, tx))]
|
||||
pub(super) fn add_member(&mut self, target: NodeId, tx: oneshot::Sender<Result<(), ChangeConfigError>>) {
|
||||
// Ensure the node doesn't already exist in the current config, in the set of new nodes
|
||||
// already being synced, or in the nodes being removed.
|
||||
if self.core.membership.members.contains(&target)
|
||||
|| self.core.membership.members_after_consensus.as_ref().map(|new| new.contains(&target)).unwrap_or(false)
|
||||
|| self.non_voters.contains_key(&target) {
|
||||
|| self
|
||||
.core
|
||||
.membership
|
||||
.members_after_consensus
|
||||
.as_ref()
|
||||
.map(|new| new.contains(&target))
|
||||
.unwrap_or(false)
|
||||
|| self.non_voters.contains_key(&target)
|
||||
{
|
||||
tracing::debug!("target node is already a cluster member or is being synced");
|
||||
let _ = tx.send(Err(ChangeConfigError::Noop));
|
||||
return;
|
||||
|
@ -62,10 +72,17 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
// Spawn a replication stream for the new member. Track state as a non-voter so that it
|
||||
// can be updated to be added to the cluster config once it has been brought up-to-date.
|
||||
let state = self.spawn_replication_stream(target);
|
||||
self.non_voters.insert(target, NonVoterReplicationState{state, is_ready_to_join: false, tx: Some(tx)});
|
||||
self.non_voters.insert(
|
||||
target,
|
||||
NonVoterReplicationState {
|
||||
state,
|
||||
is_ready_to_join: false,
|
||||
tx: Some(tx),
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
#[tracing::instrument(level="trace", skip(self, tx))]
|
||||
#[tracing::instrument(level = "trace", skip(self, tx))]
|
||||
pub(super) async fn change_membership(&mut self, members: HashSet<NodeId>, tx: ChangeMembershipTx) {
|
||||
// Ensure cluster will have at least one node.
|
||||
if members.is_empty() {
|
||||
|
@ -76,10 +93,10 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
// Only allow config updates when currently in a uniform consensus state.
|
||||
match &self.consensus_state {
|
||||
ConsensusState::Uniform => (),
|
||||
ConsensusState::NonVoterSync{..} | ConsensusState::Joint{..} => {
|
||||
ConsensusState::NonVoterSync { .. } | ConsensusState::Joint { .. } => {
|
||||
let _ = tx.send(Err(ChangeConfigError::ConfigChangeInProgress));
|
||||
return;
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// Check the proposed config for any new nodes. If ALL new nodes already have replication
|
||||
|
@ -89,7 +106,8 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
// Here, all we do is check to see which nodes still need to be synced, which determines
|
||||
// if we can proceed.
|
||||
let diff = members.difference(&self.core.membership.members).cloned().collect::<Vec<_>>();
|
||||
let awaiting = diff.into_iter()
|
||||
let awaiting = diff
|
||||
.into_iter()
|
||||
.filter(|new_node| match self.non_voters.get(&new_node) {
|
||||
Some(node) if node.is_ready_to_join => false,
|
||||
Some(_) => true,
|
||||
|
@ -97,7 +115,14 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
// Spawn a replication stream for the new member. Track state as a non-voter so that it
|
||||
// can be updated to be added to the cluster config once it has been brought up-to-date.
|
||||
let state = self.spawn_replication_stream(*new_node);
|
||||
self.non_voters.insert(*new_node, NonVoterReplicationState{state, is_ready_to_join: false, tx: None});
|
||||
self.non_voters.insert(
|
||||
*new_node,
|
||||
NonVoterReplicationState {
|
||||
state,
|
||||
is_ready_to_join: false,
|
||||
tx: None,
|
||||
},
|
||||
);
|
||||
true
|
||||
}
|
||||
})
|
||||
|
@ -105,7 +130,7 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
// If there are new nodes which need to sync, then we need to wait until they are synced.
|
||||
// Once they've finished, this routine will be called again to progress further.
|
||||
if !awaiting.is_empty() {
|
||||
self.consensus_state = ConsensusState::NonVoterSync{awaiting, members, tx};
|
||||
self.consensus_state = ConsensusState::NonVoterSync { awaiting, members, tx };
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -113,7 +138,7 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
if !members.contains(&self.core.id) {
|
||||
self.is_stepping_down = true;
|
||||
}
|
||||
self.consensus_state = ConsensusState::Joint{is_committed: false};
|
||||
self.consensus_state = ConsensusState::Joint { is_committed: false };
|
||||
self.core.membership.members_after_consensus = Some(members);
|
||||
|
||||
// Propagate the command as any other client request.
|
||||
|
@ -138,20 +163,22 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
let res = rx_cfg_change
|
||||
.map_err(|_| RaftError::ShuttingDown)
|
||||
.into_future()
|
||||
.then(|res| futures::future::ready(match res {
|
||||
Ok(Ok(_)) => Ok(()),
|
||||
Ok(Err(err)) => Err(ChangeConfigError::from(err)),
|
||||
Err(err) => Err(ChangeConfigError::from(err)),
|
||||
}))
|
||||
.then(|res| {
|
||||
futures::future::ready(match res {
|
||||
Ok(Ok(_)) => Ok(()),
|
||||
Ok(Err(err)) => Err(ChangeConfigError::from(err)),
|
||||
Err(err) => Err(ChangeConfigError::from(err)),
|
||||
})
|
||||
})
|
||||
.await;
|
||||
let _ = tx.send(res);
|
||||
});
|
||||
}
|
||||
|
||||
/// Handle the commitment of a joint consensus cluster configuration.
|
||||
#[tracing::instrument(level="trace", skip(self))]
|
||||
#[tracing::instrument(level = "trace", skip(self))]
|
||||
pub(super) async fn handle_joint_consensus_committed(&mut self) -> Result<(), RaftError> {
|
||||
if let ConsensusState::Joint{is_committed, ..} = &mut self.consensus_state {
|
||||
if let ConsensusState::Joint { is_committed, .. } = &mut self.consensus_state {
|
||||
*is_committed = true; // Mark as comitted.
|
||||
}
|
||||
// Only proceed to finalize this joint consensus if there are no remaining nodes being synced.
|
||||
|
@ -162,7 +189,7 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
}
|
||||
|
||||
/// Finalize the committed joint consensus.
|
||||
#[tracing::instrument(level="trace", skip(self))]
|
||||
#[tracing::instrument(level = "trace", skip(self))]
|
||||
pub(super) async fn finalize_joint_consensus(&mut self) -> Result<(), RaftError> {
|
||||
// Only proceed if it is safe to do so.
|
||||
if !self.consensus_state.is_joint_consensus_safe_to_finalize() {
|
||||
|
@ -200,7 +227,7 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
}
|
||||
|
||||
/// Handle the commitment of a uniform consensus cluster configuration.
|
||||
#[tracing::instrument(level="trace", skip(self))]
|
||||
#[tracing::instrument(level = "trace", skip(self))]
|
||||
pub(super) async fn handle_uniform_consensus_committed(&mut self, index: u64) -> Result<(), RaftError> {
|
||||
// Step down if needed.
|
||||
if self.is_stepping_down {
|
||||
|
@ -214,7 +241,9 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
// cluster members. All other replication streams which are no longer cluster members, but
|
||||
// which have not yet replicated this config will be marked for removal.
|
||||
let membership = &self.core.membership;
|
||||
let nodes_to_remove: Vec<_> = self.nodes.iter_mut()
|
||||
let nodes_to_remove: Vec<_> = self
|
||||
.nodes
|
||||
.iter_mut()
|
||||
.filter(|(id, _)| !membership.contains(id))
|
||||
.filter_map(|(idx, replstate)| {
|
||||
if replstate.match_index >= index {
|
||||
|
@ -223,9 +252,10 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
replstate.remove_after_commit = Some(index);
|
||||
None
|
||||
}
|
||||
}).collect();
|
||||
})
|
||||
.collect();
|
||||
for node in nodes_to_remove {
|
||||
tracing::debug!({target=node}, "removing target node from replication pool");
|
||||
tracing::debug!({ target = node }, "removing target node from replication pool");
|
||||
if let Some(node) = self.nodes.remove(&node) {
|
||||
let _ = node.replstream.repltx.send(RaftEvent::Terminate);
|
||||
}
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
use crate::{AppData, AppDataResponse, RaftNetwork, RaftStorage};
|
||||
use crate::core::{RaftCore, State, UpdateCurrentLeader};
|
||||
use crate::error::RaftResult;
|
||||
use crate::raft::{AppendEntriesRequest, AppendEntriesResponse, ConflictOpt, Entry, EntryPayload};
|
||||
use crate::core::{RaftCore, State, UpdateCurrentLeader};
|
||||
use crate::{AppData, AppDataResponse, RaftNetwork, RaftStorage};
|
||||
|
||||
impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> RaftCore<D, R, N, S> {
|
||||
/// An RPC invoked by the leader to replicate log entries (§5.3); also used as heartbeat (§5.2).
|
||||
|
@ -15,7 +15,11 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Ra
|
|||
// If message's term is less than most recent term, then we do not honor the request.
|
||||
if msg.term < self.current_term {
|
||||
tracing::trace!({self.current_term, rpc_term=msg.term}, "AppendEntries RPC term is less than current term");
|
||||
return Ok(AppendEntriesResponse{term: self.current_term, success: false, conflict_opt: None});
|
||||
return Ok(AppendEntriesResponse {
|
||||
term: self.current_term,
|
||||
success: false,
|
||||
conflict_opt: None,
|
||||
});
|
||||
}
|
||||
|
||||
// Update election timeout.
|
||||
|
@ -52,7 +56,11 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Ra
|
|||
if report_metrics {
|
||||
self.report_metrics();
|
||||
}
|
||||
return Ok(AppendEntriesResponse{term: self.current_term, success: true, conflict_opt: None});
|
||||
return Ok(AppendEntriesResponse {
|
||||
term: self.current_term,
|
||||
success: true,
|
||||
conflict_opt: None,
|
||||
});
|
||||
}
|
||||
|
||||
// Else, append log entries.
|
||||
|
@ -61,7 +69,11 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Ra
|
|||
if report_metrics {
|
||||
self.report_metrics();
|
||||
}
|
||||
return Ok(AppendEntriesResponse{term: self.current_term, success: true, conflict_opt: None});
|
||||
return Ok(AppendEntriesResponse {
|
||||
term: self.current_term,
|
||||
success: true,
|
||||
conflict_opt: None,
|
||||
});
|
||||
}
|
||||
|
||||
/////////////////////////////////////
|
||||
|
@ -69,7 +81,11 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Ra
|
|||
tracing::trace!("begin log consistency check");
|
||||
|
||||
// Previous log info doesn't immediately line up, so perform log consistency check and proceed based on its result.
|
||||
let entries = self.storage.get_log_entries(msg.prev_log_index, msg.prev_log_index).await.map_err(|err| self.map_fatal_storage_error(err))?;
|
||||
let entries = self
|
||||
.storage
|
||||
.get_log_entries(msg.prev_log_index, msg.prev_log_index + 1)
|
||||
.await
|
||||
.map_err(|err| self.map_fatal_storage_error(err))?;
|
||||
let target_entry = match entries.first() {
|
||||
Some(target_entry) => target_entry,
|
||||
// The target entry was not found. This can only mean that we don't have the
|
||||
|
@ -78,9 +94,13 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Ra
|
|||
if report_metrics {
|
||||
self.report_metrics();
|
||||
}
|
||||
return Ok(AppendEntriesResponse{
|
||||
term: self.current_term, success: false,
|
||||
conflict_opt: Some(ConflictOpt{term: self.last_log_term, index: self.last_log_index}),
|
||||
return Ok(AppendEntriesResponse {
|
||||
term: self.current_term,
|
||||
success: false,
|
||||
conflict_opt: Some(ConflictOpt {
|
||||
term: self.last_log_term,
|
||||
index: self.last_log_index,
|
||||
}),
|
||||
});
|
||||
}
|
||||
};
|
||||
|
@ -90,8 +110,15 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Ra
|
|||
// We've found a point of agreement with the leader. If we have any logs present
|
||||
// with an index greater than this, then we must delete them per §5.3.
|
||||
if self.last_log_index > target_entry.index {
|
||||
self.storage.delete_logs_from(target_entry.index + 1, None).await.map_err(|err| self.map_fatal_storage_error(err))?;
|
||||
let membership = self.storage.get_membership_config().await.map_err(|err| self.map_fatal_storage_error(err))?;
|
||||
self.storage
|
||||
.delete_logs_from(target_entry.index + 1, None)
|
||||
.await
|
||||
.map_err(|err| self.map_fatal_storage_error(err))?;
|
||||
let membership = self
|
||||
.storage
|
||||
.get_membership_config()
|
||||
.await
|
||||
.map_err(|err| self.map_fatal_storage_error(err))?;
|
||||
self.update_membership(membership)?;
|
||||
}
|
||||
}
|
||||
|
@ -99,15 +126,29 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Ra
|
|||
// entry of that payload which is still in the target term for conflict optimization.
|
||||
else {
|
||||
let start = if msg.prev_log_index >= 50 { msg.prev_log_index - 50 } else { 0 };
|
||||
let old_entries = self.storage.get_log_entries(start, msg.prev_log_index).await.map_err(|err| self.map_fatal_storage_error(err))?;
|
||||
let old_entries = self
|
||||
.storage
|
||||
.get_log_entries(start, msg.prev_log_index)
|
||||
.await
|
||||
.map_err(|err| self.map_fatal_storage_error(err))?;
|
||||
let opt = match old_entries.iter().find(|entry| entry.term == msg.prev_log_term) {
|
||||
Some(entry) => Some(ConflictOpt{term: entry.term, index: entry.index}),
|
||||
None => Some(ConflictOpt{term: self.last_log_term, index: self.last_log_index}),
|
||||
Some(entry) => Some(ConflictOpt {
|
||||
term: entry.term,
|
||||
index: entry.index,
|
||||
}),
|
||||
None => Some(ConflictOpt {
|
||||
term: self.last_log_term,
|
||||
index: self.last_log_index,
|
||||
}),
|
||||
};
|
||||
if report_metrics {
|
||||
self.report_metrics();
|
||||
}
|
||||
return Ok(AppendEntriesResponse{term: self.current_term, success: false, conflict_opt: opt});
|
||||
return Ok(AppendEntriesResponse {
|
||||
term: self.current_term,
|
||||
success: false,
|
||||
conflict_opt: opt,
|
||||
});
|
||||
}
|
||||
|
||||
///////////////////////////////////
|
||||
|
@ -119,17 +160,22 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Ra
|
|||
if report_metrics {
|
||||
self.report_metrics();
|
||||
}
|
||||
Ok(AppendEntriesResponse{term: self.current_term, success: true, conflict_opt: None})
|
||||
Ok(AppendEntriesResponse {
|
||||
term: self.current_term,
|
||||
success: true,
|
||||
conflict_opt: None,
|
||||
})
|
||||
}
|
||||
|
||||
/// Append the given entries to the log.
|
||||
///
|
||||
/// Configuration changes are also detected and applied here. See `configuration changes`
|
||||
/// in the raft-essentials.md in this repo.
|
||||
#[tracing::instrument(level="trace", skip(self, entries))]
|
||||
#[tracing::instrument(level = "trace", skip(self, entries))]
|
||||
async fn append_log_entries(&mut self, entries: &[Entry<D>]) -> RaftResult<()> {
|
||||
// Check the given entries for any config changes and take the most recent.
|
||||
let last_conf_change = entries.iter()
|
||||
let last_conf_change = entries
|
||||
.iter()
|
||||
.filter_map(|ent| match &ent.payload {
|
||||
EntryPayload::ConfigChange(conf) => Some(conf),
|
||||
_ => None,
|
||||
|
@ -141,7 +187,10 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Ra
|
|||
};
|
||||
|
||||
// Replicate entries to log (same as append, but in follower mode).
|
||||
self.storage.replicate_to_log(entries).await.map_err(|err| self.map_fatal_storage_error(err))?;
|
||||
self.storage
|
||||
.replicate_to_log(entries)
|
||||
.await
|
||||
.map_err(|err| self.map_fatal_storage_error(err))?;
|
||||
if let Some(entry) = entries.last() {
|
||||
self.last_log_index = entry.index;
|
||||
self.last_log_term = entry.term;
|
||||
|
@ -150,17 +199,22 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Ra
|
|||
}
|
||||
|
||||
/// Replicate outstanding logs to the state machine if needed.
|
||||
#[tracing::instrument(level="trace", skip(self, report_metrics))]
|
||||
#[tracing::instrument(level = "trace", skip(self, report_metrics))]
|
||||
async fn replicate_to_state_machine_if_needed(&mut self, report_metrics: &mut bool) -> RaftResult<()> {
|
||||
if self.commit_index > self.last_applied {
|
||||
// Fetch the series of entries which must be applied to the state machine, and apply them.
|
||||
let stop = std::cmp::min(self.commit_index, self.last_log_index) + 1;
|
||||
let entries = self.storage.get_log_entries(self.last_applied + 1, stop).await.map_err(|err| self.map_fatal_storage_error(err))?;
|
||||
let entries = self
|
||||
.storage
|
||||
.get_log_entries(self.last_applied + 1, stop)
|
||||
.await
|
||||
.map_err(|err| self.map_fatal_storage_error(err))?;
|
||||
if let Some(entry) = entries.last() {
|
||||
self.last_applied = entry.index;
|
||||
*report_metrics = true;
|
||||
}
|
||||
let data_entries: Vec<_> = entries.iter()
|
||||
let data_entries: Vec<_> = entries
|
||||
.iter()
|
||||
.filter_map(|entry| match &entry.payload {
|
||||
EntryPayload::Normal(inner) => Some((&entry.index, &inner.data)),
|
||||
_ => None,
|
||||
|
@ -169,7 +223,10 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Ra
|
|||
if data_entries.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
self.storage.replicate_to_state_machine(&data_entries).await.map_err(|err| self.map_fatal_storage_error(err))?;
|
||||
self.storage
|
||||
.replicate_to_state_machine(&data_entries)
|
||||
.await
|
||||
.map_err(|err| self.map_fatal_storage_error(err))?;
|
||||
|
||||
// Request async compaction, if needed.
|
||||
self.trigger_log_compaction_if_needed();
|
||||
|
|
|
@ -1,18 +1,18 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use anyhow::anyhow;
|
||||
use futures::stream::FuturesUnordered;
|
||||
use futures::future::TryFutureExt;
|
||||
use futures::stream::FuturesUnordered;
|
||||
use tokio::stream::StreamExt;
|
||||
use tokio::sync::oneshot;
|
||||
use tokio::time::{Duration, timeout};
|
||||
use tokio::time::{timeout, Duration};
|
||||
|
||||
use crate::{AppData, AppDataResponse, RaftNetwork, RaftStorage};
|
||||
use crate::core::{LeaderState, State};
|
||||
use crate::error::{ClientReadError, ClientWriteError, RaftError, RaftResult};
|
||||
use crate::raft::{ClientWriteRequest, ClientWriteResponse, ClientReadResponseTx, ClientWriteResponseTx, Entry, EntryPayload};
|
||||
use crate::raft::{AppendEntriesRequest};
|
||||
use crate::raft::AppendEntriesRequest;
|
||||
use crate::raft::{ClientReadResponseTx, ClientWriteRequest, ClientWriteResponse, ClientWriteResponseTx, Entry, EntryPayload};
|
||||
use crate::replication::RaftEvent;
|
||||
use crate::{AppData, AppDataResponse, RaftNetwork, RaftStorage};
|
||||
|
||||
/// A wrapper around a ClientRequest which has been transformed into an Entry, along with its response channel.
|
||||
pub(super) struct ClientRequestEntry<D: AppData, R: AppDataResponse> {
|
||||
|
@ -28,7 +28,10 @@ pub(super) struct ClientRequestEntry<D: AppData, R: AppDataResponse> {
|
|||
impl<D: AppData, R: AppDataResponse> ClientRequestEntry<D, R> {
|
||||
/// Create a new instance from the raw components of a client request.
|
||||
pub(crate) fn from_entry<T: Into<ClientOrInternalResponseTx<D, R>>>(entry: Entry<D>, tx: T) -> Self {
|
||||
Self{entry: Arc::new(entry), tx: tx.into()}
|
||||
Self {
|
||||
entry: Arc::new(entry),
|
||||
tx: tx.into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -41,7 +44,7 @@ pub enum ClientOrInternalResponseTx<D: AppData, R: AppDataResponse> {
|
|||
|
||||
impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> LeaderState<'a, D, R, N, S> {
|
||||
/// Commit the initial entry which new leaders are obligated to create when first coming to power, per §8.
|
||||
#[tracing::instrument(level="trace", skip(self))]
|
||||
#[tracing::instrument(level = "trace", skip(self))]
|
||||
pub(super) async fn commit_initial_leader_entry(&mut self) -> RaftResult<()> {
|
||||
// If the cluster has just formed, and the current index is 0, then commit the current
|
||||
// config, else a blank payload.
|
||||
|
@ -93,17 +96,21 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
/// consensus. Each request will have a timeout, and we respond once we have a majority
|
||||
/// agreement from each config group. Most of the time, we will have a single uniform
|
||||
/// config group.
|
||||
#[tracing::instrument(level="trace", skip(self, tx))]
|
||||
#[tracing::instrument(level = "trace", skip(self, tx))]
|
||||
pub(super) async fn handle_client_read_request(&mut self, tx: ClientReadResponseTx) {
|
||||
// Setup sentinel values to track when we've received majority confirmation of leadership.
|
||||
let len_members = self.core.membership.members.len();
|
||||
let mut c0_confirmed = 0usize;
|
||||
let c0_needed: usize = if (len_members % 2) == 0 { (len_members/2)-1 } else { len_members/2 };
|
||||
let c0_needed: usize = if (len_members % 2) == 0 {
|
||||
(len_members / 2) - 1
|
||||
} else {
|
||||
len_members / 2
|
||||
};
|
||||
let mut c1_confirmed = 0usize;
|
||||
let mut c1_needed = 0usize;
|
||||
if let Some(joint_members) = &self.core.membership.members_after_consensus {
|
||||
let len = joint_members.len(); // Will never be zero, as we don't allow it when proposing config changes.
|
||||
c1_needed = if (len % 2) == 0 { (len/2)-1 } else { len/2 };
|
||||
c1_needed = if (len % 2) == 0 { (len / 2) - 1 } else { len / 2 };
|
||||
}
|
||||
|
||||
// As long as we are not about to step down, then increment for our vote.
|
||||
|
@ -111,7 +118,14 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
if self.core.membership.members.contains(&self.core.id) {
|
||||
c0_confirmed += 1;
|
||||
}
|
||||
if self.core.membership.members_after_consensus.as_ref().map(|members| members.contains(&self.core.id)).unwrap_or(false) {
|
||||
if self
|
||||
.core
|
||||
.membership
|
||||
.members_after_consensus
|
||||
.as_ref()
|
||||
.map(|members| members.contains(&self.core.id))
|
||||
.unwrap_or(false)
|
||||
{
|
||||
c1_confirmed += 1;
|
||||
}
|
||||
}
|
||||
|
@ -119,7 +133,7 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
// Spawn parallel requests, all with the standard timeout for heartbeats.
|
||||
let mut pending = FuturesUnordered::new();
|
||||
for (id, node) in self.nodes.iter() {
|
||||
let rpc = AppendEntriesRequest{
|
||||
let rpc = AppendEntriesRequest {
|
||||
term: self.core.current_term,
|
||||
leader_id: self.core.id,
|
||||
prev_log_index: node.match_index,
|
||||
|
@ -136,7 +150,8 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
Ok(Err(err)) => Err((target, err)),
|
||||
Err(_timeout) => Err((target, anyhow!("timeout waiting for leadership confirmation"))),
|
||||
}
|
||||
}).map_err(move |err| (*id, err));
|
||||
})
|
||||
.map_err(move |err| (*id, err));
|
||||
pending.push(task);
|
||||
}
|
||||
|
||||
|
@ -149,7 +164,7 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
continue;
|
||||
}
|
||||
Err((target, err)) => {
|
||||
tracing::error!({target}, "{}", err);
|
||||
tracing::error!({ target }, "{}", err);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
@ -164,7 +179,14 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
if self.core.membership.members.contains(&target) {
|
||||
c0_confirmed += 1;
|
||||
}
|
||||
if self.core.membership.members_after_consensus.as_ref().map(|members| members.contains(&target)).unwrap_or(false) {
|
||||
if self
|
||||
.core
|
||||
.membership
|
||||
.members_after_consensus
|
||||
.as_ref()
|
||||
.map(|members| members.contains(&target))
|
||||
.unwrap_or(false)
|
||||
{
|
||||
c1_confirmed += 1;
|
||||
}
|
||||
if c0_confirmed >= c0_needed && c1_confirmed >= c1_needed {
|
||||
|
@ -175,13 +197,13 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
|
||||
// If we've hit this location, then we've failed to gather needed confirmations due to
|
||||
// request failures.
|
||||
let _ = tx.send(Err(ClientReadError::RaftError(
|
||||
RaftError::RaftNetwork(anyhow!("too many requests failed, could not confirm leadership"))
|
||||
)));
|
||||
let _ = tx.send(Err(ClientReadError::RaftError(RaftError::RaftNetwork(anyhow!(
|
||||
"too many requests failed, could not confirm leadership"
|
||||
)))));
|
||||
}
|
||||
|
||||
/// Handle client write requests.
|
||||
#[tracing::instrument(level="trace", skip(self, rpc, tx))]
|
||||
#[tracing::instrument(level = "trace", skip(self, rpc, tx))]
|
||||
pub(super) async fn handle_client_write_request(&mut self, rpc: ClientWriteRequest<D>, tx: ClientWriteResponseTx<D, R>) {
|
||||
let entry = match self.append_payload_to_log(rpc.entry).await {
|
||||
Ok(entry) => ClientRequestEntry::from_entry(entry, tx),
|
||||
|
@ -194,10 +216,18 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
}
|
||||
|
||||
/// Transform the given payload into an entry, assign an index and term, and append the entry to the log.
|
||||
#[tracing::instrument(level="trace", skip(self, payload))]
|
||||
#[tracing::instrument(level = "trace", skip(self, payload))]
|
||||
pub(super) async fn append_payload_to_log(&mut self, payload: EntryPayload<D>) -> RaftResult<Entry<D>> {
|
||||
let entry = Entry{index: self.core.last_log_index + 1, term: self.core.current_term, payload};
|
||||
self.core.storage.append_entry_to_log(&entry).await.map_err(|err| self.core.map_fatal_storage_error(err))?;
|
||||
let entry = Entry {
|
||||
index: self.core.last_log_index + 1,
|
||||
term: self.core.current_term,
|
||||
payload,
|
||||
};
|
||||
self.core
|
||||
.storage
|
||||
.append_entry_to_log(&entry)
|
||||
.await
|
||||
.map_err(|err| self.core.map_fatal_storage_error(err))?;
|
||||
self.core.last_log_index = entry.index;
|
||||
Ok(entry)
|
||||
}
|
||||
|
@ -207,7 +237,7 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
/// NOTE WELL: this routine does not wait for the request to actually finish replication, it
|
||||
/// merely begins the process. Once the request is committed to the cluster, its response will
|
||||
/// be generated asynchronously.
|
||||
#[tracing::instrument(level="trace", skip(self, req))]
|
||||
#[tracing::instrument(level = "trace", skip(self, req))]
|
||||
pub(super) async fn replicate_client_request(&mut self, req: ClientRequestEntry<D, R>) {
|
||||
// Replicate the request if there are other cluster members. The client response will be
|
||||
// returned elsewhere after the entry has been committed to the cluster.
|
||||
|
@ -215,7 +245,7 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
if !self.nodes.is_empty() {
|
||||
self.awaiting_committed.push(req);
|
||||
for node in self.nodes.values() {
|
||||
let _ = node.replstream.repltx.send(RaftEvent::Replicate{
|
||||
let _ = node.replstream.repltx.send(RaftEvent::Replicate {
|
||||
entry: entry_arc.clone(),
|
||||
commit_index: self.core.commit_index,
|
||||
});
|
||||
|
@ -230,7 +260,7 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
// Replicate to non-voters.
|
||||
if !self.non_voters.is_empty() {
|
||||
for node in self.non_voters.values() {
|
||||
let _ = node.state.replstream.repltx.send(RaftEvent::Replicate{
|
||||
let _ = node.state.replstream.repltx.send(RaftEvent::Replicate {
|
||||
entry: entry_arc.clone(),
|
||||
commit_index: self.core.commit_index,
|
||||
});
|
||||
|
@ -239,21 +269,22 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
}
|
||||
|
||||
/// Handle the post-commit logic for a client request.
|
||||
#[tracing::instrument(level="trace", skip(self, req))]
|
||||
#[tracing::instrument(level = "trace", skip(self, req))]
|
||||
pub(super) async fn client_request_post_commit(&mut self, req: ClientRequestEntry<D, R>) {
|
||||
match req.tx {
|
||||
// If this is a client response channel, then it means that we are dealing with
|
||||
ClientOrInternalResponseTx::Client(tx) => match &req.entry.payload {
|
||||
EntryPayload::Normal(inner) => {
|
||||
match self.apply_entry_to_state_machine(&req.entry.index, &inner.data).await {
|
||||
Ok(data) => {
|
||||
let _ = tx.send(Ok(ClientWriteResponse{index: req.entry.index, data}));
|
||||
}
|
||||
Err(err) => {
|
||||
let _ = tx.send(Err(ClientWriteError::RaftError(err)));
|
||||
}
|
||||
EntryPayload::Normal(inner) => match self.apply_entry_to_state_machine(&req.entry.index, &inner.data).await {
|
||||
Ok(data) => {
|
||||
let _ = tx.send(Ok(ClientWriteResponse {
|
||||
index: req.entry.index,
|
||||
data,
|
||||
}));
|
||||
}
|
||||
}
|
||||
Err(err) => {
|
||||
let _ = tx.send(Err(ClientWriteError::RaftError(err)));
|
||||
}
|
||||
},
|
||||
_ => {
|
||||
// Why is this a bug, and why are we shutting down? This is because we can not easily
|
||||
// encode these constraints in the type system, and client requests should be the only
|
||||
|
@ -262,7 +293,7 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
tracing::error!("critical error in Raft, this is a programming bug, please open an issue");
|
||||
self.core.set_target_state(State::Shutdown);
|
||||
}
|
||||
}
|
||||
},
|
||||
ClientOrInternalResponseTx::Internal(tx) => {
|
||||
self.core.last_applied = req.entry.index;
|
||||
self.core.report_metrics();
|
||||
|
@ -275,7 +306,7 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
}
|
||||
|
||||
/// Apply the given log entry to the state machine.
|
||||
#[tracing::instrument(level="trace", skip(self, entry))]
|
||||
#[tracing::instrument(level = "trace", skip(self, entry))]
|
||||
pub(super) async fn apply_entry_to_state_machine(&mut self, index: &u64, entry: &D) -> RaftResult<R> {
|
||||
// First, we just ensure that we apply any outstanding entries up to, but not including, the index
|
||||
// of the given entry. We need to be able to return the data response from applying this
|
||||
|
@ -284,23 +315,38 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
// Note that this would only ever happen if a node had unapplied logs from before becoming leader.
|
||||
let expected_next_index = self.core.last_applied + 1;
|
||||
if index != &expected_next_index {
|
||||
let entries = self.core.storage.get_log_entries(expected_next_index, *index).await.map_err(|err| self.core.map_fatal_storage_error(err))?;
|
||||
let entries = self
|
||||
.core
|
||||
.storage
|
||||
.get_log_entries(expected_next_index, *index)
|
||||
.await
|
||||
.map_err(|err| self.core.map_fatal_storage_error(err))?;
|
||||
if let Some(entry) = entries.last() {
|
||||
self.core.last_applied = entry.index;
|
||||
}
|
||||
let data_entries: Vec<_> = entries.iter()
|
||||
let data_entries: Vec<_> = entries
|
||||
.iter()
|
||||
.filter_map(|entry| match &entry.payload {
|
||||
EntryPayload::Normal(inner) => Some((&entry.index, &inner.data)),
|
||||
_ => None,
|
||||
})
|
||||
.collect();
|
||||
if !data_entries.is_empty() {
|
||||
self.core.storage.replicate_to_state_machine(&data_entries).await.map_err(|err| self.core.map_fatal_storage_error(err))?;
|
||||
self.core
|
||||
.storage
|
||||
.replicate_to_state_machine(&data_entries)
|
||||
.await
|
||||
.map_err(|err| self.core.map_fatal_storage_error(err))?;
|
||||
}
|
||||
}
|
||||
|
||||
// Apply this entry to the state machine and return its data response.
|
||||
let res = self.core.storage.apply_entry_to_state_machine(index, entry).await.map_err(|err| self.core.map_fatal_storage_error(err))?;
|
||||
let res = self
|
||||
.core
|
||||
.storage
|
||||
.apply_entry_to_state_machine(index, entry)
|
||||
.await
|
||||
.map_err(|err| self.core.map_fatal_storage_error(err))?;
|
||||
self.core.last_applied = *index;
|
||||
self.core.report_metrics();
|
||||
Ok(res)
|
||||
|
|
|
@ -2,10 +2,10 @@ use std::io::SeekFrom;
|
|||
|
||||
use tokio::io::{AsyncSeekExt, AsyncWriteExt};
|
||||
|
||||
use crate::{AppData, AppDataResponse, RaftNetwork, RaftStorage};
|
||||
use crate::core::{State, RaftCore, SnapshotState, UpdateCurrentLeader};
|
||||
use crate::core::{RaftCore, SnapshotState, State, UpdateCurrentLeader};
|
||||
use crate::error::RaftResult;
|
||||
use crate::raft::{InstallSnapshotRequest, InstallSnapshotResponse};
|
||||
use crate::{AppData, AppDataResponse, RaftNetwork, RaftStorage};
|
||||
|
||||
impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> RaftCore<D, R, N, S> {
|
||||
/// Invoked by leader to send chunks of a snapshot to a follower (§7).
|
||||
|
@ -13,11 +13,11 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Ra
|
|||
/// Leaders always send chunks in order. It is important to note that, according to the Raft spec,
|
||||
/// a log may only have one snapshot at any time. As snapshot contents are application specific,
|
||||
/// the Raft log will only store a pointer to the snapshot file along with the index & term.
|
||||
#[tracing::instrument(level="trace", skip(self, req))]
|
||||
#[tracing::instrument(level = "trace", skip(self, req))]
|
||||
pub(super) async fn handle_install_snapshot_request(&mut self, req: InstallSnapshotRequest) -> RaftResult<InstallSnapshotResponse> {
|
||||
// If message's term is less than most recent term, then we do not honor the request.
|
||||
if req.term < self.current_term {
|
||||
return Ok(InstallSnapshotResponse{term: self.current_term});
|
||||
return Ok(InstallSnapshotResponse { term: self.current_term });
|
||||
}
|
||||
|
||||
// Update election timeout.
|
||||
|
@ -49,43 +49,43 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Ra
|
|||
// Compare current snapshot state with received RPC and handle as needed.
|
||||
match self.snapshot_state.take() {
|
||||
None => Ok(self.begin_installing_snapshot(req).await?),
|
||||
Some(SnapshotState::Snapshotting{handle, ..}) => {
|
||||
Some(SnapshotState::Snapshotting { handle, .. }) => {
|
||||
handle.abort(); // Abort the current compaction in favor of installation from leader.
|
||||
Ok(self.begin_installing_snapshot(req).await?)
|
||||
}
|
||||
Some(SnapshotState::Streaming{snapshot, id, offset}) => Ok(self.continue_installing_snapshot(req, offset, id, snapshot).await?),
|
||||
Some(SnapshotState::Streaming { snapshot, id, offset }) => Ok(self.continue_installing_snapshot(req, offset, id, snapshot).await?),
|
||||
}
|
||||
}
|
||||
|
||||
#[tracing::instrument(level="trace", skip(self, req))]
|
||||
#[tracing::instrument(level = "trace", skip(self, req))]
|
||||
async fn begin_installing_snapshot(&mut self, req: InstallSnapshotRequest) -> RaftResult<InstallSnapshotResponse> {
|
||||
// Create a new snapshot and begin writing its contents.
|
||||
let (id, mut snapshot) = self.storage.create_snapshot().await
|
||||
.map_err(|err| self.map_fatal_storage_error(err))?;
|
||||
let (id, mut snapshot) = self.storage.create_snapshot().await.map_err(|err| self.map_fatal_storage_error(err))?;
|
||||
snapshot.as_mut().write_all(&req.data).await?;
|
||||
|
||||
// If this was a small snapshot, and it is already done, then finish up.
|
||||
if req.done {
|
||||
self.finalize_snapshot_installation(req, id, snapshot).await?;
|
||||
return Ok(InstallSnapshotResponse{term: self.current_term});
|
||||
return Ok(InstallSnapshotResponse { term: self.current_term });
|
||||
}
|
||||
|
||||
// Else, retain snapshot components for later segments & respond.
|
||||
self.snapshot_state = Some(SnapshotState::Streaming{
|
||||
self.snapshot_state = Some(SnapshotState::Streaming {
|
||||
offset: req.data.len() as u64,
|
||||
id, snapshot,
|
||||
id,
|
||||
snapshot,
|
||||
});
|
||||
return Ok(InstallSnapshotResponse{term: self.current_term});
|
||||
return Ok(InstallSnapshotResponse { term: self.current_term });
|
||||
}
|
||||
|
||||
#[tracing::instrument(level="trace", skip(self, req, offset, snapshot))]
|
||||
#[tracing::instrument(level = "trace", skip(self, req, offset, snapshot))]
|
||||
async fn continue_installing_snapshot(
|
||||
&mut self, req: InstallSnapshotRequest, mut offset: u64, id: String, mut snapshot: Box<S::Snapshot>,
|
||||
) -> RaftResult<InstallSnapshotResponse> {
|
||||
// Always seek to the target offset if not an exact match.
|
||||
if req.offset != offset {
|
||||
if let Err(err) = snapshot.as_mut().seek(SeekFrom::Start(req.offset)).await {
|
||||
self.snapshot_state = Some(SnapshotState::Streaming{offset, id, snapshot});
|
||||
self.snapshot_state = Some(SnapshotState::Streaming { offset, id, snapshot });
|
||||
return Err(err.into());
|
||||
}
|
||||
offset = req.offset;
|
||||
|
@ -93,7 +93,7 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Ra
|
|||
|
||||
// Write the next segment & update offset.
|
||||
if let Err(err) = snapshot.as_mut().write_all(&req.data).await {
|
||||
self.snapshot_state = Some(SnapshotState::Streaming{offset, id, snapshot});
|
||||
self.snapshot_state = Some(SnapshotState::Streaming { offset, id, snapshot });
|
||||
return Err(err.into());
|
||||
}
|
||||
offset += req.data.len() as u64;
|
||||
|
@ -102,25 +102,35 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Ra
|
|||
if req.done {
|
||||
self.finalize_snapshot_installation(req, id, snapshot).await?;
|
||||
} else {
|
||||
self.snapshot_state = Some(SnapshotState::Streaming{offset, id, snapshot});
|
||||
self.snapshot_state = Some(SnapshotState::Streaming { offset, id, snapshot });
|
||||
}
|
||||
return Ok(InstallSnapshotResponse{term: self.current_term});
|
||||
return Ok(InstallSnapshotResponse { term: self.current_term });
|
||||
}
|
||||
|
||||
/// Finalize the installation of a new snapshot.
|
||||
///
|
||||
/// Any errors which come up from this routine will cause the Raft node to go into shutdown.
|
||||
#[tracing::instrument(level="trace", skip(self, req, snapshot))]
|
||||
#[tracing::instrument(level = "trace", skip(self, req, snapshot))]
|
||||
async fn finalize_snapshot_installation(&mut self, req: InstallSnapshotRequest, id: String, mut snapshot: Box<S::Snapshot>) -> RaftResult<()> {
|
||||
snapshot.as_mut().shutdown().await.map_err(|err| self.map_fatal_storage_error(err.into()))?;
|
||||
snapshot
|
||||
.as_mut()
|
||||
.shutdown()
|
||||
.await
|
||||
.map_err(|err| self.map_fatal_storage_error(err.into()))?;
|
||||
let delete_through = if self.last_log_index > req.last_included_index {
|
||||
Some(req.last_included_index)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
self.storage.finalize_snapshot_installation(req.last_included_index, req.last_included_term, delete_through, id, snapshot).await
|
||||
self.storage
|
||||
.finalize_snapshot_installation(req.last_included_index, req.last_included_term, delete_through, id, snapshot)
|
||||
.await
|
||||
.map_err(|err| self.map_fatal_storage_error(err))?;
|
||||
let membership = self
|
||||
.storage
|
||||
.get_membership_config()
|
||||
.await
|
||||
.map_err(|err| self.map_fatal_storage_error(err))?;
|
||||
let membership = self.storage.get_membership_config().await.map_err(|err| self.map_fatal_storage_error(err))?;
|
||||
self.update_membership(membership)?;
|
||||
self.last_log_index = req.last_included_index;
|
||||
self.last_log_term = req.last_included_term;
|
||||
|
|
|
@ -8,25 +8,25 @@ pub(crate) mod replication;
|
|||
mod vote;
|
||||
|
||||
use std::collections::{BTreeMap, HashSet};
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::Arc;
|
||||
|
||||
use futures::future::{Abortable, AbortHandle};
|
||||
use futures::future::{AbortHandle, Abortable};
|
||||
use futures::stream::FuturesOrdered;
|
||||
use tokio::stream::StreamExt;
|
||||
use tokio::sync::{broadcast, mpsc, oneshot, watch};
|
||||
use tokio::task::JoinHandle;
|
||||
use tokio::time::{Instant, Duration, delay_until};
|
||||
use tokio::time::{delay_until, Duration, Instant};
|
||||
use tracing_futures::Instrument;
|
||||
|
||||
use crate::{AppData, AppDataResponse, RaftNetwork, RaftStorage, NodeId};
|
||||
use crate::config::{Config, SnapshotPolicy};
|
||||
use crate::core::client::ClientRequestEntry;
|
||||
use crate::error::{ClientReadError, ClientWriteError, ChangeConfigError, InitializeError, RaftError, RaftResult};
|
||||
use crate::error::{ChangeConfigError, ClientReadError, ClientWriteError, InitializeError, RaftError, RaftResult};
|
||||
use crate::metrics::RaftMetrics;
|
||||
use crate::raft::{ChangeMembershipTx, ClientWriteRequest, ClientReadResponseTx, ClientWriteResponseTx, RaftMsg, MembershipConfig};
|
||||
use crate::replication::{RaftEvent, ReplicationStream, ReplicaEvent};
|
||||
use crate::raft::{ChangeMembershipTx, ClientReadResponseTx, ClientWriteRequest, ClientWriteResponseTx, MembershipConfig, RaftMsg};
|
||||
use crate::replication::{RaftEvent, ReplicaEvent, ReplicationStream};
|
||||
use crate::storage::HardState;
|
||||
use crate::{AppData, AppDataResponse, NodeId, RaftNetwork, RaftStorage};
|
||||
|
||||
/// The core type implementing the Raft protocol.
|
||||
pub struct RaftCore<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> {
|
||||
|
@ -104,21 +104,33 @@ pub struct RaftCore<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftSt
|
|||
|
||||
impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> RaftCore<D, R, N, S> {
|
||||
pub(crate) fn spawn(
|
||||
id: NodeId, config: Arc<Config>, network: Arc<N>, storage: Arc<S>,
|
||||
rx_api: mpsc::UnboundedReceiver<RaftMsg<D, R>>,
|
||||
tx_metrics: watch::Sender<RaftMetrics>,
|
||||
needs_shutdown: Arc<AtomicBool>,
|
||||
id: NodeId, config: Arc<Config>, network: Arc<N>, storage: Arc<S>, rx_api: mpsc::UnboundedReceiver<RaftMsg<D, R>>,
|
||||
tx_metrics: watch::Sender<RaftMetrics>, needs_shutdown: Arc<AtomicBool>,
|
||||
) -> JoinHandle<RaftResult<()>> {
|
||||
let membership = MembershipConfig::new_initial(id); // This is updated from storage in the main loop.
|
||||
let (tx_compaction, rx_compaction) = mpsc::channel(1);
|
||||
let this = Self{
|
||||
id, config, membership, network, storage,
|
||||
let this = Self {
|
||||
id,
|
||||
config,
|
||||
membership,
|
||||
network,
|
||||
storage,
|
||||
target_state: State::Follower,
|
||||
commit_index: 0, last_applied: 0, current_term: 0, current_leader: None, voted_for: None,
|
||||
last_log_index: 0, last_log_term: 0,
|
||||
snapshot_state: None, snapshot_index: 0,
|
||||
last_heartbeat: None, next_election_timeout: None,
|
||||
tx_compaction, rx_compaction, rx_api, tx_metrics,
|
||||
commit_index: 0,
|
||||
last_applied: 0,
|
||||
current_term: 0,
|
||||
current_leader: None,
|
||||
voted_for: None,
|
||||
last_log_index: 0,
|
||||
last_log_term: 0,
|
||||
snapshot_state: None,
|
||||
snapshot_index: 0,
|
||||
last_heartbeat: None,
|
||||
next_election_timeout: None,
|
||||
tx_compaction,
|
||||
rx_compaction,
|
||||
rx_api,
|
||||
tx_metrics,
|
||||
needs_shutdown,
|
||||
};
|
||||
tokio::spawn(this.main())
|
||||
|
@ -141,7 +153,12 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Ra
|
|||
self.commit_index = 0;
|
||||
|
||||
// Fetch the most recent snapshot in the system.
|
||||
if let Some(snapshot) = self.storage.get_current_snapshot().await.map_err(|err| self.map_fatal_storage_error(err))? {
|
||||
if let Some(snapshot) = self
|
||||
.storage
|
||||
.get_current_snapshot()
|
||||
.await
|
||||
.map_err(|err| self.map_fatal_storage_error(err))?
|
||||
{
|
||||
self.snapshot_index = snapshot.index;
|
||||
}
|
||||
|
||||
|
@ -177,9 +194,9 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Ra
|
|||
}
|
||||
|
||||
/// Report a metrics payload on the current state of the Raft node.
|
||||
#[tracing::instrument(level="trace", skip(self))]
|
||||
#[tracing::instrument(level = "trace", skip(self))]
|
||||
fn report_metrics(&mut self) {
|
||||
let res = self.tx_metrics.broadcast(RaftMetrics{
|
||||
let res = self.tx_metrics.broadcast(RaftMetrics {
|
||||
id: self.id,
|
||||
state: self.target_state,
|
||||
current_term: self.current_term,
|
||||
|
@ -194,14 +211,17 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Ra
|
|||
}
|
||||
|
||||
/// Save the Raft node's current hard state to disk.
|
||||
#[tracing::instrument(level="trace", skip(self))]
|
||||
#[tracing::instrument(level = "trace", skip(self))]
|
||||
async fn save_hard_state(&mut self) -> RaftResult<()> {
|
||||
let hs = HardState{current_term: self.current_term, voted_for: self.voted_for};
|
||||
let hs = HardState {
|
||||
current_term: self.current_term,
|
||||
voted_for: self.voted_for,
|
||||
};
|
||||
Ok(self.storage.save_hard_state(&hs).await.map_err(|err| self.map_fatal_storage_error(err))?)
|
||||
}
|
||||
|
||||
/// Update core's target state, ensuring all invariants are upheld.
|
||||
#[tracing::instrument(level="trace", skip(self))]
|
||||
#[tracing::instrument(level = "trace", skip(self))]
|
||||
fn set_target_state(&mut self, target_state: State) {
|
||||
if target_state == State::Follower && !self.membership.contains(&self.id) {
|
||||
self.target_state = State::NonVoter;
|
||||
|
@ -210,7 +230,7 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Ra
|
|||
}
|
||||
|
||||
/// Get the next election timeout, generating a new value if not set.
|
||||
#[tracing::instrument(level="trace", skip(self))]
|
||||
#[tracing::instrument(level = "trace", skip(self))]
|
||||
fn get_next_election_timeout(&mut self) -> Instant {
|
||||
match self.next_election_timeout {
|
||||
Some(inst) => inst,
|
||||
|
@ -223,13 +243,13 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Ra
|
|||
}
|
||||
|
||||
/// Set a value for the next election timeout.
|
||||
#[tracing::instrument(level="trace", skip(self))]
|
||||
#[tracing::instrument(level = "trace", skip(self))]
|
||||
fn update_next_election_timeout(&mut self) {
|
||||
self.next_election_timeout = Some(Instant::now() + Duration::from_millis(self.config.new_rand_election_timeout()));
|
||||
}
|
||||
|
||||
/// Update the value of the `current_leader` property.
|
||||
#[tracing::instrument(level="trace", skip(self))]
|
||||
#[tracing::instrument(level = "trace", skip(self))]
|
||||
fn update_current_leader(&mut self, update: UpdateCurrentLeader) {
|
||||
match update {
|
||||
UpdateCurrentLeader::ThisNode => {
|
||||
|
@ -240,12 +260,12 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Ra
|
|||
}
|
||||
UpdateCurrentLeader::Unknown => {
|
||||
self.current_leader = None;
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Encapsulate the process of updating the current term, as the `voted_for` state must also be updated.
|
||||
#[tracing::instrument(level="trace", skip(self))]
|
||||
#[tracing::instrument(level = "trace", skip(self))]
|
||||
fn update_current_term(&mut self, new_term: u64, voted_for: Option<NodeId>) {
|
||||
if new_term > self.current_term {
|
||||
self.current_term = new_term;
|
||||
|
@ -258,7 +278,7 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Ra
|
|||
/// This method assumes that a storage error observed here is non-recoverable. As such, the
|
||||
/// Raft node will be instructed to stop. If such behavior is not needed, then don't use this
|
||||
/// interface.
|
||||
#[tracing::instrument(level="trace", skip(self))]
|
||||
#[tracing::instrument(level = "trace", skip(self))]
|
||||
fn map_fatal_storage_error(&mut self, err: anyhow::Error) -> RaftError {
|
||||
tracing::error!({error=%err, id=self.id}, "fatal storage error, shutting down");
|
||||
self.set_target_state(State::Shutdown);
|
||||
|
@ -266,7 +286,7 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Ra
|
|||
}
|
||||
|
||||
/// Update the node's current membership config & save hard state.
|
||||
#[tracing::instrument(level="trace", skip(self))]
|
||||
#[tracing::instrument(level = "trace", skip(self))]
|
||||
fn update_membership(&mut self, cfg: MembershipConfig) -> RaftResult<()> {
|
||||
// If the given config does not contain this node's ID, it means one of the following:
|
||||
//
|
||||
|
@ -287,19 +307,19 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Ra
|
|||
}
|
||||
|
||||
/// Update the system's snapshot state based on the given data.
|
||||
#[tracing::instrument(level="trace", skip(self))]
|
||||
#[tracing::instrument(level = "trace", skip(self))]
|
||||
fn update_snapshot_state(&mut self, update: SnapshotUpdate) {
|
||||
if let SnapshotUpdate::SnapshotComplete(index) = update {
|
||||
self.snapshot_index = index
|
||||
}
|
||||
// If snapshot state is anything other than streaming, then drop it.
|
||||
if let Some(state @ SnapshotState::Streaming{..}) = self.snapshot_state.take() {
|
||||
if let Some(state @ SnapshotState::Streaming { .. }) = self.snapshot_state.take() {
|
||||
self.snapshot_state = Some(state)
|
||||
}
|
||||
}
|
||||
|
||||
/// Trigger a log compaction (snapshot) job if needed.
|
||||
#[tracing::instrument(level="trace", skip(self))]
|
||||
#[tracing::instrument(level = "trace", skip(self))]
|
||||
pub(self) fn trigger_log_compaction_if_needed(&mut self) {
|
||||
if self.snapshot_state.is_some() {
|
||||
return;
|
||||
|
@ -320,47 +340,54 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Ra
|
|||
let (handle, reg) = AbortHandle::new_pair();
|
||||
let (chan_tx, _) = broadcast::channel(1);
|
||||
let mut tx_compaction = self.tx_compaction.clone();
|
||||
self.snapshot_state = Some(SnapshotState::Snapshotting{through: through_index, handle, sender: chan_tx.clone()});
|
||||
tokio::spawn(async move {
|
||||
let res = Abortable::new(storage.do_log_compaction(through_index), reg).await;
|
||||
match res {
|
||||
Ok(res) => match res {
|
||||
Ok(snapshot) => {
|
||||
let _ = tx_compaction.try_send(SnapshotUpdate::SnapshotComplete(snapshot.index));
|
||||
let _ = chan_tx.send(snapshot.index); // This will always succeed.
|
||||
}
|
||||
Err(err) => {
|
||||
tracing::error!({error=%err}, "error while generating snapshot");
|
||||
self.snapshot_state = Some(SnapshotState::Snapshotting {
|
||||
through: through_index,
|
||||
handle,
|
||||
sender: chan_tx.clone(),
|
||||
});
|
||||
tokio::spawn(
|
||||
async move {
|
||||
let res = Abortable::new(storage.do_log_compaction(through_index), reg).await;
|
||||
match res {
|
||||
Ok(res) => match res {
|
||||
Ok(snapshot) => {
|
||||
let _ = tx_compaction.try_send(SnapshotUpdate::SnapshotComplete(snapshot.index));
|
||||
let _ = chan_tx.send(snapshot.index); // This will always succeed.
|
||||
}
|
||||
Err(err) => {
|
||||
tracing::error!({error=%err}, "error while generating snapshot");
|
||||
let _ = tx_compaction.try_send(SnapshotUpdate::SnapshotFailed);
|
||||
}
|
||||
},
|
||||
Err(_aborted) => {
|
||||
let _ = tx_compaction.try_send(SnapshotUpdate::SnapshotFailed);
|
||||
}
|
||||
},
|
||||
Err(_aborted) => {
|
||||
let _ = tx_compaction.try_send(SnapshotUpdate::SnapshotFailed);
|
||||
}
|
||||
}
|
||||
}.instrument(tracing::debug_span!("beginning new log compaction process")));
|
||||
.instrument(tracing::debug_span!("beginning new log compaction process")),
|
||||
);
|
||||
}
|
||||
|
||||
/// Reject an init config request due to the Raft node being in a state which prohibits the request.
|
||||
#[tracing::instrument(level="trace", skip(self, tx))]
|
||||
#[tracing::instrument(level = "trace", skip(self, tx))]
|
||||
fn reject_init_with_config(&self, tx: oneshot::Sender<Result<(), InitializeError>>) {
|
||||
let _ = tx.send(Err(InitializeError::NotAllowed));
|
||||
}
|
||||
|
||||
/// Reject a proposed config change request due to the Raft node being in a state which prohibits the request.
|
||||
#[tracing::instrument(level="trace", skip(self, tx))]
|
||||
#[tracing::instrument(level = "trace", skip(self, tx))]
|
||||
fn reject_config_change_not_leader(&self, tx: oneshot::Sender<Result<(), ChangeConfigError>>) {
|
||||
let _ = tx.send(Err(ChangeConfigError::NodeNotLeader(self.current_leader)));
|
||||
}
|
||||
|
||||
/// Forward the given client write request to the leader.
|
||||
#[tracing::instrument(level="trace", skip(self, req, tx))]
|
||||
#[tracing::instrument(level = "trace", skip(self, req, tx))]
|
||||
fn forward_client_write_request(&self, req: ClientWriteRequest<D>, tx: ClientWriteResponseTx<D, R>) {
|
||||
let _ = tx.send(Err(ClientWriteError::ForwardToLeader(req, self.current_leader)));
|
||||
}
|
||||
|
||||
/// Forward the given client read request to the leader.
|
||||
#[tracing::instrument(level="trace", skip(self, tx))]
|
||||
#[tracing::instrument(level = "trace", skip(self, tx))]
|
||||
fn forward_client_read_request(&self, tx: ClientReadResponseTx) {
|
||||
let _ = tx.send(Err(ClientReadError::ForwardToLeader(self.current_leader)));
|
||||
}
|
||||
|
@ -426,22 +453,38 @@ pub enum State {
|
|||
impl State {
|
||||
/// Check if currently in non-voter state.
|
||||
pub fn is_non_voter(&self) -> bool {
|
||||
if let Self::NonVoter = self { true } else { false }
|
||||
if let Self::NonVoter = self {
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if currently in follower state.
|
||||
pub fn is_follower(&self) -> bool {
|
||||
if let Self::Follower = self { true } else { false }
|
||||
if let Self::Follower = self {
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if currently in candidate state.
|
||||
pub fn is_candidate(&self) -> bool {
|
||||
if let Self::Candidate = self { true } else { false }
|
||||
if let Self::Candidate = self {
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if currently in leader state.
|
||||
pub fn is_leader(&self) -> bool {
|
||||
if let Self::Leader = self { true } else { false }
|
||||
if let Self::Leader = self {
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -479,15 +522,22 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
/// Create a new instance.
|
||||
pub(self) fn new(core: &'a mut RaftCore<D, R, N, S>) -> Self {
|
||||
let consensus_state = if core.membership.is_in_joint_consensus() {
|
||||
ConsensusState::Joint{is_committed: false}
|
||||
ConsensusState::Joint { is_committed: false }
|
||||
} else {
|
||||
ConsensusState::Uniform
|
||||
};
|
||||
let (replicationtx, replicationrx) = mpsc::unbounded_channel();
|
||||
Self{
|
||||
core, nodes: BTreeMap::new(), non_voters: BTreeMap::new(), is_stepping_down: false,
|
||||
replicationtx, replicationrx, consensus_state, awaiting_committed: Vec::new(),
|
||||
propose_config_change_cb: None, joint_consensus_cb: FuturesOrdered::new(),
|
||||
Self {
|
||||
core,
|
||||
nodes: BTreeMap::new(),
|
||||
non_voters: BTreeMap::new(),
|
||||
is_stepping_down: false,
|
||||
replicationtx,
|
||||
replicationrx,
|
||||
consensus_state,
|
||||
awaiting_committed: Vec::new(),
|
||||
propose_config_change_cb: None,
|
||||
joint_consensus_cb: FuturesOrdered::new(),
|
||||
uniform_consensus_cb: FuturesOrdered::new(),
|
||||
}
|
||||
}
|
||||
|
@ -496,7 +546,11 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
#[tracing::instrument(level="trace", skip(self), fields(id=self.core.id, raft_state="leader"))]
|
||||
pub(self) async fn run(mut self) -> RaftResult<()> {
|
||||
// Spawn replication streams.
|
||||
let targets = self.core.membership.all_nodes().into_iter()
|
||||
let targets = self
|
||||
.core
|
||||
.membership
|
||||
.all_nodes()
|
||||
.into_iter()
|
||||
.filter(|elem| elem != &self.core.id)
|
||||
.collect::<Vec<_>>();
|
||||
for target in targets {
|
||||
|
@ -523,7 +577,7 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
}
|
||||
return Ok(());
|
||||
}
|
||||
tokio::select!{
|
||||
tokio::select! {
|
||||
Some(msg) = self.core.rx_api.next() => match msg {
|
||||
RaftMsg::AppendEntries{rpc, tx} => {
|
||||
let _ = tx.send(self.core.handle_append_entries_request(rpc).await);
|
||||
|
@ -629,7 +683,7 @@ impl ConsensusState {
|
|||
/// 2. the corresponding config for this consensus state has been committed to the cluster.
|
||||
pub fn is_joint_consensus_safe_to_finalize(&self) -> bool {
|
||||
match self {
|
||||
ConsensusState::Joint{is_committed} => *is_committed,
|
||||
ConsensusState::Joint { is_committed } => *is_committed,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
@ -653,7 +707,13 @@ struct CandidateState<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S:
|
|||
|
||||
impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> CandidateState<'a, D, R, N, S> {
|
||||
pub(self) fn new(core: &'a mut RaftCore<D, R, N, S>) -> Self {
|
||||
Self{core, votes_granted_old: 0, votes_needed_old: 0, votes_granted_new: 0, votes_needed_new: 0}
|
||||
Self {
|
||||
core,
|
||||
votes_granted_old: 0,
|
||||
votes_needed_old: 0,
|
||||
votes_granted_new: 0,
|
||||
votes_needed_new: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Run the candidate loop.
|
||||
|
@ -687,7 +747,7 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
}
|
||||
|
||||
let mut timeout_fut = delay_until(self.core.get_next_election_timeout());
|
||||
tokio::select!{
|
||||
tokio::select! {
|
||||
_ = &mut timeout_fut => break, // This election has timed-out. Break to outer loop, which starts a new term.
|
||||
Some((res, peer)) = pending_votes.recv() => self.handle_vote_response(res, peer).await?,
|
||||
Some(msg) = self.core.rx_api.next() => match msg {
|
||||
|
@ -733,7 +793,7 @@ pub struct FollowerState<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>,
|
|||
|
||||
impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> FollowerState<'a, D, R, N, S> {
|
||||
pub(self) fn new(core: &'a mut RaftCore<D, R, N, S>) -> Self {
|
||||
Self{core}
|
||||
Self { core }
|
||||
}
|
||||
|
||||
/// Run the follower loop.
|
||||
|
@ -746,7 +806,7 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
}
|
||||
|
||||
let mut election_timeout = delay_until(self.core.get_next_election_timeout()); // Value is updated as heartbeats are received.
|
||||
tokio::select!{
|
||||
tokio::select! {
|
||||
// If an election timeout is hit, then we need to transition to candidate.
|
||||
_ = &mut election_timeout => self.core.set_target_state(State::Candidate),
|
||||
Some(msg) = self.core.rx_api.next() => match msg {
|
||||
|
@ -791,7 +851,7 @@ pub struct NonVoterState<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>,
|
|||
|
||||
impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> NonVoterState<'a, D, R, N, S> {
|
||||
pub(self) fn new(core: &'a mut RaftCore<D, R, N, S>) -> Self {
|
||||
Self{core}
|
||||
Self { core }
|
||||
}
|
||||
|
||||
/// Run the non-voter loop.
|
||||
|
@ -802,7 +862,7 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
if !self.core.target_state.is_non_voter() || self.core.needs_shutdown.load(Ordering::SeqCst) {
|
||||
return Ok(());
|
||||
}
|
||||
tokio::select!{
|
||||
tokio::select! {
|
||||
Some(msg) = self.core.rx_api.next() => match msg {
|
||||
RaftMsg::AppendEntries{rpc, tx} => {
|
||||
let _ = tx.send(self.core.handle_append_entries_request(rpc).await);
|
||||
|
|
|
@ -1,22 +1,29 @@
|
|||
use tokio::sync::oneshot;
|
||||
|
||||
use crate::{AppData, AppDataResponse, NodeId, RaftNetwork, RaftStorage};
|
||||
use crate::config::SnapshotPolicy;
|
||||
use crate::error::RaftResult;
|
||||
use crate::core::{ConsensusState, LeaderState, ReplicationState, SnapshotState, State, UpdateCurrentLeader};
|
||||
use crate::error::RaftResult;
|
||||
use crate::replication::{RaftEvent, ReplicaEvent, ReplicationStream};
|
||||
use crate::storage::CurrentSnapshotData;
|
||||
use crate::{AppData, AppDataResponse, NodeId, RaftNetwork, RaftStorage};
|
||||
|
||||
impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> LeaderState<'a, D, R, N, S> {
|
||||
/// Spawn a new replication stream returning its replication state handle.
|
||||
#[tracing::instrument(level="trace", skip(self))]
|
||||
#[tracing::instrument(level = "trace", skip(self))]
|
||||
pub(super) fn spawn_replication_stream(&self, target: NodeId) -> ReplicationState<D> {
|
||||
let replstream = ReplicationStream::new(
|
||||
self.core.id, target, self.core.current_term, self.core.config.clone(),
|
||||
self.core.last_log_index, self.core.last_log_term, self.core.commit_index,
|
||||
self.core.network.clone(), self.core.storage.clone(), self.replicationtx.clone(),
|
||||
self.core.id,
|
||||
target,
|
||||
self.core.current_term,
|
||||
self.core.config.clone(),
|
||||
self.core.last_log_index,
|
||||
self.core.last_log_term,
|
||||
self.core.commit_index,
|
||||
self.core.network.clone(),
|
||||
self.core.storage.clone(),
|
||||
self.replicationtx.clone(),
|
||||
);
|
||||
ReplicationState{
|
||||
ReplicationState {
|
||||
match_index: self.core.last_log_index,
|
||||
match_term: self.core.current_term,
|
||||
is_at_line_rate: false,
|
||||
|
@ -26,13 +33,17 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
}
|
||||
|
||||
/// Handle a replication event coming from one of the replication streams.
|
||||
#[tracing::instrument(level="trace", skip(self, event))]
|
||||
#[tracing::instrument(level = "trace", skip(self, event))]
|
||||
pub(super) async fn handle_replica_event(&mut self, event: ReplicaEvent<S::Snapshot>) {
|
||||
let res = match event {
|
||||
ReplicaEvent::RateUpdate{target, is_line_rate} => self.handle_rate_update(target, is_line_rate).await,
|
||||
ReplicaEvent::RevertToFollower{target, term} => self.handle_revert_to_follower(target, term).await,
|
||||
ReplicaEvent::UpdateMatchIndex{target, match_index, match_term} => self.handle_update_match_index(target, match_index, match_term).await,
|
||||
ReplicaEvent::NeedsSnapshot{target, tx} => self.handle_needs_snapshot(target, tx).await,
|
||||
ReplicaEvent::RateUpdate { target, is_line_rate } => self.handle_rate_update(target, is_line_rate).await,
|
||||
ReplicaEvent::RevertToFollower { target, term } => self.handle_revert_to_follower(target, term).await,
|
||||
ReplicaEvent::UpdateMatchIndex {
|
||||
target,
|
||||
match_index,
|
||||
match_term,
|
||||
} => self.handle_update_match_index(target, match_index, match_term).await,
|
||||
ReplicaEvent::NeedsSnapshot { target, tx } => self.handle_needs_snapshot(target, tx).await,
|
||||
ReplicaEvent::Shutdown => {
|
||||
self.core.set_target_state(State::Shutdown);
|
||||
return;
|
||||
|
@ -44,7 +55,7 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
}
|
||||
|
||||
/// Handle events from replication streams updating their replication rate tracker.
|
||||
#[tracing::instrument(level="trace", skip(self, target, is_line_rate))]
|
||||
#[tracing::instrument(level = "trace", skip(self, target, is_line_rate))]
|
||||
async fn handle_rate_update(&mut self, target: NodeId, is_line_rate: bool) -> RaftResult<()> {
|
||||
// Get a handle to the target's replication state & update it as needed.
|
||||
if let Some(state) = self.nodes.get_mut(&target) {
|
||||
|
@ -62,7 +73,7 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
}
|
||||
// If we are in NonVoterSync state, and this is one of the nodes being awaited, then update.
|
||||
match std::mem::replace(&mut self.consensus_state, ConsensusState::Uniform) {
|
||||
ConsensusState::NonVoterSync{mut awaiting, members, tx} => {
|
||||
ConsensusState::NonVoterSync { mut awaiting, members, tx } => {
|
||||
awaiting.remove(&target);
|
||||
if awaiting.is_empty() {
|
||||
// We are ready to move forward with entering joint consensus.
|
||||
|
@ -70,7 +81,7 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
self.change_membership(members, tx).await;
|
||||
} else {
|
||||
// We are still awaiting additional nodes, so replace our original state.
|
||||
self.consensus_state = ConsensusState::NonVoterSync{awaiting, members, tx};
|
||||
self.consensus_state = ConsensusState::NonVoterSync { awaiting, members, tx };
|
||||
}
|
||||
}
|
||||
other => self.consensus_state = other, // Set the original value back to what it was.
|
||||
|
@ -81,7 +92,7 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
}
|
||||
|
||||
/// Handle events from replication streams for when this node needs to revert to follower state.
|
||||
#[tracing::instrument(level="trace", skip(self, term))]
|
||||
#[tracing::instrument(level = "trace", skip(self, term))]
|
||||
async fn handle_revert_to_follower(&mut self, _: NodeId, term: u64) -> RaftResult<()> {
|
||||
if term > self.core.current_term {
|
||||
self.core.update_current_term(term, None);
|
||||
|
@ -93,7 +104,7 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
}
|
||||
|
||||
/// Handle events from a replication stream which updates the target node's match index.
|
||||
#[tracing::instrument(level="trace", skip(self, target, match_index))]
|
||||
#[tracing::instrument(level = "trace", skip(self, target, match_index))]
|
||||
async fn handle_update_match_index(&mut self, target: NodeId, match_index: u64, match_term: u64) -> RaftResult<()> {
|
||||
// If this is a non-voter, then update and return.
|
||||
if let Some(state) = self.non_voters.get_mut(&target) {
|
||||
|
@ -113,7 +124,7 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
needs_removal = true;
|
||||
}
|
||||
}
|
||||
},
|
||||
}
|
||||
_ => return Ok(()), // Node not found.
|
||||
}
|
||||
|
||||
|
@ -125,7 +136,9 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
}
|
||||
|
||||
// Determine the new commit index of the current membership config nodes.
|
||||
let mut indices_c0 = self.nodes.iter()
|
||||
let mut indices_c0 = self
|
||||
.nodes
|
||||
.iter()
|
||||
.filter(|(id, _)| self.core.membership.members.contains(id))
|
||||
.map(|(_, node)| node.match_index)
|
||||
.collect::<Vec<_>>();
|
||||
|
@ -137,7 +150,9 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
// If we are in joint consensus, then calculate the new commit index of the new membership config nodes.
|
||||
let mut commit_index_c1 = commit_index_c0; // Defaults to just matching C0.
|
||||
if let Some(members) = &self.core.membership.members_after_consensus {
|
||||
let indices_c1 = self.nodes.iter()
|
||||
let indices_c1 = self
|
||||
.nodes
|
||||
.iter()
|
||||
.filter(|(id, _)| members.contains(id))
|
||||
.map(|(_, node)| node.match_index)
|
||||
.collect();
|
||||
|
@ -152,14 +167,21 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
|
||||
// Update all replication streams based on new commit index.
|
||||
for node in self.nodes.values() {
|
||||
let _ = node.replstream.repltx.send(RaftEvent::UpdateCommitIndex{commit_index: self.core.commit_index});
|
||||
let _ = node.replstream.repltx.send(RaftEvent::UpdateCommitIndex {
|
||||
commit_index: self.core.commit_index,
|
||||
});
|
||||
}
|
||||
for node in self.non_voters.values() {
|
||||
let _ = node.state.replstream.repltx.send(RaftEvent::UpdateCommitIndex{commit_index: self.core.commit_index});
|
||||
let _ = node.state.replstream.repltx.send(RaftEvent::UpdateCommitIndex {
|
||||
commit_index: self.core.commit_index,
|
||||
});
|
||||
}
|
||||
|
||||
// Check if there are any pending requests which need to be processed.
|
||||
let filter = self.awaiting_committed.iter().enumerate()
|
||||
let filter = self
|
||||
.awaiting_committed
|
||||
.iter()
|
||||
.enumerate()
|
||||
.take_while(|(_idx, elem)| elem.entry.index <= self.core.commit_index)
|
||||
.last()
|
||||
.map(|(idx, _)| idx);
|
||||
|
@ -175,7 +197,7 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
}
|
||||
|
||||
/// Handle events from replication streams requesting for snapshot info.
|
||||
#[tracing::instrument(level="trace", skip(self, tx))]
|
||||
#[tracing::instrument(level = "trace", skip(self, tx))]
|
||||
async fn handle_needs_snapshot(&mut self, _: NodeId, tx: oneshot::Sender<CurrentSnapshotData<S::Snapshot>>) -> RaftResult<()> {
|
||||
// Ensure snapshotting is configured, else do nothing.
|
||||
let threshold = match &self.core.config.snapshot_policy {
|
||||
|
@ -183,7 +205,11 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
};
|
||||
|
||||
// Check for existence of current snapshot.
|
||||
let current_snapshot_opt = self.core.storage.get_current_snapshot().await
|
||||
let current_snapshot_opt = self
|
||||
.core
|
||||
.storage
|
||||
.get_current_snapshot()
|
||||
.await
|
||||
.map_err(|err| self.core.map_fatal_storage_error(err))?;
|
||||
if let Some(snapshot) = current_snapshot_opt {
|
||||
// If snapshot exists, ensure its distance from the leader's last log index is <= half
|
||||
|
@ -198,13 +224,13 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
// completion (or cancellation), and respond to the replication stream. The repl stream
|
||||
// will wait for the completion and will then send another request to fetch the finished snapshot.
|
||||
// Else we just drop any other state and continue. Leaders never enter `Streaming` state.
|
||||
if let Some(SnapshotState::Snapshotting{through, handle, sender}) = self.core.snapshot_state.take() {
|
||||
if let Some(SnapshotState::Snapshotting { through, handle, sender }) = self.core.snapshot_state.take() {
|
||||
let mut chan = sender.subscribe();
|
||||
tokio::spawn(async move {
|
||||
let _ = chan.recv().await;
|
||||
drop(tx);
|
||||
});
|
||||
self.core.snapshot_state = Some(SnapshotState::Snapshotting{through, handle, sender});
|
||||
self.core.snapshot_state = Some(SnapshotState::Snapshotting { through, handle, sender });
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
|
@ -240,7 +266,7 @@ fn calculate_new_commit_index(mut entries: Vec<u64>, current_commit: u64) -> u64
|
|||
|
||||
// Calculate offset which will give the majority slice of high-end.
|
||||
entries.sort();
|
||||
let offset = if (len % 2) == 0 { (len/2)-1 } else { len/2 };
|
||||
let offset = if (len % 2) == 0 { (len / 2) - 1 } else { len / 2 };
|
||||
let new_val = entries.get(offset).unwrap_or(¤t_commit);
|
||||
if new_val < ¤t_commit {
|
||||
current_commit
|
||||
|
@ -252,7 +278,11 @@ fn calculate_new_commit_index(mut entries: Vec<u64>, current_commit: u64) -> u64
|
|||
/// Check if the given snapshot data is within half of the configured threshold.
///
/// Returns `true` when the distance between `last_log_index` and `snapshot_last_index` is at most
/// `threshold / 2` (integer division). A snapshot index ahead of the last log index counts as a
/// distance of zero.
fn snapshot_is_within_half_of_threshold(snapshot_last_index: &u64, last_log_index: &u64, threshold: &u64) -> bool {
    // Saturating subtraction clamps at zero, which guards against underflow when the snapshot is ahead.
    let distance_from_line = last_log_index.saturating_sub(*snapshot_last_index);
    let half_of_threshold = threshold / 2;
    distance_from_line <= half_of_threshold
}
|
||||
|
@ -277,7 +307,7 @@ mod tests {
|
|||
let res = snapshot_is_within_half_of_threshold($snapshot_last_index, $last_log, $thresh);
|
||||
assert_eq!(res, $exp)
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
test_snapshot_is_within_half_of_threshold!({
|
||||
|
@ -311,37 +341,19 @@ mod tests {
|
|||
entries.sort();
|
||||
assert_eq!(output, $expected, "Sorted values: {:?}", entries);
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
test_calculate_new_commit_index!(
|
||||
basic_values,
|
||||
10, 5, vec![20, 5, 0, 15, 10]
|
||||
);
|
||||
test_calculate_new_commit_index!(basic_values, 10, 5, vec![20, 5, 0, 15, 10]);
|
||||
|
||||
test_calculate_new_commit_index!(
|
||||
len_zero_should_return_current_commit,
|
||||
20, 20, vec![]
|
||||
);
|
||||
test_calculate_new_commit_index!(len_zero_should_return_current_commit, 20, 20, vec![]);
|
||||
|
||||
test_calculate_new_commit_index!(
|
||||
len_one_where_greater_than_current,
|
||||
100, 0, vec![100]
|
||||
);
|
||||
test_calculate_new_commit_index!(len_one_where_greater_than_current, 100, 0, vec![100]);
|
||||
|
||||
test_calculate_new_commit_index!(
|
||||
len_one_where_less_than_current,
|
||||
100, 100, vec![50]
|
||||
);
|
||||
test_calculate_new_commit_index!(len_one_where_less_than_current, 100, 100, vec![50]);
|
||||
|
||||
test_calculate_new_commit_index!(
|
||||
even_number_of_nodes,
|
||||
0, 0, vec![0, 100, 0, 100, 0, 100]
|
||||
);
|
||||
test_calculate_new_commit_index!(even_number_of_nodes, 0, 0, vec![0, 100, 0, 100, 0, 100]);
|
||||
|
||||
test_calculate_new_commit_index!(
|
||||
majority_wins,
|
||||
100, 0, vec![0, 100, 0, 100, 0, 100, 100]
|
||||
);
|
||||
test_calculate_new_commit_index!(majority_wins, 100, 0, vec![0, 100, 0, 100, 0, 100, 100]);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,22 +1,25 @@
|
|||
use tokio::time::Instant;
|
||||
use tokio::sync::mpsc;
|
||||
use tokio::time::Instant;
|
||||
use tracing_futures::Instrument;
|
||||
|
||||
use crate::{AppData, AppDataResponse, NodeId, RaftNetwork, RaftStorage};
|
||||
use crate::error::RaftResult;
|
||||
use crate::core::{CandidateState, RaftCore, State, UpdateCurrentLeader};
|
||||
use crate::error::RaftResult;
|
||||
use crate::raft::{VoteRequest, VoteResponse};
|
||||
use crate::{AppData, AppDataResponse, NodeId, RaftNetwork, RaftStorage};
|
||||
|
||||
impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> RaftCore<D, R, N, S> {
|
||||
/// An RPC invoked by candidates to gather votes (§5.2).
|
||||
///
|
||||
/// See `receiver implementation: RequestVote RPC` in raft-essentials.md in this repo.
|
||||
#[tracing::instrument(level="trace", skip(self, msg))]
|
||||
#[tracing::instrument(level = "trace", skip(self, msg))]
|
||||
pub(super) async fn handle_vote_request(&mut self, msg: VoteRequest) -> RaftResult<VoteResponse> {
|
||||
// If candidate's current term is less than this node's current term, reject.
|
||||
if msg.term < self.current_term {
|
||||
tracing::trace!({candidate=msg.candidate_id, self.current_term, rpc_term=msg.term}, "RequestVote RPC term is less than current term");
|
||||
return Ok(VoteResponse{term: self.current_term, vote_granted: false});
|
||||
return Ok(VoteResponse {
|
||||
term: self.current_term,
|
||||
vote_granted: false,
|
||||
});
|
||||
}
|
||||
|
||||
// Do not respond to the request if we've received a heartbeat within the election timeout minimum.
|
||||
|
@ -24,8 +27,14 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Ra
|
|||
let now = Instant::now();
|
||||
let delta = now.duration_since(*inst);
|
||||
if self.config.election_timeout_min >= (delta.as_millis() as u64) {
|
||||
tracing::trace!({candidate=msg.candidate_id}, "rejecting vote request received within election timeout minimum");
|
||||
return Ok(VoteResponse{term: self.current_term, vote_granted: false});
|
||||
tracing::trace!(
|
||||
{ candidate = msg.candidate_id },
|
||||
"rejecting vote request received within election timeout minimum"
|
||||
);
|
||||
return Ok(VoteResponse {
|
||||
term: self.current_term,
|
||||
vote_granted: false,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -43,18 +52,28 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Ra
|
|||
// If candidate's log is not at least as up-to-date as this node, then reject.
|
||||
let client_is_uptodate = (msg.last_log_term >= self.last_log_term) && (msg.last_log_index >= self.last_log_index);
|
||||
if !client_is_uptodate {
|
||||
tracing::trace!({candidate=msg.candidate_id}, "rejecting vote request as candidate's log is not up-to-date");
|
||||
return Ok(VoteResponse{term: self.current_term, vote_granted: false});
|
||||
tracing::trace!(
|
||||
{ candidate = msg.candidate_id },
|
||||
"rejecting vote request as candidate's log is not up-to-date"
|
||||
);
|
||||
return Ok(VoteResponse {
|
||||
term: self.current_term,
|
||||
vote_granted: false,
|
||||
});
|
||||
}
|
||||
|
||||
// Candidate's log is up-to-date so handle voting conditions.
|
||||
match &self.voted_for {
|
||||
// This node has already voted for the candidate.
|
||||
Some(candidate_id) if candidate_id == &msg.candidate_id => {
|
||||
Ok(VoteResponse{term: self.current_term, vote_granted: true})
|
||||
}
|
||||
Some(candidate_id) if candidate_id == &msg.candidate_id => Ok(VoteResponse {
|
||||
term: self.current_term,
|
||||
vote_granted: true,
|
||||
}),
|
||||
// This node has already voted for a different candidate.
|
||||
Some(_) => Ok(VoteResponse{term: self.current_term, vote_granted: false}),
|
||||
Some(_) => Ok(VoteResponse {
|
||||
term: self.current_term,
|
||||
vote_granted: false,
|
||||
}),
|
||||
// This node has not yet voted for the current term, so vote for the candidate.
|
||||
None => {
|
||||
self.voted_for = Some(msg.candidate_id);
|
||||
|
@ -62,15 +81,18 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Ra
|
|||
self.update_next_election_timeout();
|
||||
self.save_hard_state().await?;
|
||||
tracing::trace!({candidate=msg.candidate_id, msg.term}, "voted for candidate");
|
||||
Ok(VoteResponse{term: self.current_term, vote_granted: true})
|
||||
},
|
||||
Ok(VoteResponse {
|
||||
term: self.current_term,
|
||||
vote_granted: true,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> CandidateState<'a, D, R, N, S> {
|
||||
/// Handle response from a vote request sent to a peer.
|
||||
#[tracing::instrument(level="trace", skip(self, res, target))]
|
||||
#[tracing::instrument(level = "trace", skip(self, res, target))]
|
||||
pub(super) async fn handle_vote_response(&mut self, res: VoteResponse, target: NodeId) -> RaftResult<()> {
|
||||
// If peer's term is greater than current term, revert to follower state.
|
||||
if res.term > self.core.current_term {
|
||||
|
@ -89,7 +111,14 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
self.votes_granted_old += 1;
|
||||
}
|
||||
// Handle vote responses from members of C1 config group.
|
||||
if self.core.membership.members_after_consensus.as_ref().map(|members| members.contains(&target)).unwrap_or(false) {
|
||||
if self
|
||||
.core
|
||||
.membership
|
||||
.members_after_consensus
|
||||
.as_ref()
|
||||
.map(|members| members.contains(&target))
|
||||
.unwrap_or(false)
|
||||
{
|
||||
self.votes_granted_new += 1;
|
||||
}
|
||||
// If we've received enough votes from both config groups, then transition to leader state.
|
||||
|
@ -105,21 +134,24 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
}
|
||||
|
||||
/// Spawn parallel vote requests to all cluster members.
|
||||
#[tracing::instrument(level="trace", skip(self))]
|
||||
#[tracing::instrument(level = "trace", skip(self))]
|
||||
pub(super) fn spawn_parallel_vote_requests(&self) -> mpsc::Receiver<(VoteResponse, NodeId)> {
|
||||
let all_members = self.core.membership.all_nodes();
|
||||
let (tx, rx) = mpsc::channel(all_members.len());
|
||||
for member in all_members.into_iter().filter(|member| member != &self.core.id) {
|
||||
let rpc = VoteRequest::new(self.core.current_term, self.core.id, self.core.last_log_index, self.core.last_log_term);
|
||||
let (network, mut tx_inner) = (self.core.network.clone(), tx.clone());
|
||||
let _ = tokio::spawn(async move {
|
||||
match network.vote(member, rpc).await {
|
||||
Ok(res) => {
|
||||
let _ = tx_inner.send((res, member)).await;
|
||||
let _ = tokio::spawn(
|
||||
async move {
|
||||
match network.vote(member, rpc).await {
|
||||
Ok(res) => {
|
||||
let _ = tx_inner.send((res, member)).await;
|
||||
}
|
||||
Err(err) => tracing::error!({error=%err, peer=member}, "error while requesting vote from peer"),
|
||||
}
|
||||
Err(err) => tracing::error!({error=%err, peer=member}, "error while requesting vote from peer"),
|
||||
}
|
||||
}.instrument(tracing::trace_span!("requesting vote from peer", target=member)));
|
||||
.instrument(tracing::trace_span!("requesting vote from peer", target = member)),
|
||||
);
|
||||
}
|
||||
rx
|
||||
}
|
||||
|
|
|
@ -2,8 +2,8 @@
|
|||
|
||||
use thiserror::Error;
|
||||
|
||||
use crate::{AppData, NodeId};
|
||||
use crate::raft::ClientWriteRequest;
|
||||
use crate::{AppData, NodeId};
|
||||
|
||||
/// A result type where the error variant is always a `RaftError`.
|
||||
pub type RaftResult<T> = std::result::Result<T, RaftError>;
|
||||
|
|
|
@ -1,23 +1,23 @@
|
|||
#![cfg_attr(feature="docinclude", feature(external_doc))]
|
||||
#![cfg_attr(feature="docinclude", doc(include="../README.md"))]
|
||||
#![cfg_attr(feature = "docinclude", feature(external_doc))]
|
||||
#![cfg_attr(feature = "docinclude", doc(include = "../README.md"))]
|
||||
|
||||
pub mod config;
|
||||
mod core;
|
||||
pub mod error;
|
||||
pub mod metrics;
|
||||
pub mod network;
|
||||
mod replication;
|
||||
pub mod raft;
|
||||
mod replication;
|
||||
pub mod storage;
|
||||
|
||||
use std::fmt::Debug;
|
||||
|
||||
use serde::{Serialize, de::DeserializeOwned};
|
||||
use serde::{de::DeserializeOwned, Serialize};
|
||||
|
||||
pub use crate::{
|
||||
config::{Config, ConfigBuilder, SnapshotPolicy},
|
||||
core::State,
|
||||
error::{ClientWriteError, ConfigError, InitializeError, ChangeConfigError, RaftError},
|
||||
error::{ChangeConfigError, ClientWriteError, ConfigError, InitializeError, RaftError},
|
||||
metrics::RaftMetrics,
|
||||
network::RaftNetwork,
|
||||
raft::Raft,
|
||||
|
|
|
@ -7,9 +7,9 @@
|
|||
//! Metrics are observed on a running Raft node via the `Raft::metrics()` method, which will
|
||||
//! return a stream of metrics.
|
||||
|
||||
use crate::NodeId;
|
||||
use crate::core::State;
|
||||
use crate::raft::MembershipConfig;
|
||||
use crate::NodeId;
|
||||
|
||||
/// A set of metrics describing the current state of a Raft node.
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
|
@ -33,6 +33,14 @@ pub struct RaftMetrics {
|
|||
impl RaftMetrics {
|
||||
pub(crate) fn new_initial(id: NodeId) -> Self {
|
||||
let membership_config = MembershipConfig::new_initial(id);
|
||||
Self{id, state: State::Follower, current_term: 0, last_log_index: 0, last_applied: 0, current_leader: None, membership_config}
|
||||
Self {
|
||||
id,
|
||||
state: State::Follower,
|
||||
current_term: 0,
|
||||
last_log_index: 0,
|
||||
last_applied: 0,
|
||||
current_leader: None,
|
||||
membership_config,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -3,10 +3,10 @@
|
|||
use anyhow::Result;
|
||||
use async_trait::async_trait;
|
||||
|
||||
use crate::{AppData, NodeId};
|
||||
use crate::raft::{AppendEntriesRequest, AppendEntriesResponse};
|
||||
use crate::raft::{InstallSnapshotRequest, InstallSnapshotResponse};
|
||||
use crate::raft::{VoteRequest, VoteResponse};
|
||||
use crate::{AppData, NodeId};
|
||||
|
||||
/// A trait defining the interface for a Raft network between cluster members.
|
||||
///
|
||||
|
@ -14,8 +14,8 @@ use crate::raft::{VoteRequest, VoteResponse};
|
|||
/// for details and discussion on this trait and how to implement it.
|
||||
#[async_trait]
|
||||
pub trait RaftNetwork<D>: Send + Sync + 'static
|
||||
where
|
||||
D: AppData,
|
||||
where
|
||||
D: AppData,
|
||||
{
|
||||
/// Send an AppendEntries RPC to the target Raft node (§5).
|
||||
async fn append_entries(&self, target: NodeId, rpc: AppendEntriesRequest<D>) -> Result<AppendEntriesResponse>;
|
||||
|
|
|
@ -1,18 +1,18 @@
|
|||
//! Public Raft interface and data types.
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::Arc;
|
||||
|
||||
use serde::{Serialize, Deserialize};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::sync::{mpsc, oneshot, watch};
|
||||
use tokio::task::JoinHandle;
|
||||
|
||||
use crate::{AppData, AppDataResponse, NodeId, RaftNetwork, RaftStorage};
|
||||
use crate::config::Config;
|
||||
use crate::error::{ClientReadError, ClientWriteError, ChangeConfigError, InitializeError, RaftError, RaftResult};
|
||||
use crate::metrics::RaftMetrics;
|
||||
use crate::core::RaftCore;
|
||||
use crate::error::{ChangeConfigError, ClientReadError, ClientWriteError, InitializeError, RaftError, RaftResult};
|
||||
use crate::metrics::RaftMetrics;
|
||||
use crate::{AppData, AppDataResponse, NodeId, RaftNetwork, RaftStorage};
|
||||
|
||||
/// The Raft API.
|
||||
///
|
||||
|
@ -63,14 +63,14 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Ra
|
|||
let (tx_api, rx_api) = mpsc::unbounded_channel();
|
||||
let (tx_metrics, rx_metrics) = watch::channel(RaftMetrics::new_initial(id));
|
||||
let needs_shutdown = Arc::new(AtomicBool::new(false));
|
||||
let raft_handle = RaftCore::spawn(
|
||||
id, config, network, storage,
|
||||
rx_api, tx_metrics,
|
||||
needs_shutdown.clone(),
|
||||
);
|
||||
Self{
|
||||
tx_api, rx_metrics, raft_handle, needs_shutdown,
|
||||
marker_n: std::marker::PhantomData, marker_s: std::marker::PhantomData,
|
||||
let raft_handle = RaftCore::spawn(id, config, network, storage, rx_api, tx_metrics, needs_shutdown.clone());
|
||||
Self {
|
||||
tx_api,
|
||||
rx_metrics,
|
||||
raft_handle,
|
||||
needs_shutdown,
|
||||
marker_n: std::marker::PhantomData,
|
||||
marker_s: std::marker::PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -78,20 +78,22 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Ra
|
|||
///
|
||||
/// These RPCs are sent by the cluster leader to replicate log entries (§5.3), and are also
|
||||
/// used as heartbeats (§5.2).
|
||||
#[tracing::instrument(level="debug", skip(self, rpc))]
|
||||
#[tracing::instrument(level = "debug", skip(self, rpc))]
|
||||
pub async fn append_entries(&self, rpc: AppendEntriesRequest<D>) -> Result<AppendEntriesResponse, RaftError> {
|
||||
let (tx, rx) = oneshot::channel();
|
||||
self.tx_api.send(RaftMsg::AppendEntries{rpc, tx}).map_err(|_| RaftError::ShuttingDown)?;
|
||||
self.tx_api
|
||||
.send(RaftMsg::AppendEntries { rpc, tx })
|
||||
.map_err(|_| RaftError::ShuttingDown)?;
|
||||
Ok(rx.await.map_err(|_| RaftError::ShuttingDown).and_then(|res| res)?)
|
||||
}
|
||||
|
||||
/// Submit a VoteRequest (RequestVote in the spec) RPC to this Raft node.
|
||||
///
|
||||
/// These RPCs are sent by cluster peers which are in candidate state attempting to gather votes (§5.2).
|
||||
#[tracing::instrument(level="debug", skip(self, rpc))]
|
||||
#[tracing::instrument(level = "debug", skip(self, rpc))]
|
||||
pub async fn vote(&self, rpc: VoteRequest) -> Result<VoteResponse, RaftError> {
|
||||
let (tx, rx) = oneshot::channel();
|
||||
self.tx_api.send(RaftMsg::RequestVote{rpc, tx}).map_err(|_| RaftError::ShuttingDown)?;
|
||||
self.tx_api.send(RaftMsg::RequestVote { rpc, tx }).map_err(|_| RaftError::ShuttingDown)?;
|
||||
Ok(rx.await.map_err(|_| RaftError::ShuttingDown).and_then(|res| res)?)
|
||||
}
|
||||
|
||||
|
@ -99,10 +101,12 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Ra
|
|||
///
|
||||
/// These RPCs are sent by the cluster leader in order to bring a new node or a slow node up-to-speed
|
||||
/// with the leader (§7).
|
||||
#[tracing::instrument(level="debug", skip(self, rpc))]
|
||||
#[tracing::instrument(level = "debug", skip(self, rpc))]
|
||||
pub async fn install_snapshot(&self, rpc: InstallSnapshotRequest) -> Result<InstallSnapshotResponse, RaftError> {
|
||||
let (tx, rx) = oneshot::channel();
|
||||
self.tx_api.send(RaftMsg::InstallSnapshot{rpc, tx}).map_err(|_| RaftError::ShuttingDown)?;
|
||||
self.tx_api
|
||||
.send(RaftMsg::InstallSnapshot { rpc, tx })
|
||||
.map_err(|_| RaftError::ShuttingDown)?;
|
||||
Ok(rx.await.map_err(|_| RaftError::ShuttingDown).and_then(|res| res)?)
|
||||
}
|
||||
|
||||
|
@ -110,11 +114,16 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Ra
|
|||
///
|
||||
/// The actual read operation itself is up to the application, this method just ensures that
|
||||
/// the read will not be stale.
|
||||
#[tracing::instrument(level="debug", skip(self))]
|
||||
#[tracing::instrument(level = "debug", skip(self))]
|
||||
pub async fn client_read(&self) -> Result<(), ClientReadError> {
|
||||
let (tx, rx) = oneshot::channel();
|
||||
self.tx_api.send(RaftMsg::ClientReadRequest{tx}).map_err(|_| ClientReadError::RaftError(RaftError::ShuttingDown))?;
|
||||
Ok(rx.await.map_err(|_| ClientReadError::RaftError(RaftError::ShuttingDown)).and_then(|res| res)?)
|
||||
self.tx_api
|
||||
.send(RaftMsg::ClientReadRequest { tx })
|
||||
.map_err(|_| ClientReadError::RaftError(RaftError::ShuttingDown))?;
|
||||
Ok(rx
|
||||
.await
|
||||
.map_err(|_| ClientReadError::RaftError(RaftError::ShuttingDown))
|
||||
.and_then(|res| res)?)
|
||||
}
|
||||
|
||||
/// Submit a mutating client request to Raft to update the state of the system (§5.1).
|
||||
|
@ -134,11 +143,16 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Ra
|
|||
///
|
||||
/// These are application specific requirements, and must be implemented by the application which is
|
||||
/// being built on top of Raft.
|
||||
#[tracing::instrument(level="debug", skip(self, rpc))]
|
||||
#[tracing::instrument(level = "debug", skip(self, rpc))]
|
||||
pub async fn client_write(&self, rpc: ClientWriteRequest<D>) -> Result<ClientWriteResponse<R>, ClientWriteError<D>> {
|
||||
let (tx, rx) = oneshot::channel();
|
||||
self.tx_api.send(RaftMsg::ClientWriteRequest{rpc, tx}).map_err(|_| ClientWriteError::RaftError(RaftError::ShuttingDown))?;
|
||||
Ok(rx.await.map_err(|_| ClientWriteError::RaftError(RaftError::ShuttingDown)).and_then(|res| res)?)
|
||||
self.tx_api
|
||||
.send(RaftMsg::ClientWriteRequest { rpc, tx })
|
||||
.map_err(|_| ClientWriteError::RaftError(RaftError::ShuttingDown))?;
|
||||
Ok(rx
|
||||
.await
|
||||
.map_err(|_| ClientWriteError::RaftError(RaftError::ShuttingDown))
|
||||
.and_then(|res| res)?)
|
||||
}
|
||||
|
||||
/// Initialize a pristine Raft node with the given config.
|
||||
|
@ -169,11 +183,16 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Ra
|
|||
/// Every member of the cluster should perform these actions. This routine is race-condition
|
||||
/// free, and Raft guarantees that the first node to become the cluster leader will propagate
|
||||
/// only its own config.
|
||||
#[tracing::instrument(level="debug", skip(self))]
|
||||
#[tracing::instrument(level = "debug", skip(self))]
|
||||
pub async fn initialize(&self, members: HashSet<NodeId>) -> Result<(), InitializeError> {
|
||||
let (tx, rx) = oneshot::channel();
|
||||
self.tx_api.send(RaftMsg::Initialize{members, tx}).map_err(|_| RaftError::ShuttingDown)?;
|
||||
Ok(rx.await.map_err(|_| InitializeError::RaftError(RaftError::ShuttingDown)).and_then(|res| res)?)
|
||||
self.tx_api
|
||||
.send(RaftMsg::Initialize { members, tx })
|
||||
.map_err(|_| RaftError::ShuttingDown)?;
|
||||
Ok(rx
|
||||
.await
|
||||
.map_err(|_| InitializeError::RaftError(RaftError::ShuttingDown))
|
||||
.and_then(|res| res)?)
|
||||
}
|
||||
|
||||
/// Synchronize a new Raft node, bringing it up-to-speed (§6).
|
||||
|
@ -188,11 +207,14 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Ra
|
|||
/// application to then call `change_membership` once all of the new nodes are synced.
|
||||
///
|
||||
/// If this Raft node is not the cluster leader, then this call will fail.
|
||||
#[tracing::instrument(level="debug", skip(self))]
|
||||
#[tracing::instrument(level = "debug", skip(self))]
|
||||
pub async fn add_non_voter(&self, id: NodeId) -> Result<(), ChangeConfigError> {
|
||||
let (tx, rx) = oneshot::channel();
|
||||
self.tx_api.send(RaftMsg::AddNonVoter{id, tx}).map_err(|_| RaftError::ShuttingDown)?;
|
||||
Ok(rx.await.map_err(|_| ChangeConfigError::RaftError(RaftError::ShuttingDown)).and_then(|res| res)?)
|
||||
self.tx_api.send(RaftMsg::AddNonVoter { id, tx }).map_err(|_| RaftError::ShuttingDown)?;
|
||||
Ok(rx
|
||||
.await
|
||||
.map_err(|_| ChangeConfigError::RaftError(RaftError::ShuttingDown))
|
||||
.and_then(|res| res)?)
|
||||
}
|
||||
|
||||
/// Propose a cluster configuration change (§6).
|
||||
|
@ -206,11 +228,16 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Ra
|
|||
///
|
||||
/// If this Raft node is not the cluster leader, then the proposed configuration change will be
|
||||
/// rejected.
|
||||
#[tracing::instrument(level="debug", skip(self))]
|
||||
#[tracing::instrument(level = "debug", skip(self))]
|
||||
pub async fn change_membership(&self, members: HashSet<NodeId>) -> Result<(), ChangeConfigError> {
|
||||
let (tx, rx) = oneshot::channel();
|
||||
self.tx_api.send(RaftMsg::ChangeMembership{members, tx}).map_err(|_| RaftError::ShuttingDown)?;
|
||||
Ok(rx.await.map_err(|_| ChangeConfigError::RaftError(RaftError::ShuttingDown)).and_then(|res| res)?)
|
||||
self.tx_api
|
||||
.send(RaftMsg::ChangeMembership { members, tx })
|
||||
.map_err(|_| RaftError::ShuttingDown)?;
|
||||
Ok(rx
|
||||
.await
|
||||
.map_err(|_| ChangeConfigError::RaftError(RaftError::ShuttingDown))
|
||||
.and_then(|res| res)?)
|
||||
}
|
||||
|
||||
/// Get a handle to the metrics channel.
|
||||
|
@ -282,7 +309,7 @@ pub struct AppendEntriesRequest<D: AppData> {
|
|||
///
|
||||
/// This may be empty when the leader is sending heartbeats. Entries
|
||||
/// are batched for efficiency.
|
||||
#[serde(bound="D: AppData")]
|
||||
#[serde(bound = "D: AppData")]
|
||||
pub entries: Vec<Entry<D>>,
|
||||
/// The leader's commit index.
|
||||
pub leader_commit: u64,
|
||||
|
@ -325,7 +352,7 @@ pub struct Entry<D: AppData> {
|
|||
/// This entry's index.
|
||||
pub index: u64,
|
||||
/// This entry's payload.
|
||||
#[serde(bound="D: AppData")]
|
||||
#[serde(bound = "D: AppData")]
|
||||
pub payload: EntryPayload<D>,
|
||||
}
|
||||
|
||||
|
@ -342,7 +369,11 @@ impl<D: AppData> Entry<D> {
|
|||
/// The cluster membership config which is contained in the snapshot, which will always be the
|
||||
/// latest membership covered by the snapshot.
|
||||
pub fn new_snapshot_pointer(index: u64, term: u64, id: String, membership: MembershipConfig) -> Self {
|
||||
Entry{term, index, payload: EntryPayload::SnapshotPointer(EntrySnapshotPointer{id, membership})}
|
||||
Entry {
|
||||
term,
|
||||
index,
|
||||
payload: EntryPayload::SnapshotPointer(EntrySnapshotPointer { id, membership }),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -352,7 +383,7 @@ pub enum EntryPayload<D: AppData> {
|
|||
/// An empty payload committed by a new cluster leader.
|
||||
Blank,
|
||||
/// A normal log entry.
|
||||
#[serde(bound="D: AppData")]
|
||||
#[serde(bound = "D: AppData")]
|
||||
Normal(EntryNormal<D>),
|
||||
/// A config change log entry.
|
||||
ConfigChange(EntryConfigChange),
|
||||
|
@ -364,7 +395,7 @@ pub enum EntryPayload<D: AppData> {
|
|||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||
pub struct EntryNormal<D: AppData> {
|
||||
/// The contents of this entry.
|
||||
#[serde(bound="D: AppData")]
|
||||
#[serde(bound = "D: AppData")]
|
||||
pub data: D,
|
||||
}
|
||||
|
||||
|
@ -416,11 +447,12 @@ impl MembershipConfig {
|
|||
///
|
||||
/// When in joint consensus, this will check both config groups.
|
||||
pub fn contains(&self, x: &NodeId) -> bool {
|
||||
self.members.contains(x) || if let Some(members) = &self.members_after_consensus {
|
||||
members.contains(x)
|
||||
} else {
|
||||
false
|
||||
}
|
||||
self.members.contains(x)
|
||||
|| if let Some(members) = &self.members_after_consensus {
|
||||
members.contains(x)
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Check to see if the config is currently in joint consensus.
|
||||
|
@ -432,7 +464,10 @@ impl MembershipConfig {
|
|||
pub fn new_initial(id: NodeId) -> Self {
|
||||
let mut members = HashSet::new();
|
||||
members.insert(id);
|
||||
Self{members, members_after_consensus: None}
|
||||
Self {
|
||||
members,
|
||||
members_after_consensus: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -455,7 +490,12 @@ pub struct VoteRequest {
|
|||
impl VoteRequest {
|
||||
/// Create a new instance.
|
||||
pub fn new(term: u64, candidate_id: u64, last_log_index: u64, last_log_term: u64) -> Self {
|
||||
Self{term, candidate_id, last_log_index, last_log_term}
|
||||
Self {
|
||||
term,
|
||||
candidate_id,
|
||||
last_log_index,
|
||||
last_log_term,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -507,24 +547,24 @@ pub struct InstallSnapshotResponse {
|
|||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct ClientWriteRequest<D: AppData> {
|
||||
/// The application specific contents of this client request.
|
||||
#[serde(bound="D: AppData")]
|
||||
#[serde(bound = "D: AppData")]
|
||||
pub(crate) entry: EntryPayload<D>,
|
||||
}
|
||||
|
||||
impl<D: AppData> ClientWriteRequest<D> {
|
||||
/// Create a new client payload instance with a normal entry type.
|
||||
pub fn new(entry: D) -> Self {
|
||||
Self::new_base(EntryPayload::Normal(EntryNormal{data: entry}))
|
||||
Self::new_base(EntryPayload::Normal(EntryNormal { data: entry }))
|
||||
}
|
||||
|
||||
/// Create a new instance.
|
||||
pub(crate) fn new_base(entry: EntryPayload<D>) -> Self {
|
||||
Self{entry}
|
||||
Self { entry }
|
||||
}
|
||||
|
||||
/// Generate a new payload holding a config change.
|
||||
pub(crate) fn new_config(membership: MembershipConfig) -> Self {
|
||||
Self::new_base(EntryPayload::ConfigChange(EntryConfigChange{membership}))
|
||||
Self::new_base(EntryPayload::ConfigChange(EntryConfigChange { membership }))
|
||||
}
|
||||
|
||||
/// Generate a new blank payload.
|
||||
|
@ -541,6 +581,6 @@ pub struct ClientWriteResponse<R: AppDataResponse> {
|
|||
/// The log index of the successfully processed client request.
|
||||
pub index: u64,
|
||||
/// Application specific response data.
|
||||
#[serde(bound="R: AppDataResponse")]
|
||||
#[serde(bound = "R: AppDataResponse")]
|
||||
pub data: R,
|
||||
}
|
||||
|
|
|
@ -7,13 +7,13 @@ use tokio::io::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncSeekExt};
|
|||
use tokio::stream::StreamExt;
|
||||
use tokio::sync::{mpsc, oneshot};
|
||||
use tokio::task::JoinHandle;
|
||||
use tokio::time::{Duration, Interval, interval, timeout};
|
||||
use tokio::time::{interval, timeout, Duration, Interval};
|
||||
|
||||
use crate::{AppData, AppDataResponse, NodeId, RaftNetwork, RaftStorage};
|
||||
use crate::config::{Config, SnapshotPolicy};
|
||||
use crate::error::RaftResult;
|
||||
use crate::raft::{AppendEntriesRequest, Entry, EntryPayload, InstallSnapshotRequest};
|
||||
use crate::storage::CurrentSnapshotData;
|
||||
use crate::{AppData, AppDataResponse, NodeId, RaftNetwork, RaftStorage};
|
||||
|
||||
/// The public handle to a spawned replication stream.
|
||||
pub(crate) struct ReplicationStream<D: AppData> {
|
||||
|
@ -26,13 +26,20 @@ pub(crate) struct ReplicationStream<D: AppData> {
|
|||
impl<D: AppData> ReplicationStream<D> {
|
||||
/// Create a new replication stream for the target peer.
|
||||
pub(crate) fn new<R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>>(
|
||||
id: NodeId, target: NodeId, term: u64, config: Arc<Config>,
|
||||
last_log_index: u64, last_log_term: u64, commit_index: u64,
|
||||
network: Arc<N>, storage: Arc<S>, replicationtx: mpsc::UnboundedSender<ReplicaEvent<S::Snapshot>>,
|
||||
id: NodeId, target: NodeId, term: u64, config: Arc<Config>, last_log_index: u64, last_log_term: u64, commit_index: u64, network: Arc<N>,
|
||||
storage: Arc<S>, replicationtx: mpsc::UnboundedSender<ReplicaEvent<S::Snapshot>>,
|
||||
) -> Self {
|
||||
ReplicationCore::spawn(
|
||||
id, target, term, config, last_log_index, last_log_term, commit_index,
|
||||
network, storage, replicationtx,
|
||||
id,
|
||||
target,
|
||||
term,
|
||||
config,
|
||||
last_log_index,
|
||||
last_log_term,
|
||||
commit_index,
|
||||
network,
|
||||
storage,
|
||||
replicationtx,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
@ -45,7 +52,6 @@ impl<D: AppData> ReplicationStream<D> {
|
|||
struct ReplicationCore<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> {
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Static Fields /////////////////////////////////////////////////////////
|
||||
|
||||
/// The ID of this Raft node.
|
||||
id: NodeId,
|
||||
/// The ID of the target Raft node which replication events are to be sent to.
|
||||
|
@ -68,7 +74,6 @@ struct ReplicationCore<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: Raf
|
|||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Dynamic Fields ////////////////////////////////////////////////////////
|
||||
|
||||
/// The target state of this replication stream.
|
||||
target_state: TargetReplState,
|
||||
|
||||
|
@ -126,23 +131,36 @@ struct ReplicationCore<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: Raf
|
|||
impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> ReplicationCore<D, R, N, S> {
|
||||
/// Spawn a new replication task for the target node.
|
||||
pub(self) fn spawn(
|
||||
id: NodeId, target: NodeId, term: u64, config: Arc<Config>,
|
||||
last_log_index: u64, last_log_term: u64, commit_index: u64,
|
||||
network: Arc<N>, storage: Arc<S>, rafttx: mpsc::UnboundedSender<ReplicaEvent<S::Snapshot>>,
|
||||
id: NodeId, target: NodeId, term: u64, config: Arc<Config>, last_log_index: u64, last_log_term: u64, commit_index: u64, network: Arc<N>,
|
||||
storage: Arc<S>, rafttx: mpsc::UnboundedSender<ReplicaEvent<S::Snapshot>>,
|
||||
) -> ReplicationStream<D> {
|
||||
let (raftrx_tx, raftrx) = mpsc::unbounded_channel();
|
||||
let heartbeat_timeout = Duration::from_millis(config.heartbeat_interval);
|
||||
let max_payload_entries = config.max_payload_entries as usize;
|
||||
let this = Self{
|
||||
id, target, term, network, storage, config, max_payload_entries,
|
||||
let this = Self {
|
||||
id,
|
||||
target,
|
||||
term,
|
||||
network,
|
||||
storage,
|
||||
config,
|
||||
max_payload_entries,
|
||||
marker_r: std::marker::PhantomData,
|
||||
target_state: TargetReplState::Lagging, last_log_index, commit_index,
|
||||
next_index: last_log_index + 1, match_index: last_log_index, match_term: last_log_term,
|
||||
rafttx, raftrx, heartbeat: interval(heartbeat_timeout), heartbeat_timeout,
|
||||
replication_buffer: Vec::new(), outbound_buffer: Vec::new(),
|
||||
target_state: TargetReplState::Lagging,
|
||||
last_log_index,
|
||||
commit_index,
|
||||
next_index: last_log_index + 1,
|
||||
match_index: last_log_index,
|
||||
match_term: last_log_term,
|
||||
rafttx,
|
||||
raftrx,
|
||||
heartbeat: interval(heartbeat_timeout),
|
||||
heartbeat_timeout,
|
||||
replication_buffer: Vec::new(),
|
||||
outbound_buffer: Vec::new(),
|
||||
};
|
||||
let handle = tokio::spawn(this.main());
|
||||
ReplicationStream{handle, repltx: raftrx_tx}
|
||||
ReplicationStream { handle, repltx: raftrx_tx }
|
||||
}
|
||||
|
||||
#[tracing::instrument(level="trace", skip(self), fields(id=self.id, target=self.target, cluster=%self.config.cluster_name))]
|
||||
|
@ -165,24 +183,30 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Re
|
|||
///
|
||||
/// This request will timeout if no response is received within the
|
||||
/// configured heartbeat interval.
|
||||
#[tracing::instrument(level="trace", skip(self))]
|
||||
#[tracing::instrument(level = "trace", skip(self))]
|
||||
async fn send_append_entries(&mut self) {
|
||||
// Attempt to fill the send buffer from the replication buffer.
|
||||
if self.outbound_buffer.is_empty() {
|
||||
let repl_len = self.replication_buffer.len();
|
||||
if repl_len > 0 {
|
||||
let chunk_size = if repl_len < self.max_payload_entries { repl_len } else { self.max_payload_entries };
|
||||
self.outbound_buffer.extend(
|
||||
self.replication_buffer.drain(..chunk_size)
|
||||
.map(OutboundEntry::Arc));
|
||||
let chunk_size = if repl_len < self.max_payload_entries {
|
||||
repl_len
|
||||
} else {
|
||||
self.max_payload_entries
|
||||
};
|
||||
self.outbound_buffer
|
||||
.extend(self.replication_buffer.drain(..chunk_size).map(OutboundEntry::Arc));
|
||||
}
|
||||
}
|
||||
|
||||
// Build the heartbeat frame to be sent to the follower.
|
||||
let payload = AppendEntriesRequest{
|
||||
term: self.term, leader_id: self.id,
|
||||
prev_log_index: self.match_index, prev_log_term: self.match_term,
|
||||
leader_commit: self.commit_index, entries: self.outbound_buffer.iter().map(|entry| entry.as_ref().clone()).collect(),
|
||||
let payload = AppendEntriesRequest {
|
||||
term: self.term,
|
||||
leader_id: self.id,
|
||||
prev_log_index: self.match_index,
|
||||
prev_log_term: self.match_term,
|
||||
leader_commit: self.commit_index,
|
||||
entries: self.outbound_buffer.iter().map(|entry| entry.as_ref().clone()).collect(),
|
||||
};
|
||||
|
||||
// Send the payload.
|
||||
|
@ -193,11 +217,11 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Re
|
|||
tracing::error!({error=%err}, "error sending AppendEntries RPC to target");
|
||||
return;
|
||||
}
|
||||
}
|
||||
},
|
||||
Err(err) => {
|
||||
tracing::error!({error=%err}, "timeout while sending AppendEntries RPC to target");
|
||||
return;
|
||||
},
|
||||
}
|
||||
};
|
||||
let last_index_and_term = match self.outbound_buffer.last() {
|
||||
Some(last) => Some((last.as_ref().index, last.as_ref().term)),
|
||||
|
@ -213,7 +237,11 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Re
|
|||
self.next_index = index + 1; // This should always be the next expected index.
|
||||
self.match_index = index;
|
||||
self.match_term = term;
|
||||
let _ = self.rafttx.send(ReplicaEvent::UpdateMatchIndex{target: self.target, match_index: index, match_term: term});
|
||||
let _ = self.rafttx.send(ReplicaEvent::UpdateMatchIndex {
|
||||
target: self.target,
|
||||
match_index: index,
|
||||
match_term: term,
|
||||
});
|
||||
|
||||
// If running at line rate, and our buffered outbound requests have accumulated too
|
||||
// much, we need to purge and transition to a lagging state. The target is not able to
|
||||
|
@ -227,8 +255,11 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Re
|
|||
|
||||
// Replication was not successful. If a newer term has been returned, revert to follower.
|
||||
if res.term > self.term {
|
||||
tracing::trace!({res.term}, "append entries failed, reverting to follower");
|
||||
let _ = self.rafttx.send(ReplicaEvent::RevertToFollower{target: self.target, term: res.term});
|
||||
tracing::trace!({ res.term }, "append entries failed, reverting to follower");
|
||||
let _ = self.rafttx.send(ReplicaEvent::RevertToFollower {
|
||||
target: self.target,
|
||||
term: res.term,
|
||||
});
|
||||
self.target_state = TargetReplState::Shutdown;
|
||||
return;
|
||||
}
|
||||
|
@ -249,14 +280,21 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Re
|
|||
// it will never exist. So instead, we just return, and accept the conflict data.
|
||||
if conflict.index == 0 {
|
||||
self.target_state = TargetReplState::Lagging;
|
||||
let _ = self.rafttx.send(ReplicaEvent::UpdateMatchIndex{
|
||||
target: self.target, match_index: self.match_index, match_term: self.match_term,
|
||||
let _ = self.rafttx.send(ReplicaEvent::UpdateMatchIndex {
|
||||
target: self.target,
|
||||
match_index: self.match_index,
|
||||
match_term: self.match_term,
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
// Fetch the entry at conflict index and use the term specified there.
|
||||
match self.storage.get_log_entries(conflict.index, conflict.index + 1).await.map(|entries| entries.get(0).map(|entry| entry.term)) {
|
||||
match self
|
||||
.storage
|
||||
.get_log_entries(conflict.index, conflict.index + 1)
|
||||
.await
|
||||
.map(|entries| entries.get(0).map(|entry| entry.term))
|
||||
{
|
||||
Ok(Some(term)) => {
|
||||
self.match_term = term; // If we have the specified log, ensure we use its term.
|
||||
}
|
||||
|
@ -264,8 +302,10 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Re
|
|||
// This condition would only ever be reached if the log has been removed due to
|
||||
// log compaction (barring critical storage failure), so transition to snapshotting.
|
||||
self.target_state = TargetReplState::Snapshotting;
|
||||
let _ = self.rafttx.send(ReplicaEvent::UpdateMatchIndex{
|
||||
target: self.target, match_index: self.match_index, match_term: self.match_term,
|
||||
let _ = self.rafttx.send(ReplicaEvent::UpdateMatchIndex {
|
||||
target: self.target,
|
||||
match_index: self.match_index,
|
||||
match_term: self.match_term,
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
@ -278,8 +318,10 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Re
|
|||
};
|
||||
|
||||
// Check snapshot policy and handle conflict as needed.
|
||||
let _ = self.rafttx.send(ReplicaEvent::UpdateMatchIndex{
|
||||
target: self.target, match_index: self.match_index, match_term: self.match_term,
|
||||
let _ = self.rafttx.send(ReplicaEvent::UpdateMatchIndex {
|
||||
target: self.target,
|
||||
match_index: self.match_index,
|
||||
match_term: self.match_term,
|
||||
});
|
||||
match &self.config.snapshot_policy {
|
||||
SnapshotPolicy::LogsSinceLast(threshold) => {
|
||||
|
@ -299,7 +341,7 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Re
|
|||
|
||||
/// Perform a check to see if this replication stream is lagging behind far enough that a
|
||||
/// snapshot is warranted.
|
||||
#[tracing::instrument(level="trace", skip(self))]
|
||||
#[tracing::instrument(level = "trace", skip(self))]
|
||||
pub(self) fn needs_snapshot(&self) -> bool {
|
||||
match &self.config.snapshot_policy {
|
||||
SnapshotPolicy::LogsSinceLast(threshold) => {
|
||||
|
@ -330,10 +372,10 @@ impl<D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> Re
|
|||
};
|
||||
// Process the event.
|
||||
match event {
|
||||
RaftEvent::UpdateCommitIndex{commit_index} => {
|
||||
RaftEvent::UpdateCommitIndex { commit_index } => {
|
||||
self.commit_index = commit_index;
|
||||
}
|
||||
RaftEvent::Replicate{entry, commit_index} => {
|
||||
RaftEvent::Replicate { entry, commit_index } => {
|
||||
self.commit_index = commit_index;
|
||||
self.last_log_index = entry.index;
|
||||
if self.target_state == TargetReplState::LineRate {
|
||||
|
@ -408,10 +450,11 @@ pub(crate) enum RaftEvent<D: AppData> {
|
|||
|
||||
/// An event coming from a replication stream.
|
||||
pub(crate) enum ReplicaEvent<S>
|
||||
where S: AsyncRead + AsyncSeek + Send + Unpin + 'static,
|
||||
where
|
||||
S: AsyncRead + AsyncSeek + Send + Unpin + 'static,
|
||||
{
|
||||
/// An event representing an update to the replication rate of a replication stream.
|
||||
RateUpdate{
|
||||
RateUpdate {
|
||||
/// The ID of the Raft node to which this event relates.
|
||||
target: NodeId,
|
||||
/// A flag indicating if the corresponding target node is replicating at line rate.
|
||||
|
@ -422,7 +465,7 @@ pub(crate) enum ReplicaEvent<S>
|
|||
is_line_rate: bool,
|
||||
},
|
||||
/// An event from a replication stream which updates the target node's match index.
|
||||
UpdateMatchIndex{
|
||||
UpdateMatchIndex {
|
||||
/// The ID of the target node for which the match index is to be updated.
|
||||
target: NodeId,
|
||||
/// The index of the most recent log known to have been successfully replicated on the target.
|
||||
|
@ -431,14 +474,14 @@ pub(crate) enum ReplicaEvent<S>
|
|||
match_term: u64,
|
||||
},
|
||||
/// An event indicating that the Raft node needs to revert to follower state.
|
||||
RevertToFollower{
|
||||
RevertToFollower {
|
||||
/// The ID of the target node from which the new term was observed.
|
||||
target: NodeId,
|
||||
/// The new term observed.
|
||||
term: u64,
|
||||
},
|
||||
/// An event from a replication stream requesting snapshot info.
|
||||
NeedsSnapshot{
|
||||
NeedsSnapshot {
|
||||
/// The ID of the target node from which the event was sent.
|
||||
target: NodeId,
|
||||
/// The response channel for delivering the snapshot data.
|
||||
|
@ -460,12 +503,15 @@ struct LineRateState<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: R
|
|||
impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> LineRateState<'a, D, R, N, S> {
|
||||
/// Create a new instance.
|
||||
pub fn new(core: &'a mut ReplicationCore<D, R, N, S>) -> Self {
|
||||
Self{core}
|
||||
Self { core }
|
||||
}
|
||||
|
||||
#[tracing::instrument(level="trace", skip(self), fields(state="line-rate"))]
|
||||
#[tracing::instrument(level = "trace", skip(self), fields(state = "line-rate"))]
|
||||
pub async fn run(mut self) {
|
||||
let event = ReplicaEvent::RateUpdate{target: self.core.target, is_line_rate: true};
|
||||
let event = ReplicaEvent::RateUpdate {
|
||||
target: self.core.target,
|
||||
is_line_rate: true,
|
||||
};
|
||||
let _ = self.core.rafttx.send(event);
|
||||
loop {
|
||||
if self.core.target_state != TargetReplState::LineRate {
|
||||
|
@ -473,7 +519,11 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
}
|
||||
|
||||
// We always prioritize draining our buffers first.
|
||||
let next_buf_index = self.core.outbound_buffer.first().map(|entry| entry.as_ref().index)
|
||||
let next_buf_index = self
|
||||
.core
|
||||
.outbound_buffer
|
||||
.first()
|
||||
.map(|entry| entry.as_ref().index)
|
||||
.or_else(|| self.core.replication_buffer.first().map(|entry| entry.index));
|
||||
if let Some(index) = next_buf_index {
|
||||
// Ensure that our buffered data matches up with `next_index`. When transitioning to
|
||||
|
@ -490,7 +540,7 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
self.core.send_append_entries().await;
|
||||
continue;
|
||||
}
|
||||
tokio::select!{
|
||||
tokio::select! {
|
||||
_ = self.core.heartbeat.next() => self.core.send_append_entries().await,
|
||||
event = self.core.raftrx.next() => match event {
|
||||
Some(event) => self.core.drain_raftrx(event),
|
||||
|
@ -501,7 +551,7 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
}
|
||||
|
||||
/// Ensure there are no gaps in the outbound buffer due to transition from lagging.
|
||||
#[tracing::instrument(level="trace", skip(self))]
|
||||
#[tracing::instrument(level = "trace", skip(self))]
|
||||
async fn frontload_outbound_buffer(&mut self, start: u64, stop: u64) {
|
||||
let entries = match self.core.storage.get_log_entries(start, stop).await {
|
||||
Ok(entries) => entries,
|
||||
|
@ -536,12 +586,15 @@ struct LaggingState<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: Ra
|
|||
impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> LaggingState<'a, D, R, N, S> {
|
||||
/// Create a new instance.
|
||||
pub fn new(core: &'a mut ReplicationCore<D, R, N, S>) -> Self {
|
||||
Self{core}
|
||||
Self { core }
|
||||
}
|
||||
|
||||
#[tracing::instrument(level="trace", skip(self), fields(state="lagging"))]
|
||||
#[tracing::instrument(level = "trace", skip(self), fields(state = "lagging"))]
|
||||
pub async fn run(mut self) {
|
||||
let event = ReplicaEvent::RateUpdate{target: self.core.target, is_line_rate: false};
|
||||
let event = ReplicaEvent::RateUpdate {
|
||||
target: self.core.target,
|
||||
is_line_rate: false,
|
||||
};
|
||||
let _ = self.core.rafttx.send(event);
|
||||
self.core.replication_buffer.clear();
|
||||
self.core.outbound_buffer.clear();
|
||||
|
@ -581,7 +634,7 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
}
|
||||
|
||||
/// Prep the outbound buffer with the next payload of entries to append.
|
||||
#[tracing::instrument(level="trace", skip(self))]
|
||||
#[tracing::instrument(level = "trace", skip(self))]
|
||||
async fn prep_outbound_buffer_from_storage(&mut self) {
|
||||
// If the send buffer is empty, we need to fill it.
|
||||
if self.core.outbound_buffer.is_empty() {
|
||||
|
@ -633,12 +686,19 @@ struct SnapshottingState<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>,
|
|||
impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>> SnapshottingState<'a, D, R, N, S> {
|
||||
/// Create a new instance.
|
||||
pub fn new(core: &'a mut ReplicationCore<D, R, N, S>) -> Self {
|
||||
Self{core, snapshot: None, snapshot_fetch_rx: None}
|
||||
Self {
|
||||
core,
|
||||
snapshot: None,
|
||||
snapshot_fetch_rx: None,
|
||||
}
|
||||
}
|
||||
|
||||
#[tracing::instrument(level="trace", skip(self), fields(state="snapshotting"))]
|
||||
#[tracing::instrument(level = "trace", skip(self), fields(state = "snapshotting"))]
|
||||
pub async fn run(mut self) {
|
||||
let event = ReplicaEvent::RateUpdate{target: self.core.target, is_line_rate: false};
|
||||
let event = ReplicaEvent::RateUpdate {
|
||||
target: self.core.target,
|
||||
is_line_rate: false,
|
||||
};
|
||||
let _ = self.core.rafttx.send(event);
|
||||
self.core.replication_buffer.clear();
|
||||
self.core.outbound_buffer.clear();
|
||||
|
@ -651,7 +711,10 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
// If we don't have any of the components we need, fetch the current snapshot.
|
||||
if self.snapshot.is_none() && self.snapshot_fetch_rx.is_none() {
|
||||
let (tx, rx) = oneshot::channel();
|
||||
let _ = self.core.rafttx.send(ReplicaEvent::NeedsSnapshot{target: self.core.target, tx});
|
||||
let _ = self.core.rafttx.send(ReplicaEvent::NeedsSnapshot {
|
||||
target: self.core.target,
|
||||
tx,
|
||||
});
|
||||
self.snapshot_fetch_rx = Some(rx);
|
||||
}
|
||||
|
||||
|
@ -676,10 +739,10 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
///
|
||||
/// If an error comes up during processing, this routine should simply be called again after
|
||||
/// issuing a new request to the storage layer.
|
||||
#[tracing::instrument(level="trace", skip(self, rx))]
|
||||
#[tracing::instrument(level = "trace", skip(self, rx))]
|
||||
async fn wait_for_snapshot(&mut self, mut rx: oneshot::Receiver<CurrentSnapshotData<S::Snapshot>>) {
|
||||
loop {
|
||||
tokio::select!{
|
||||
tokio::select! {
|
||||
_ = self.core.heartbeat.next() => self.core.send_append_entries().await,
|
||||
event = self.core.raftrx.next() => match event {
|
||||
Some(event) => self.core.drain_raftrx(event),
|
||||
|
@ -701,7 +764,7 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
}
|
||||
}
|
||||
|
||||
#[tracing::instrument(level="trace", skip(self, snapshot))]
|
||||
#[tracing::instrument(level = "trace", skip(self, snapshot))]
|
||||
async fn stream_snapshot(&mut self, mut snapshot: CurrentSnapshotData<S::Snapshot>) -> RaftResult<()> {
|
||||
let mut offset = 0;
|
||||
self.core.last_log_index = snapshot.index;
|
||||
|
@ -714,11 +777,14 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
snapshot.snapshot.seek(SeekFrom::Start(offset)).await?;
|
||||
let nread = snapshot.snapshot.read_buf(&mut buf).await?;
|
||||
let done = nread == 0; // If bytes read == 0, then we're done.
|
||||
let req = InstallSnapshotRequest{
|
||||
term: self.core.term, leader_id: self.core.id,
|
||||
let req = InstallSnapshotRequest {
|
||||
term: self.core.term,
|
||||
leader_id: self.core.id,
|
||||
last_included_index: snapshot.index,
|
||||
last_included_term: snapshot.term,
|
||||
offset, data: Vec::from(&buf[..nread]), done,
|
||||
offset,
|
||||
data: Vec::from(&buf[..nread]),
|
||||
done,
|
||||
};
|
||||
buf.clear();
|
||||
|
||||
|
@ -740,7 +806,10 @@ impl<'a, D: AppData, R: AppDataResponse, N: RaftNetwork<D>, S: RaftStorage<D, R>
|
|||
|
||||
// Handle response conditions.
|
||||
if res.term > self.core.term {
|
||||
let _ = self.core.rafttx.send(ReplicaEvent::RevertToFollower{target: self.core.target, term: res.term});
|
||||
let _ = self.core.rafttx.send(ReplicaEvent::RevertToFollower {
|
||||
target: self.core.target,
|
||||
term: res.term,
|
||||
});
|
||||
self.core.target_state = TargetReplState::Shutdown;
|
||||
return Ok(());
|
||||
}
|
||||
|
|
|
@ -2,15 +2,16 @@
|
|||
|
||||
use anyhow::Result;
|
||||
use async_trait::async_trait;
|
||||
use serde::{Serialize, Deserialize};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::io::{AsyncRead, AsyncSeek, AsyncWrite};
|
||||
|
||||
use crate::{AppData, AppDataResponse, NodeId};
|
||||
use crate::raft::{Entry, MembershipConfig};
|
||||
use crate::{AppData, AppDataResponse, NodeId};
|
||||
|
||||
/// The data associated with the current snapshot.
|
||||
pub struct CurrentSnapshotData<S>
|
||||
where S: AsyncRead + AsyncSeek + Send + Unpin + 'static,
|
||||
where
|
||||
S: AsyncRead + AsyncSeek + Send + Unpin + 'static,
|
||||
{
|
||||
/// The snapshot entry's term.
|
||||
pub term: u64,
|
||||
|
@ -56,9 +57,14 @@ impl InitialState {
|
|||
/// ### `id`
|
||||
/// The ID of the Raft node.
|
||||
pub fn new_initial(id: NodeId) -> Self {
|
||||
Self{
|
||||
last_log_index: 0, last_log_term: 0, last_applied_log: 0,
|
||||
hard_state: HardState{current_term: 0, voted_for: None},
|
||||
Self {
|
||||
last_log_index: 0,
|
||||
last_log_term: 0,
|
||||
last_applied_log: 0,
|
||||
hard_state: HardState {
|
||||
current_term: 0,
|
||||
voted_for: None,
|
||||
},
|
||||
membership: MembershipConfig::new_initial(id),
|
||||
}
|
||||
}
|
||||
|
@ -70,9 +76,9 @@ impl InitialState {
|
|||
/// for details and discussion on this trait and how to implement it.
|
||||
#[async_trait]
|
||||
pub trait RaftStorage<D, R>: Send + Sync + 'static
|
||||
where
|
||||
D: AppData,
|
||||
R: AppDataResponse,
|
||||
where
|
||||
D: AppData,
|
||||
R: AppDataResponse,
|
||||
{
|
||||
/// The storage engine's associated type used for exposing a snapshot for reading & writing.
|
||||
type Snapshot: AsyncRead + AsyncWrite + AsyncSeek + Send + Unpin + 'static;
|
||||
|
@ -184,8 +190,7 @@ pub trait RaftStorage<D, R>: Send + Sync + 'static
|
|||
/// `AsyncWriteExt.shutdown()` method will have been called, so no additional writes should be
|
||||
/// made to the snapshot.
|
||||
async fn finalize_snapshot_installation(
|
||||
&self, index: u64, term: u64, delete_through: Option<u64>,
|
||||
id: String, snapshot: Box<Self::Snapshot>,
|
||||
&self, index: u64, term: u64, delete_through: Option<u64>, id: String, snapshot: Box<Self::Snapshot>,
|
||||
) -> Result<()>;
|
||||
|
||||
/// Get a readable handle to the current snapshot, along with its metadata.
|
||||
|
|
|
@ -18,7 +18,7 @@ use fixtures::RaftRouter;
|
|||
/// - call the client_read interface on the followers, and assert failure.
|
||||
///
|
||||
/// RUST_LOG=async_raft,memstore,client_reads=trace cargo test -p async-raft --test client_reads
|
||||
#[tokio::test(core_threads=4)]
|
||||
#[tokio::test(core_threads = 4)]
|
||||
async fn client_reads() -> Result<()> {
|
||||
fixtures::init_tracing();
|
||||
|
||||
|
@ -42,7 +42,10 @@ async fn client_reads() -> Result<()> {
|
|||
// Get the ID of the leader, and assert that client_read succeeds.
|
||||
let leader = router.leader().await.expect("leader not found");
|
||||
assert_eq!(leader, 0, "expected leader to be node 0, got {}", leader);
|
||||
router.client_read(leader).await.expect(&format!("expected client_read to succeed for cluster leader {}", leader));
|
||||
router
|
||||
.client_read(leader)
|
||||
.await
|
||||
.unwrap_or_else(|_| panic!("expected client_read to succeed for cluster leader {}", leader));
|
||||
router.client_read(1).await.expect_err("expected client_read on follower node 1 to fail");
|
||||
router.client_read(2).await.expect_err("expected client_read on follower node 2 to fail");
|
||||
|
||||
|
|
|
@ -4,8 +4,8 @@ use std::sync::Arc;
|
|||
use std::time::Duration;
|
||||
|
||||
use anyhow::Result;
|
||||
use async_raft::Config;
|
||||
use async_raft::raft::MembershipConfig;
|
||||
use async_raft::Config;
|
||||
use futures::prelude::*;
|
||||
use maplit::hashset;
|
||||
use tokio::time::delay_for;
|
||||
|
@ -21,7 +21,7 @@ use fixtures::RaftRouter;
|
|||
/// - assert that the cluster stayed stable and has all of the expected data.
|
||||
///
|
||||
/// RUST_LOG=async_raft,memstore,client_writes=trace cargo test -p async-raft --test client_writes
|
||||
#[tokio::test(core_threads=4)]
|
||||
#[tokio::test(core_threads = 4)]
|
||||
async fn client_writes() -> Result<()> {
|
||||
fixtures::init_tracing();
|
||||
|
||||
|
@ -51,10 +51,25 @@ async fn client_writes() -> Result<()> {
|
|||
clients.push(router.client_request_many(leader, "3", 1000));
|
||||
clients.push(router.client_request_many(leader, "4", 1000));
|
||||
clients.push(router.client_request_many(leader, "5", 1000));
|
||||
while let Some(_) = clients.next().await { }
|
||||
while clients.next().await.is_some() {}
|
||||
delay_for(Duration::from_secs(5)).await; // Ensure enough time is given for replication (this is WAY more than enough).
|
||||
router.assert_stable_cluster(Some(1), Some(6001)).await; // The extra 1 is from the leader's initial commit entry.
|
||||
router.assert_storage_state(1, 6001, Some(0), 6001, Some(((5000..5100).into(), 1, MembershipConfig{members: hashset![0, 1, 2], members_after_consensus: None}))).await;
|
||||
router
|
||||
.assert_storage_state(
|
||||
1,
|
||||
6001,
|
||||
Some(0),
|
||||
6001,
|
||||
Some((
|
||||
(5000..5100).into(),
|
||||
1,
|
||||
MembershipConfig {
|
||||
members: hashset![0, 1, 2],
|
||||
members_after_consensus: None,
|
||||
},
|
||||
)),
|
||||
)
|
||||
.await;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
@ -4,8 +4,8 @@ use std::sync::Arc;
|
|||
use std::time::Duration;
|
||||
|
||||
use anyhow::Result;
|
||||
use async_raft::{Config, SnapshotPolicy};
|
||||
use async_raft::raft::MembershipConfig;
|
||||
use async_raft::{Config, SnapshotPolicy};
|
||||
use maplit::hashset;
|
||||
use tokio::time::delay_for;
|
||||
|
||||
|
@ -20,15 +20,17 @@ use fixtures::RaftRouter;
|
|||
/// - add new nodes and assert that they receive the snapshot.
|
||||
///
|
||||
/// RUST_LOG=async_raft,memstore,compaction=trace cargo test -p async-raft --test compaction
|
||||
#[tokio::test(core_threads=4)]
|
||||
#[tokio::test(core_threads = 4)]
|
||||
async fn compaction() -> Result<()> {
|
||||
fixtures::init_tracing();
|
||||
|
||||
// Setup test dependencies.
|
||||
let config = Arc::new(Config::build("test".into())
|
||||
.snapshot_policy(SnapshotPolicy::LogsSinceLast(500))
|
||||
.validate()
|
||||
.expect("failed to build Raft config"));
|
||||
let config = Arc::new(
|
||||
Config::build("test".into())
|
||||
.snapshot_policy(SnapshotPolicy::LogsSinceLast(500))
|
||||
.validate()
|
||||
.expect("failed to build Raft config"),
|
||||
);
|
||||
let router = Arc::new(RaftRouter::new(config.clone()));
|
||||
router.new_raft_node(0).await;
|
||||
|
||||
|
@ -46,15 +48,48 @@ async fn compaction() -> Result<()> {
|
|||
router.client_request_many(0, "0", 499).await; // Puts us exactly at the configured snapshot policy threshold.
|
||||
delay_for(Duration::from_secs(5)).await; // Wait to ensure there is enough time for a snapshot to be built (this is way more than enough).
|
||||
router.assert_stable_cluster(Some(1), Some(500)).await;
|
||||
router.assert_storage_state(1, 500, Some(0), 500, Some((500.into(), 1, MembershipConfig{members: hashset![0], members_after_consensus: None}))).await;
|
||||
router
|
||||
.assert_storage_state(
|
||||
1,
|
||||
500,
|
||||
Some(0),
|
||||
500,
|
||||
Some((
|
||||
500.into(),
|
||||
1,
|
||||
MembershipConfig {
|
||||
members: hashset![0],
|
||||
members_after_consensus: None,
|
||||
},
|
||||
)),
|
||||
)
|
||||
.await;
|
||||
|
||||
// Add a new node and assert that it received the same snapshot.
|
||||
router.new_raft_node(1).await;
|
||||
router.add_non_voter(0, 1).await.expect("failed to add new node as non-voter");
|
||||
router.change_membership(0, hashset![0, 1]).await.expect("failed to modify cluster membership");
|
||||
router
|
||||
.change_membership(0, hashset![0, 1])
|
||||
.await
|
||||
.expect("failed to modify cluster membership");
|
||||
delay_for(Duration::from_secs(5)).await; // Wait to ensure metrics are updated (this is way more than enough).
|
||||
router.assert_stable_cluster(Some(1), Some(502)).await; // We expect index to be 500 + 2 (joint & uniform config change entries).
|
||||
router.assert_storage_state(1, 502, None, 500, Some((500.into(), 1, MembershipConfig{members: hashset![0u64], members_after_consensus: None}))).await;
|
||||
router
|
||||
.assert_storage_state(
|
||||
1,
|
||||
502,
|
||||
None,
|
||||
500,
|
||||
Some((
|
||||
500.into(),
|
||||
1,
|
||||
MembershipConfig {
|
||||
members: hashset![0u64],
|
||||
members_after_consensus: None,
|
||||
},
|
||||
)),
|
||||
)
|
||||
.await;
|
||||
// -------------------------------- ^^^^ this value is None because non-voters do not vote.
|
||||
|
||||
Ok(())
|
||||
|
|
|
@ -22,7 +22,7 @@ use fixtures::RaftRouter;
|
|||
/// - restore the isolated node and assert that it becomes a follower.
|
||||
///
|
||||
/// RUST_LOG=async_raft,memstore,dynamic_membership=trace cargo test -p async-raft --test dynamic_membership
|
||||
#[tokio::test(core_threads=6)]
|
||||
#[tokio::test(core_threads = 6)]
|
||||
async fn dynamic_membership() -> Result<()> {
|
||||
fixtures::init_tracing();
|
||||
|
||||
|
|
|
@ -7,15 +7,15 @@ use std::sync::Arc;
|
|||
|
||||
use anyhow::{anyhow, Result};
|
||||
use async_raft::async_trait::async_trait;
|
||||
use async_raft::{Config, NodeId, Raft, RaftMetrics, RaftNetwork, State};
|
||||
use async_raft::error::{ChangeConfigError, ClientReadError, ClientWriteError};
|
||||
use async_raft::raft::ClientWriteRequest;
|
||||
use async_raft::raft::MembershipConfig;
|
||||
use async_raft::raft::{AppendEntriesRequest, AppendEntriesResponse};
|
||||
use async_raft::raft::{InstallSnapshotRequest, InstallSnapshotResponse};
|
||||
use async_raft::raft::{VoteRequest, VoteResponse};
|
||||
use async_raft::raft::ClientWriteRequest;
|
||||
use async_raft::raft::MembershipConfig;
|
||||
use async_raft::storage::RaftStorage;
|
||||
use memstore::{MemStore, ClientRequest as MemClientRequest, ClientResponse as MemClientResponse};
|
||||
use async_raft::{Config, NodeId, Raft, RaftMetrics, RaftNetwork, State};
|
||||
use memstore::{ClientRequest as MemClientRequest, ClientResponse as MemClientResponse, MemStore};
|
||||
use tokio::sync::RwLock;
|
||||
use tracing_subscriber::prelude::*;
|
||||
|
||||
|
@ -49,7 +49,11 @@ pub struct RaftRouter {
|
|||
impl RaftRouter {
|
||||
/// Create a new instance.
|
||||
pub fn new(config: Arc<Config>) -> Self {
|
||||
Self{config, routing_table: Default::default(), isolated_nodes: Default::default()}
|
||||
Self {
|
||||
config,
|
||||
routing_table: Default::default(),
|
||||
isolated_nodes: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create and register a new Raft node bearing the given ID.
|
||||
|
@ -70,7 +74,7 @@ impl RaftRouter {
|
|||
|
||||
/// Initialize all nodes based on the config in the routing table.
|
||||
pub async fn initialize_from_single_node(&self, node: NodeId) -> Result<()> {
|
||||
tracing::info!({node}, "initializing cluster from single node");
|
||||
tracing::info!({ node }, "initializing cluster from single node");
|
||||
let rt = self.routing_table.read().await;
|
||||
let members: HashSet<NodeId> = rt.keys().cloned().collect();
|
||||
rt.get(&node)
|
||||
|
@ -82,7 +86,7 @@ impl RaftRouter {
|
|||
}
|
||||
|
||||
/// Isolate the network of the specified node.
|
||||
#[tracing::instrument(level="debug", skip(self))]
|
||||
#[tracing::instrument(level = "debug", skip(self))]
|
||||
pub async fn isolate_node(&self, id: NodeId) {
|
||||
self.isolated_nodes.write().await.insert(id);
|
||||
}
|
||||
|
@ -100,19 +104,21 @@ impl RaftRouter {
|
|||
/// Get the ID of the current leader.
|
||||
pub async fn leader(&self) -> Option<NodeId> {
|
||||
let isolated = self.isolated_nodes.read().await;
|
||||
self.latest_metrics().await.into_iter().find_map(|node| if node.current_leader == Some(node.id) {
|
||||
if isolated.contains(&node.id) {
|
||||
None
|
||||
self.latest_metrics().await.into_iter().find_map(|node| {
|
||||
if node.current_leader == Some(node.id) {
|
||||
if isolated.contains(&node.id) {
|
||||
None
|
||||
} else {
|
||||
Some(node.id)
|
||||
}
|
||||
} else {
|
||||
Some(node.id)
|
||||
None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
})
|
||||
}
|
||||
|
||||
/// Restore the network of the specified node.
|
||||
#[tracing::instrument(level="debug", skip(self))]
|
||||
#[tracing::instrument(level = "debug", skip(self))]
|
||||
pub async fn restore_node(&self, id: NodeId) {
|
||||
let mut nodes = self.isolated_nodes.write().await;
|
||||
nodes.remove(&id);
|
||||
|
@ -120,26 +126,30 @@ impl RaftRouter {
|
|||
|
||||
/// Ask the node `leader` to add `target` as a non-voter, returning that node's result.
///
/// # Panics
/// Panics if no node with ID `leader` exists in the routing table.
pub async fn add_non_voter(&self, leader: NodeId, target: NodeId) -> Result<(), ChangeConfigError> {
    let rt = self.routing_table.read().await;
    // `unwrap_or_else` builds the panic message only when the lookup actually fails.
    let node = rt.get(&leader).unwrap_or_else(|| panic!("node with ID {} does not exist", leader));
    node.0.add_non_voter(target).await
}
|
||||
|
||||
/// Ask the node `leader` to change the cluster membership to `members`, returning that node's result.
///
/// # Panics
/// Panics if no node with ID `leader` exists in the routing table.
pub async fn change_membership(&self, leader: NodeId, members: HashSet<NodeId>) -> Result<(), ChangeConfigError> {
    let rt = self.routing_table.read().await;
    // `unwrap_or_else` builds the panic message only when the lookup actually fails.
    let node = rt.get(&leader).unwrap_or_else(|| panic!("node with ID {} does not exist", leader));
    node.0.change_membership(members).await
}
|
||||
|
||||
/// Send a client read request to the target node.
|
||||
pub async fn client_read(&self, target: NodeId) -> Result<(), ClientReadError> {
|
||||
let rt = self.routing_table.read().await;
|
||||
let node = rt.get(&target).expect(&format!("node with ID {} does not exist", target));
|
||||
let node = rt.get(&target).unwrap_or_else(|| panic!("node with ID {} does not exist", target));
|
||||
node.0.client_read().await
|
||||
}
|
||||
|
||||
/// Send a client request to the target node, causing test failure on error.
|
||||
pub async fn client_request(&self, target: NodeId, client_id: &str, serial: u64) {
|
||||
let req = MemClientRequest{client: client_id.into(), serial, status: format!("request-{}", serial)};
|
||||
let req = MemClientRequest {
|
||||
client: client_id.into(),
|
||||
serial,
|
||||
status: format!("request-{}", serial),
|
||||
};
|
||||
if let Err(err) = self.send_client_request(target, req).await {
|
||||
tracing::error!({error=%err}, "error from client request");
|
||||
panic!(err)
|
||||
|
@ -153,9 +163,13 @@ impl RaftRouter {
|
|||
}
|
||||
}
|
||||
|
||||
async fn send_client_request(&self, target: NodeId, req: MemClientRequest) -> std::result::Result<MemClientResponse, ClientWriteError<MemClientRequest>> {
|
||||
async fn send_client_request(
|
||||
&self, target: NodeId, req: MemClientRequest,
|
||||
) -> std::result::Result<MemClientResponse, ClientWriteError<MemClientRequest>> {
|
||||
let rt = self.routing_table.read().await;
|
||||
let node = rt.get(&target).expect(&format!("node '{}' does not exist in routing table", target));
|
||||
let node = rt
|
||||
.get(&target)
|
||||
.unwrap_or_else(|| panic!("node '{}' does not exist in routing table", target));
|
||||
node.0.client_write(ClientWriteRequest::new(req)).await.map(|res| res.data)
|
||||
}
|
||||
|
||||
|
@ -169,11 +183,23 @@ impl RaftRouter {
|
|||
assert!(node.current_leader.is_none(), "node {} has a current leader, expected none", node.id);
|
||||
assert_eq!(node.state, State::NonVoter, "node is in state {:?}, expected NonVoter", node.state);
|
||||
assert_eq!(node.current_term, 0, "node {} has term {}, expected 0", node.id, node.current_term);
|
||||
assert_eq!(node.last_applied, 0, "node {} has last_applied {}, expected 0", node.id, node.last_applied);
|
||||
assert_eq!(node.last_log_index, 0, "node {} has last_log_index {}, expected 0", node.id, node.last_log_index);
|
||||
assert_eq!(
|
||||
node.last_applied, 0,
|
||||
"node {} has last_applied {}, expected 0",
|
||||
node.id, node.last_applied
|
||||
);
|
||||
assert_eq!(
|
||||
node.last_log_index, 0,
|
||||
"node {} has last_log_index {}, expected 0",
|
||||
node.id, node.last_log_index
|
||||
);
|
||||
let members = node.membership_config.members.iter().collect::<Vec<_>>();
|
||||
assert_eq!(members, vec![&node.id], "node {0} has membership {1:?}, expected [{0}]", node.id, members);
|
||||
assert!(node.membership_config.members_after_consensus.is_none(), "node {} is in joint consensus, expected uniform consensus", node.id);
|
||||
assert!(
|
||||
node.membership_config.members_after_consensus.is_none(),
|
||||
"node {} is in joint consensus, expected uniform consensus",
|
||||
node.id
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -189,21 +215,24 @@ impl RaftRouter {
|
|||
let isolated = self.isolated_nodes.read().await;
|
||||
let nodes = self.latest_metrics().await;
|
||||
|
||||
let non_isolated_nodes: Vec<_> = nodes.iter()
|
||||
.filter(|node| !isolated.contains(&node.id))
|
||||
.collect();
|
||||
let leader = nodes.iter()
|
||||
let non_isolated_nodes: Vec<_> = nodes.iter().filter(|node| !isolated.contains(&node.id)).collect();
|
||||
let leader = nodes
|
||||
.iter()
|
||||
.filter(|node| !isolated.contains(&node.id))
|
||||
.find(|node| node.state == State::Leader)
|
||||
.expect("expected to find a cluster leader");
|
||||
let followers: Vec<_> = nodes.iter()
|
||||
let followers: Vec<_> = nodes
|
||||
.iter()
|
||||
.filter(|node| !isolated.contains(&node.id))
|
||||
.filter(|node| node.state == State::Follower)
|
||||
.collect();
|
||||
|
||||
assert_eq!(followers.len() + 1, non_isolated_nodes.len(),
|
||||
assert_eq!(
|
||||
followers.len() + 1,
|
||||
non_isolated_nodes.len(),
|
||||
"expected all nodes to be followers with one leader, got 1 leader and {} followers, expected {} followers",
|
||||
followers.len(), non_isolated_nodes.len() - 1,
|
||||
followers.len(),
|
||||
non_isolated_nodes.len() - 1,
|
||||
);
|
||||
let expected_term = match expected_term {
|
||||
Some(term) => term,
|
||||
|
@ -215,46 +244,116 @@ impl RaftRouter {
|
|||
};
|
||||
let all_nodes = nodes.iter().map(|node| node.id).collect::<Vec<_>>();
|
||||
for node in non_isolated_nodes.iter() {
|
||||
assert_eq!(node.current_leader, Some(leader.id), "node {} has leader {:?}, expected {}", node.id, node.current_leader, leader.id);
|
||||
assert_eq!(node.current_term, expected_term, "node {} has term {}, expected {}", node.id, node.current_term, expected_term);
|
||||
assert_eq!(node.last_applied, expected_last_log, "node {} has last_applied {}, expected {}", node.id, node.last_applied, expected_last_log);
|
||||
assert_eq!(node.last_log_index, expected_last_log, "node {} has last_log_index {}, expected {}", node.id, node.last_log_index, expected_last_log);
|
||||
assert_eq!(
|
||||
node.current_leader,
|
||||
Some(leader.id),
|
||||
"node {} has leader {:?}, expected {}",
|
||||
node.id,
|
||||
node.current_leader,
|
||||
leader.id
|
||||
);
|
||||
assert_eq!(
|
||||
node.current_term, expected_term,
|
||||
"node {} has term {}, expected {}",
|
||||
node.id, node.current_term, expected_term
|
||||
);
|
||||
assert_eq!(
|
||||
node.last_applied, expected_last_log,
|
||||
"node {} has last_applied {}, expected {}",
|
||||
node.id, node.last_applied, expected_last_log
|
||||
);
|
||||
assert_eq!(
|
||||
node.last_log_index, expected_last_log,
|
||||
"node {} has last_log_index {}, expected {}",
|
||||
node.id, node.last_log_index, expected_last_log
|
||||
);
|
||||
let mut members = node.membership_config.members.iter().cloned().collect::<Vec<_>>();
|
||||
members.sort();
|
||||
assert_eq!(members, all_nodes, "node {} has membership {:?}, expected {:?}", node.id, members, all_nodes);
|
||||
assert!(node.membership_config.members_after_consensus.is_none(), "node {} was not in uniform consensus state", node.id);
|
||||
assert_eq!(
|
||||
members, all_nodes,
|
||||
"node {} has membership {:?}, expected {:?}",
|
||||
node.id, members, all_nodes
|
||||
);
|
||||
assert!(
|
||||
node.membership_config.members_after_consensus.is_none(),
|
||||
"node {} was not in uniform consensus state",
|
||||
node.id
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Assert against the state of the storage system per node in the cluster.
|
||||
pub async fn assert_storage_state(
|
||||
&self, expect_term: u64, expect_last_log: u64, expect_voted_for: Option<u64>,
|
||||
expect_sm_last_applied_log: u64,
|
||||
&self, expect_term: u64, expect_last_log: u64, expect_voted_for: Option<u64>, expect_sm_last_applied_log: u64,
|
||||
expect_snapshot: Option<(ValueTest<u64>, u64, MembershipConfig)>,
|
||||
) {
|
||||
let rt = self.routing_table.read().await;
|
||||
for (id, (_node, storage)) in rt.iter() {
|
||||
let log = storage.get_log().await;
|
||||
let last_log = log.keys().last().expect(&format!("no last log found for node {}", id));
|
||||
assert_eq!(last_log, &expect_last_log, "expected node {} to have last_log {}, got {}", id, expect_last_log, last_log);
|
||||
let hs = storage.read_hard_state().await.clone().expect(&format!("no hardstate found for node {}", id));
|
||||
assert_eq!(hs.current_term, expect_term, "expected node {} to have term {}, got {}", id, expect_term, hs.current_term);
|
||||
let last_log = log.keys().last().unwrap_or_else(|| panic!("no last log found for node {}", id));
|
||||
assert_eq!(
|
||||
last_log, &expect_last_log,
|
||||
"expected node {} to have last_log {}, got {}",
|
||||
id, expect_last_log, last_log
|
||||
);
|
||||
let hs = storage
|
||||
.read_hard_state()
|
||||
.await
|
||||
.clone()
|
||||
.unwrap_or_else(|| panic!("no hardstate found for node {}", id));
|
||||
assert_eq!(
|
||||
hs.current_term, expect_term,
|
||||
"expected node {} to have term {}, got {}",
|
||||
id, expect_term, hs.current_term
|
||||
);
|
||||
if let Some(voted_for) = &expect_voted_for {
|
||||
assert_eq!(hs.voted_for.as_ref(), Some(voted_for), "expected node {} to have voted for {}, got {:?}", id, voted_for, hs.voted_for);
|
||||
assert_eq!(
|
||||
hs.voted_for.as_ref(),
|
||||
Some(voted_for),
|
||||
"expected node {} to have voted for {}, got {:?}",
|
||||
id,
|
||||
voted_for,
|
||||
hs.voted_for
|
||||
);
|
||||
}
|
||||
if let Some((index_test, term, cfg)) = &expect_snapshot {
|
||||
let snap = storage.get_current_snapshot().await
|
||||
.map_err(|err| panic!("{}", err)).unwrap()
|
||||
.expect(&format!("no snapshot present for node {}", id));
|
||||
let snap = storage
|
||||
.get_current_snapshot()
|
||||
.await
|
||||
.map_err(|err| panic!("{}", err))
|
||||
.unwrap()
|
||||
.unwrap_or_else(|| panic!("no snapshot present for node {}", id));
|
||||
match index_test {
|
||||
ValueTest::Exact(index) => assert_eq!(&snap.index, index, "expected node {} to have snapshot with index {}, got {}", id, index, snap.index),
|
||||
ValueTest::Range(range) => assert!(range.contains(&snap.index), "expected node {} to have snapshot within range {:?}, got {}", id, range, snap.index),
|
||||
ValueTest::Exact(index) => assert_eq!(
|
||||
&snap.index, index,
|
||||
"expected node {} to have snapshot with index {}, got {}",
|
||||
id, index, snap.index
|
||||
),
|
||||
ValueTest::Range(range) => assert!(
|
||||
range.contains(&snap.index),
|
||||
"expected node {} to have snapshot within range {:?}, got {}",
|
||||
id,
|
||||
range,
|
||||
snap.index
|
||||
),
|
||||
}
|
||||
assert_eq!(&snap.term, term, "expected node {} to have snapshot with term {}, got {}", id, term, snap.term);
|
||||
assert_eq!(&snap.membership, cfg, "expected node {} to have membership config {:?}, got {:?}", id, cfg, snap.membership);
|
||||
assert_eq!(
|
||||
&snap.term, term,
|
||||
"expected node {} to have snapshot with term {}, got {}",
|
||||
id, term, snap.term
|
||||
);
|
||||
assert_eq!(
|
||||
&snap.membership, cfg,
|
||||
"expected node {} to have membership config {:?}, got {:?}",
|
||||
id, cfg, snap.membership
|
||||
);
|
||||
}
|
||||
let sm = storage.get_state_machine().await;
|
||||
assert_eq!(&sm.last_applied_log, &expect_sm_last_applied_log, "expected node {} to have state machine last_applied_log {}, got {}", id, expect_sm_last_applied_log, sm.last_applied_log);
|
||||
assert_eq!(
|
||||
&sm.last_applied_log, &expect_sm_last_applied_log,
|
||||
"expected node {} to have state machine last_applied_log {}, got {}",
|
||||
id, expect_sm_last_applied_log, sm.last_applied_log
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -21,7 +21,7 @@ use fixtures::RaftRouter;
|
|||
/// followers have successfully replicated the payload.
|
||||
///
|
||||
/// RUST_LOG=async_raft,memstore,initialization=trace cargo test -p async-raft --test initialization
|
||||
#[tokio::test(core_threads=4)]
|
||||
#[tokio::test(core_threads = 4)]
|
||||
async fn initialization() -> Result<()> {
|
||||
fixtures::init_tracing();
|
||||
|
||||
|
|
|
@ -20,7 +20,7 @@ use fixtures::RaftRouter;
|
|||
/// - asserts that the leader was able to successfully commit its initial payload.
|
||||
///
|
||||
/// RUST_LOG=async_raft,memstore,singlenode=trace cargo test -p async-raft --test singlenode
|
||||
#[tokio::test(core_threads=4)]
|
||||
#[tokio::test(core_threads = 4)]
|
||||
async fn singlenode() -> Result<()> {
|
||||
fixtures::init_tracing();
|
||||
|
||||
|
|
|
@ -20,7 +20,7 @@ use fixtures::RaftRouter;
|
|||
/// after the config change is committed.
|
||||
///
|
||||
/// RUST_LOG=async_raft,memstore,stepdown=trace cargo test -p async-raft --test stepdown
|
||||
#[tokio::test(core_threads=5)]
|
||||
#[tokio::test(core_threads = 5)]
|
||||
async fn stepdown() -> Result<()> {
|
||||
fixtures::init_tracing();
|
||||
|
||||
|
@ -50,14 +50,35 @@ async fn stepdown() -> Result<()> {
|
|||
|
||||
// Assert on the state of the old leader.
|
||||
{
|
||||
let metrics = router.latest_metrics().await.into_iter().find(|node| node.id == 0)
|
||||
let metrics = router
|
||||
.latest_metrics()
|
||||
.await
|
||||
.into_iter()
|
||||
.find(|node| node.id == 0)
|
||||
.expect("expected to find metrics on original leader node");
|
||||
let cfg = metrics.membership_config;
|
||||
assert!(metrics.state != State::Leader, "expected old leader to have stepped down");
|
||||
assert_eq!(metrics.current_term, 1, "expected old leader to still be in first term, got {}", metrics.current_term);
|
||||
assert_eq!(metrics.last_log_index, 3, "expected old leader to have last log index of 3, got {}", metrics.last_log_index);
|
||||
assert_eq!(metrics.last_applied, 3, "expected old leader to have last applied of 3, got {}", metrics.last_applied);
|
||||
assert_eq!(cfg.members, hashset![1, 2, 3], "expected old leader to have membership of [1, 2, 3], got {:?}", cfg.members);
|
||||
assert_eq!(
|
||||
metrics.current_term, 1,
|
||||
"expected old leader to still be in first term, got {}",
|
||||
metrics.current_term
|
||||
);
|
||||
assert_eq!(
|
||||
metrics.last_log_index, 3,
|
||||
"expected old leader to have last log index of 3, got {}",
|
||||
metrics.last_log_index
|
||||
);
|
||||
assert_eq!(
|
||||
metrics.last_applied, 3,
|
||||
"expected old leader to have last applied of 3, got {}",
|
||||
metrics.last_applied
|
||||
);
|
||||
assert_eq!(
|
||||
cfg.members,
|
||||
hashset![1, 2, 3],
|
||||
"expected old leader to have membership of [1, 2, 3], got {:?}",
|
||||
cfg.members
|
||||
);
|
||||
assert!(cfg.members_after_consensus.is_none(), "expected old leader to be out of joint consensus");
|
||||
}
|
||||
|
||||
|
|
|
@ -1 +1,2 @@
|
|||
too-many-arguments-threshold = 10
|
||||
cognitive-complexity-threshold = 25
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
#![cfg_attr(feature="docinclude", feature(external_doc))]
|
||||
#![cfg_attr(feature="docinclude", doc(include="../README.md"))]
|
||||
#![cfg_attr(feature = "docinclude", feature(external_doc))]
|
||||
#![cfg_attr(feature = "docinclude", doc(include = "../README.md"))]
|
||||
|
||||
#[cfg(test)]
|
||||
mod test;
|
||||
|
@ -9,10 +9,10 @@ use std::io::Cursor;
|
|||
|
||||
use anyhow::Result;
|
||||
use async_raft::async_trait::async_trait;
|
||||
use async_raft::{AppData, AppDataResponse, NodeId, RaftStorage};
|
||||
use async_raft::raft::{Entry, EntryPayload, MembershipConfig};
|
||||
use async_raft::storage::{CurrentSnapshotData, HardState, InitialState};
|
||||
use serde::{Serialize, Deserialize};
|
||||
use async_raft::{AppData, AppDataResponse, NodeId, RaftStorage};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::sync::RwLock;
|
||||
use tokio::sync::{RwLockReadGuard, RwLockWriteGuard};
|
||||
|
||||
|
@ -94,23 +94,32 @@ impl MemStore {
|
|||
let sm = RwLock::new(MemStoreStateMachine::default());
|
||||
let hs = RwLock::new(None);
|
||||
let current_snapshot = RwLock::new(None);
|
||||
Self{id, log, sm, hs, current_snapshot}
|
||||
Self {
|
||||
id,
|
||||
log,
|
||||
sm,
|
||||
hs,
|
||||
current_snapshot,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new `MemStore` instance with some existing state (for testing).
|
||||
#[cfg(test)]
|
||||
pub fn new_with_state(
|
||||
id: NodeId,
|
||||
log: BTreeMap<u64, Entry<ClientRequest>>,
|
||||
sm: MemStoreStateMachine,
|
||||
hs: Option<HardState>,
|
||||
id: NodeId, log: BTreeMap<u64, Entry<ClientRequest>>, sm: MemStoreStateMachine, hs: Option<HardState>,
|
||||
current_snapshot: Option<MemStoreSnapshot>,
|
||||
) -> Self {
|
||||
let log = RwLock::new(log);
|
||||
let sm = RwLock::new(sm);
|
||||
let hs = RwLock::new(hs);
|
||||
let current_snapshot = RwLock::new(current_snapshot);
|
||||
Self{id, log, sm, hs, current_snapshot}
|
||||
Self {
|
||||
id,
|
||||
log,
|
||||
sm,
|
||||
hs,
|
||||
current_snapshot,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get a handle to the log for testing purposes.
|
||||
|
@ -133,7 +142,7 @@ impl MemStore {
|
|||
impl RaftStorage<ClientRequest, ClientResponse> for MemStore {
|
||||
type Snapshot = Cursor<Vec<u8>>;
|
||||
|
||||
#[tracing::instrument(level="trace", skip(self))]
|
||||
#[tracing::instrument(level = "trace", skip(self))]
|
||||
async fn get_membership_config(&self) -> Result<MembershipConfig> {
|
||||
let log = self.log.read().await;
|
||||
let cfg_opt = log.values().rev().find_map(|entry| match &entry.payload {
|
||||
|
@ -147,7 +156,7 @@ impl RaftStorage<ClientRequest, ClientResponse> for MemStore {
|
|||
})
|
||||
}
|
||||
|
||||
#[tracing::instrument(level="trace", skip(self))]
|
||||
#[tracing::instrument(level = "trace", skip(self))]
|
||||
async fn get_initial_state(&self) -> Result<InitialState> {
|
||||
let membership = self.get_membership_config().await?;
|
||||
let mut hs = self.hs.write().await;
|
||||
|
@ -160,7 +169,7 @@ impl RaftStorage<ClientRequest, ClientResponse> for MemStore {
|
|||
None => (0, 0),
|
||||
};
|
||||
let last_applied_log = sm.last_applied_log;
|
||||
return Ok(InitialState{
|
||||
return Ok(InitialState {
|
||||
last_log_index,
|
||||
last_log_term,
|
||||
last_applied_log,
|
||||
|
@ -176,13 +185,13 @@ impl RaftStorage<ClientRequest, ClientResponse> for MemStore {
|
|||
}
|
||||
}
|
||||
|
||||
#[tracing::instrument(level="trace", skip(self, hs))]
|
||||
#[tracing::instrument(level = "trace", skip(self, hs))]
|
||||
async fn save_hard_state(&self, hs: &HardState) -> Result<()> {
|
||||
*self.hs.write().await = Some(hs.clone());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tracing::instrument(level="trace", skip(self))]
|
||||
#[tracing::instrument(level = "trace", skip(self))]
|
||||
async fn get_log_entries(&self, start: u64, stop: u64) -> Result<Vec<Entry<ClientRequest>>> {
|
||||
// Invalid request, return empty vec.
|
||||
if start > stop {
|
||||
|
@ -193,7 +202,7 @@ impl RaftStorage<ClientRequest, ClientResponse> for MemStore {
|
|||
Ok(log.range(start..stop).map(|(_, val)| val.clone()).collect())
|
||||
}
|
||||
|
||||
#[tracing::instrument(level="trace", skip(self))]
|
||||
#[tracing::instrument(level = "trace", skip(self))]
|
||||
async fn delete_logs_from(&self, start: u64, stop: Option<u64>) -> Result<()> {
|
||||
if stop.as_ref().map(|stop| &start > stop).unwrap_or(false) {
|
||||
tracing::error!("invalid request, start > stop");
|
||||
|
@ -213,14 +222,14 @@ impl RaftStorage<ClientRequest, ClientResponse> for MemStore {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
#[tracing::instrument(level="trace", skip(self, entry))]
|
||||
#[tracing::instrument(level = "trace", skip(self, entry))]
|
||||
async fn append_entry_to_log(&self, entry: &Entry<ClientRequest>) -> Result<()> {
|
||||
let mut log = self.log.write().await;
|
||||
log.insert(entry.index, entry.clone());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tracing::instrument(level="trace", skip(self, entries))]
|
||||
#[tracing::instrument(level = "trace", skip(self, entries))]
|
||||
async fn replicate_to_log(&self, entries: &[Entry<ClientRequest>]) -> Result<()> {
|
||||
let mut log = self.log.write().await;
|
||||
for entry in entries {
|
||||
|
@ -229,13 +238,13 @@ impl RaftStorage<ClientRequest, ClientResponse> for MemStore {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
#[tracing::instrument(level="trace", skip(self, data))]
|
||||
#[tracing::instrument(level = "trace", skip(self, data))]
|
||||
async fn apply_entry_to_state_machine(&self, index: &u64, data: &ClientRequest) -> Result<ClientResponse> {
|
||||
let mut sm = self.sm.write().await;
|
||||
sm.last_applied_log = *index;
|
||||
if let Some((serial, res)) = sm.client_serial_responses.get(&data.client) {
|
||||
if serial == &data.serial {
|
||||
return Ok(ClientResponse(Ok(res.clone())))
|
||||
return Ok(ClientResponse(Ok(res.clone())));
|
||||
}
|
||||
}
|
||||
let previous = sm.client_status.insert(data.client.clone(), data.status.clone());
|
||||
|
@ -243,7 +252,7 @@ impl RaftStorage<ClientRequest, ClientResponse> for MemStore {
|
|||
Ok(ClientResponse(Ok(previous)))
|
||||
}
|
||||
|
||||
#[tracing::instrument(level="trace", skip(self, entries))]
|
||||
#[tracing::instrument(level = "trace", skip(self, entries))]
|
||||
async fn replicate_to_state_machine(&self, entries: &[(&u64, &ClientRequest)]) -> Result<()> {
|
||||
let mut sm = self.sm.write().await;
|
||||
for (index, data) in entries {
|
||||
|
@ -259,7 +268,7 @@ impl RaftStorage<ClientRequest, ClientResponse> for MemStore {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
#[tracing::instrument(level="trace", skip(self))]
|
||||
#[tracing::instrument(level = "trace", skip(self))]
|
||||
async fn do_log_compaction(&self, through: u64) -> Result<CurrentSnapshotData<Self::Snapshot>> {
|
||||
let data;
|
||||
{
|
||||
|
@ -272,7 +281,9 @@ impl RaftStorage<ClientRequest, ClientResponse> for MemStore {
|
|||
{
|
||||
// Go backwards through the log to find the most recent membership config <= the `through` index.
|
||||
let log = self.log.read().await;
|
||||
membership_config = log.values().rev()
|
||||
membership_config = log
|
||||
.values()
|
||||
.rev()
|
||||
.skip_while(|entry| entry.index > through)
|
||||
.find_map(|entry| match &entry.payload {
|
||||
EntryPayload::ConfigChange(cfg) => Some(cfg.membership.clone()),
|
||||
|
@ -286,30 +297,42 @@ impl RaftStorage<ClientRequest, ClientResponse> for MemStore {
|
|||
{
|
||||
let mut log = self.log.write().await;
|
||||
let mut current_snapshot = self.current_snapshot.write().await;
|
||||
term = log.get(&through).map(|entry| entry.term).ok_or_else(|| anyhow::anyhow!(ERR_INCONSISTENT_LOG))?;
|
||||
term = log
|
||||
.get(&through)
|
||||
.map(|entry| entry.term)
|
||||
.ok_or_else(|| anyhow::anyhow!(ERR_INCONSISTENT_LOG))?;
|
||||
*log = log.split_off(&through);
|
||||
log.insert(through, Entry::new_snapshot_pointer(through, term, "".into(), membership_config.clone()));
|
||||
|
||||
let snapshot = MemStoreSnapshot{index: through, term, membership: membership_config.clone(), data};
|
||||
let snapshot = MemStoreSnapshot {
|
||||
index: through,
|
||||
term,
|
||||
membership: membership_config.clone(),
|
||||
data,
|
||||
};
|
||||
snapshot_bytes = serde_json::to_vec(&snapshot)?;
|
||||
*current_snapshot = Some(snapshot);
|
||||
} // Release log & snapshot write locks.
|
||||
|
||||
tracing::trace!({snapshot_size=snapshot_bytes.len()}, "log compaction complete");
|
||||
Ok(CurrentSnapshotData{
|
||||
term, index: through, membership: membership_config.clone(),
|
||||
tracing::trace!({ snapshot_size = snapshot_bytes.len() }, "log compaction complete");
|
||||
Ok(CurrentSnapshotData {
|
||||
term,
|
||||
index: through,
|
||||
membership: membership_config.clone(),
|
||||
snapshot: Box::new(Cursor::new(snapshot_bytes)),
|
||||
})
|
||||
}
|
||||
|
||||
#[tracing::instrument(level="trace", skip(self))]
|
||||
#[tracing::instrument(level = "trace", skip(self))]
|
||||
async fn create_snapshot(&self) -> Result<(String, Box<Self::Snapshot>)> {
|
||||
Ok((String::from(""), Box::new(Cursor::new(Vec::new())))) // Snapshot IDs are insignificant to this storage engine.
|
||||
}
|
||||
|
||||
#[tracing::instrument(level="trace", skip(self, snapshot))]
|
||||
async fn finalize_snapshot_installation(&self, index: u64, term: u64, delete_through: Option<u64>, id: String, snapshot: Box<Self::Snapshot>) -> Result<()> {
|
||||
tracing::trace!({snapshot_size=snapshot.get_ref().len()}, "decoding snapshot for installation");
|
||||
#[tracing::instrument(level = "trace", skip(self, snapshot))]
|
||||
async fn finalize_snapshot_installation(
|
||||
&self, index: u64, term: u64, delete_through: Option<u64>, id: String, snapshot: Box<Self::Snapshot>,
|
||||
) -> Result<()> {
|
||||
tracing::trace!({ snapshot_size = snapshot.get_ref().len() }, "decoding snapshot for installation");
|
||||
let raw = serde_json::to_string_pretty(snapshot.get_ref().as_slice())?;
|
||||
println!("JSON SNAP:\n{}", raw);
|
||||
let new_snapshot: MemStoreSnapshot = serde_json::from_slice(snapshot.get_ref().as_slice())?;
|
||||
|
@ -317,7 +340,9 @@ impl RaftStorage<ClientRequest, ClientResponse> for MemStore {
|
|||
{
|
||||
// Go backwards through the log to find the most recent membership config <= the `through` index.
|
||||
let mut log = self.log.write().await;
|
||||
let membership_config = log.values().rev()
|
||||
let membership_config = log
|
||||
.values()
|
||||
.rev()
|
||||
.skip_while(|entry| entry.index > index)
|
||||
.find_map(|entry| match &entry.payload {
|
||||
EntryPayload::ConfigChange(cfg) => Some(cfg.membership.clone()),
|
||||
|
@ -347,12 +372,12 @@ impl RaftStorage<ClientRequest, ClientResponse> for MemStore {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
#[tracing::instrument(level="trace", skip(self))]
|
||||
#[tracing::instrument(level = "trace", skip(self))]
|
||||
async fn get_current_snapshot(&self) -> Result<Option<CurrentSnapshotData<Self::Snapshot>>> {
|
||||
match &*self.current_snapshot.read().await {
|
||||
Some(snapshot) => {
|
||||
let reader = serde_json::to_vec(&snapshot)?;
|
||||
Ok(Some(CurrentSnapshotData{
|
||||
Ok(Some(CurrentSnapshotData {
|
||||
index: snapshot.index,
|
||||
term: snapshot.term,
|
||||
membership: snapshot.membership.clone(),
|
||||
|
|
|
@ -13,7 +13,10 @@ async fn test_get_membership_config_default() -> Result<()> {
|
|||
let store = MemStore::new(NODE_ID);
|
||||
let membership = store.get_membership_config().await?;
|
||||
assert_eq!(membership.members.len(), 1, "expected members len of 1");
|
||||
assert!(membership.members_after_consensus.is_none(), "expected None for default members_after_consensus");
|
||||
assert!(
|
||||
membership.members_after_consensus.is_none(),
|
||||
"expected None for default members_after_consensus"
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
@ -24,11 +27,24 @@ async fn test_get_membership_config_with_previous_state() -> Result<()> {
|
|||
members.insert(1);
|
||||
members.insert(2);
|
||||
members.insert(3);
|
||||
log.insert(1, Entry{term: 1, index: 1, payload: EntryPayload::ConfigChange(EntryConfigChange{
|
||||
membership: MembershipConfig{members: members.clone(), members_after_consensus: None}
|
||||
})});
|
||||
log.insert(
|
||||
1,
|
||||
Entry {
|
||||
term: 1,
|
||||
index: 1,
|
||||
payload: EntryPayload::ConfigChange(EntryConfigChange {
|
||||
membership: MembershipConfig {
|
||||
members: members.clone(),
|
||||
members_after_consensus: None,
|
||||
},
|
||||
}),
|
||||
},
|
||||
);
|
||||
let sm = MemStoreStateMachine::default();
|
||||
let hs = HardState{current_term: 1, voted_for: Some(NODE_ID)};
|
||||
let hs = HardState {
|
||||
current_term: 1,
|
||||
voted_for: Some(NODE_ID),
|
||||
};
|
||||
let store = MemStore::new_with_state(NODE_ID, log, sm, Some(hs.clone()), None);
|
||||
|
||||
let initial = store.get_membership_config().await?;
|
||||
|
@ -44,7 +60,10 @@ async fn test_get_membership_config_with_previous_state() -> Result<()> {
|
|||
#[tokio::test]
|
||||
async fn test_get_initial_state_default() -> Result<()> {
|
||||
let store = MemStore::new(NODE_ID);
|
||||
let expected_hs = HardState{current_term: 0, voted_for: None};
|
||||
let expected_hs = HardState {
|
||||
current_term: 0,
|
||||
voted_for: None,
|
||||
};
|
||||
let expected_membership = MembershipConfig::new_initial(NODE_ID);
|
||||
|
||||
let initial = store.get_initial_state().await?;
|
||||
|
@ -60,10 +79,20 @@ async fn test_get_initial_state_default() -> Result<()> {
|
|||
#[tokio::test]
|
||||
async fn test_get_initial_state_with_previous_state() -> Result<()> {
|
||||
let mut log = BTreeMap::new();
|
||||
log.insert(1, Entry{term: 1, index: 1, payload: EntryPayload::Blank});
|
||||
log.insert(
|
||||
1,
|
||||
Entry {
|
||||
term: 1,
|
||||
index: 1,
|
||||
payload: EntryPayload::Blank,
|
||||
},
|
||||
);
|
||||
let mut sm = MemStoreStateMachine::default();
|
||||
sm.last_applied_log = 1; // Just stubbed in for testing.
|
||||
let hs = HardState{current_term: 1, voted_for: Some(NODE_ID)};
|
||||
let hs = HardState {
|
||||
current_term: 1,
|
||||
voted_for: Some(NODE_ID),
|
||||
};
|
||||
let store = MemStore::new_with_state(NODE_ID, log, sm, Some(hs.clone()), None);
|
||||
|
||||
let initial = store.get_initial_state().await?;
|
||||
|
@ -81,13 +110,19 @@ async fn test_get_initial_state_with_previous_state() -> Result<()> {
|
|||
#[tokio::test]
|
||||
async fn test_save_hard_state() -> Result<()> {
|
||||
let store = MemStore::new(NODE_ID);
|
||||
let new_hs = HardState{current_term: 100, voted_for: Some(NODE_ID)};
|
||||
let new_hs = HardState {
|
||||
current_term: 100,
|
||||
voted_for: Some(NODE_ID),
|
||||
};
|
||||
|
||||
let initial = store.get_initial_state().await?;
|
||||
store.save_hard_state(&new_hs).await?;
|
||||
let post = store.get_initial_state().await?;
|
||||
|
||||
assert_ne!(initial.hard_state, post.hard_state, "hard state was expected to be different after update");
|
||||
assert_ne!(
|
||||
initial.hard_state, post.hard_state,
|
||||
"hard state was expected to be different after update"
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
@ -173,7 +208,13 @@ async fn test_delete_logs_from_deletes_only_target_logs() -> Result<()> {
|
|||
async fn test_append_entry_to_log() -> Result<()> {
|
||||
let store = default_store_with_logs();
|
||||
|
||||
store.append_entry_to_log(&Entry{term: 2, index: 10, payload: EntryPayload::Blank}).await?;
|
||||
store
|
||||
.append_entry_to_log(&Entry {
|
||||
term: 2,
|
||||
index: 10,
|
||||
payload: EntryPayload::Blank,
|
||||
})
|
||||
.await?;
|
||||
let log = store.get_log().await;
|
||||
|
||||
assert_eq!(log.len(), 10, "expected 10 entries to exist in the log");
|
||||
|
@ -189,7 +230,13 @@ async fn test_append_entry_to_log() -> Result<()> {
|
|||
async fn test_replicate_to_log() -> Result<()> {
|
||||
let store = default_store_with_logs();
|
||||
|
||||
store.replicate_to_log(&[Entry{term: 1, index: 11, payload: EntryPayload::Blank}]).await?;
|
||||
store
|
||||
.replicate_to_log(&[Entry {
|
||||
term: 1,
|
||||
index: 11,
|
||||
payload: EntryPayload::Blank,
|
||||
}])
|
||||
.await?;
|
||||
let log = store.get_log().await;
|
||||
|
||||
assert_eq!(log.len(), 11, "expected 11 entries to exist in the log");
|
||||
|
@ -205,11 +252,23 @@ async fn test_replicate_to_log() -> Result<()> {
|
|||
async fn test_apply_entry_to_state_machine() -> Result<()> {
|
||||
let store = default_store_with_logs();
|
||||
|
||||
store.apply_entry_to_state_machine(&1, &ClientRequest{client: "0".into(), serial: 0, status: "lit".into()}).await?;
|
||||
store
|
||||
.apply_entry_to_state_machine(
|
||||
&1,
|
||||
&ClientRequest {
|
||||
client: "0".into(),
|
||||
serial: 0,
|
||||
status: "lit".into(),
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
let sm = store.get_state_machine().await;
|
||||
|
||||
assert_eq!(sm.last_applied_log, 1, "expected last_applied_log to be 1, got {}", sm.last_applied_log);
|
||||
let client_serial = sm.client_serial_responses.get("0").expect("expected entry to exist in client_serial_responses");
|
||||
let client_serial = sm
|
||||
.client_serial_responses
|
||||
.get("0")
|
||||
.expect("expected entry to exist in client_serial_responses");
|
||||
assert_eq!(client_serial.0, 0, "unexpected client serial response");
|
||||
assert_eq!(client_serial.1, None, "unexpected client serial response");
|
||||
let client_status = sm.client_status.get("0").expect("expected entry to exist in client_status");
|
||||
|
@ -224,22 +283,36 @@ async fn test_apply_entry_to_state_machine() -> Result<()> {
|
|||
async fn test_replicate_to_state_machine() -> Result<()> {
|
||||
let store = default_store_with_logs();
|
||||
|
||||
let req0 = ClientRequest{client: "1".into(), serial: 0, status: "old".into()};
|
||||
let req1 = ClientRequest{client: "1".into(), serial: 1, status: "new".into()};
|
||||
let req2 = ClientRequest{client: "2".into(), serial: 0, status: "other".into()};
|
||||
let entries = vec![
|
||||
(&1u64, &req0),
|
||||
(&2u64, &req1),
|
||||
(&3u64, &req2),
|
||||
];
|
||||
let req0 = ClientRequest {
|
||||
client: "1".into(),
|
||||
serial: 0,
|
||||
status: "old".into(),
|
||||
};
|
||||
let req1 = ClientRequest {
|
||||
client: "1".into(),
|
||||
serial: 1,
|
||||
status: "new".into(),
|
||||
};
|
||||
let req2 = ClientRequest {
|
||||
client: "2".into(),
|
||||
serial: 0,
|
||||
status: "other".into(),
|
||||
};
|
||||
let entries = vec![(&1u64, &req0), (&2u64, &req1), (&3u64, &req2)];
|
||||
store.replicate_to_state_machine(&entries).await?;
|
||||
let sm = store.get_state_machine().await;
|
||||
|
||||
assert_eq!(sm.last_applied_log, 3, "expected last_applied_log to be 3, got {}", sm.last_applied_log);
|
||||
let client_serial1 = sm.client_serial_responses.get("1").expect("expected entry to exist in client_serial_responses for client 1");
|
||||
let client_serial1 = sm
|
||||
.client_serial_responses
|
||||
.get("1")
|
||||
.expect("expected entry to exist in client_serial_responses for client 1");
|
||||
assert_eq!(client_serial1.0, 1, "unexpected client serial response");
|
||||
assert_eq!(client_serial1.1, Some(String::from("old")), "unexpected client serial response");
|
||||
let client_serial2 = sm.client_serial_responses.get("2").expect("expected entry to exist in client_serial_responses for client 2");
|
||||
let client_serial2 = sm
|
||||
.client_serial_responses
|
||||
.get("2")
|
||||
.expect("expected entry to exist in client_serial_responses for client 2");
|
||||
assert_eq!(client_serial2.0, 0, "unexpected client serial response");
|
||||
assert_eq!(client_serial2.1, None, "unexpected client serial response");
|
||||
let client_status1 = sm.client_status.get("1").expect("expected entry to exist in client_status for client 1");
|
||||
|
@ -254,17 +327,90 @@ async fn test_replicate_to_state_machine() -> Result<()> {
|
|||
|
||||
fn default_store_with_logs() -> MemStore {
|
||||
let mut log = BTreeMap::new();
|
||||
log.insert(1, Entry{term: 1, index: 1, payload: EntryPayload::Blank});
|
||||
log.insert(2, Entry{term: 1, index: 2, payload: EntryPayload::Blank});
|
||||
log.insert(3, Entry{term: 1, index: 3, payload: EntryPayload::Blank});
|
||||
log.insert(4, Entry{term: 1, index: 4, payload: EntryPayload::Blank});
|
||||
log.insert(5, Entry{term: 1, index: 5, payload: EntryPayload::Blank});
|
||||
log.insert(6, Entry{term: 1, index: 6, payload: EntryPayload::Blank});
|
||||
log.insert(7, Entry{term: 1, index: 7, payload: EntryPayload::Blank});
|
||||
log.insert(8, Entry{term: 1, index: 8, payload: EntryPayload::Blank});
|
||||
log.insert(9, Entry{term: 1, index: 9, payload: EntryPayload::Blank});
|
||||
log.insert(10, Entry{term: 1, index: 10, payload: EntryPayload::Blank});
|
||||
log.insert(
|
||||
1,
|
||||
Entry {
|
||||
term: 1,
|
||||
index: 1,
|
||||
payload: EntryPayload::Blank,
|
||||
},
|
||||
);
|
||||
log.insert(
|
||||
2,
|
||||
Entry {
|
||||
term: 1,
|
||||
index: 2,
|
||||
payload: EntryPayload::Blank,
|
||||
},
|
||||
);
|
||||
log.insert(
|
||||
3,
|
||||
Entry {
|
||||
term: 1,
|
||||
index: 3,
|
||||
payload: EntryPayload::Blank,
|
||||
},
|
||||
);
|
||||
log.insert(
|
||||
4,
|
||||
Entry {
|
||||
term: 1,
|
||||
index: 4,
|
||||
payload: EntryPayload::Blank,
|
||||
},
|
||||
);
|
||||
log.insert(
|
||||
5,
|
||||
Entry {
|
||||
term: 1,
|
||||
index: 5,
|
||||
payload: EntryPayload::Blank,
|
||||
},
|
||||
);
|
||||
log.insert(
|
||||
6,
|
||||
Entry {
|
||||
term: 1,
|
||||
index: 6,
|
||||
payload: EntryPayload::Blank,
|
||||
},
|
||||
);
|
||||
log.insert(
|
||||
7,
|
||||
Entry {
|
||||
term: 1,
|
||||
index: 7,
|
||||
payload: EntryPayload::Blank,
|
||||
},
|
||||
);
|
||||
log.insert(
|
||||
8,
|
||||
Entry {
|
||||
term: 1,
|
||||
index: 8,
|
||||
payload: EntryPayload::Blank,
|
||||
},
|
||||
);
|
||||
log.insert(
|
||||
9,
|
||||
Entry {
|
||||
term: 1,
|
||||
index: 9,
|
||||
payload: EntryPayload::Blank,
|
||||
},
|
||||
);
|
||||
log.insert(
|
||||
10,
|
||||
Entry {
|
||||
term: 1,
|
||||
index: 10,
|
||||
payload: EntryPayload::Blank,
|
||||
},
|
||||
);
|
||||
let sm = MemStoreStateMachine::default();
|
||||
let hs = HardState{current_term: 1, voted_for: Some(NODE_ID)};
|
||||
MemStore::new_with_state(NODE_ID, log, sm, Some(hs.clone()), None)
|
||||
let hs = HardState {
|
||||
current_term: 1,
|
||||
voted_for: Some(NODE_ID),
|
||||
};
|
||||
MemStore::new_with_state(NODE_ID, log, sm, Some(hs), None)
|
||||
}
|
||||
|
|
|
@ -0,0 +1,27 @@
|
|||
unstable_features = true
|
||||
edition = "2018"
|
||||
|
||||
comment_width = 100
|
||||
fn_args_layout = "Compressed"
|
||||
max_width = 150
|
||||
use_small_heuristics = "Default"
|
||||
use_try_shorthand = true
|
||||
|
||||
# pre-unstable
|
||||
chain_width = 75
|
||||
single_line_if_else_max_width = 75
|
||||
space_around_attr_eq = false
|
||||
struct_lit_width = 50
|
||||
|
||||
# unstable
|
||||
condense_wildcard_suffixes = true
|
||||
format_code_in_doc_comments = true
|
||||
format_strings = true
|
||||
match_block_trailing_comma = false
|
||||
normalize_comments = true
|
||||
normalize_doc_attributes = true
|
||||
reorder_impl_items = true
|
||||
struct_lit_single_line = true
|
||||
trailing_comma = "Vertical"
|
||||
use_field_init_shorthand = true
|
||||
wrap_comments = true
|
Loading…
Reference in New Issue