Update project TODOs and get things into place for proper fsync management

2024-02-18 08:22:16 +01:00 · 2024-02-18 08:22:16 +01:00 · d6ed26edc5
parent bd65e8e0c4
commit d6ed26edc5
3 changed files with 42 additions and 4 deletions
--- a/src/event_verifier.rs
+++ b/src/event_verifier.rs
@ -87,6 +87,7 @@ impl EventVerifier {
            if !dirty_epochs.is_empty() {
                println!("{object_id:?} was paged out while having dirty epochs {dirty_epochs:?}");
                self.print_debug_history_for_object(object_id);
+                println!("{state:?} {epoch:?} {at}");
                println!("invalid object state transition");
                std::process::abort();
            }
--- a/src/heap.rs
+++ b/src/heap.rs
@ -100,14 +100,17 @@ const SLAB_SIZES: [usize; N_SLABS] = [
    6442450944,
    8589934592,
    12884901888,
-    17179869184,
+    17_179_869_184, // 16 GiB (~17.2 GB) is the max page size as of now
 ];

 #[derive(Default, Debug, Copy, Clone)]
 pub struct WriteBatchStats {
    pub heap_bytes_written: u64,
    pub heap_files_written_to: u64,
+    /// Latency inclusive of fsync
    pub heap_write_latency: Duration,
+    /// Latency for fsyncing files
+    pub heap_sync_latency: Duration,
    pub metadata_bytes_written: u64,
    pub metadata_write_latency: Duration,
    pub truncated_files: u64,
@ -135,6 +138,9 @@ impl WriteBatchStats {
            heap_write_latency: self
                .heap_write_latency
                .max(other.heap_write_latency),
+            heap_sync_latency: self
+                .heap_sync_latency
+                .max(other.heap_sync_latency),
            metadata_bytes_written: self
                .metadata_bytes_written
                .max(other.metadata_bytes_written),
@ -159,6 +165,9 @@ impl WriteBatchStats {
            heap_write_latency: self
                .heap_write_latency
                .add(other.heap_write_latency),
+            heap_sync_latency: self
+                .heap_sync_latency
+                .add(other.heap_sync_latency),
            metadata_bytes_written: self
                .metadata_bytes_written
                .add(other.metadata_bytes_written),
@ -579,6 +588,10 @@ struct Slab {
 }

 impl Slab {
+    fn sync(&self) -> io::Result<()> {
+        self.file.sync_all()
+    }
+
    fn read(
        &self,
        slot: u64,
@ -890,6 +903,12 @@ impl Heap {
        let metadata_batch_res: io::Result<Vec<UpdateMetadata>> =
            batch.into_par_iter().map(map_closure).collect();

+        let before_heap_sync = Instant::now();
+
+        // TODO fsync dirty slabs here
+
+        let heap_sync_latency = before_heap_sync.elapsed();
+
        let heap_write_latency = before_heap_write.elapsed();

        let metadata_batch = match metadata_batch_res {
@ -1000,6 +1019,7 @@ impl Heap {
                .load(Ordering::Acquire)
                .count_ones() as u64,
            heap_write_latency,
+            heap_sync_latency,
            metadata_bytes_written,
            metadata_write_latency,
            truncated_files,
--- a/src/lib.rs
+++ b/src/lib.rs
@ -1,13 +1,29 @@
 // 1.0 blockers
 //
 // bugs
-// * object_cache violation of flush responsibility for second read of expected cooperative serialization. leaf in question's dirty_flush_epoch is Some(FlushEpoch(3)), our expected key was (FlushEpoch(2), ObjectId(263)). node.deleted: None
 // * tree predecessor holds lock on successor and tries to get it for predecessor. This will
 //   deadlock if used concurrently with write batches, which acquire locks lexicographically.
 //   * add merges to iterator test and assert it deadlocks
 //   * alternative is to merge right, not left
-// * concurrent bug with node hi key overshooting successor's low key
-//   * possibly related to merge?
+// * page-out needs to be deferred until after any flush of the dirty epoch
+//   * can't send reliable page-out request backwards from 7->6
+//   * re-locking every mutex in a writebatch feels bad
+//   * need to signal stability status forward
+//     * maybe we already are
+//   * can make dirty_flush_epoch atomic and CAS it to 0 after flush
+//   * can change dirty_flush_epoch to unflushed_epoch
+//   * can always set mutation_count to max dirty flush epoch
+//     * this feels nice, we can lazily update a global stable flushed counter
+//     * can get rid of dirty_flush_epoch and page_out_on_flush?
+//     * or at least dirty_flush_epoch
+//   * dirty_flush_epoch really means "hasn't yet been cooperatively serialized at that flush epoch"
+//   * interesting metrics:
+//     * whether dirty for some epoch
+//     * whether cooperatively serialized for some epoch
+//     * whether fully flushed for some epoch
+//     * clean -> dirty -> {maybe coop} -> flushed
+//   * for page-out, we only care if it's stable or if we need to add it to
+//     a page-out priority queue
 //
 // reliability
 // TODO make all writes wrapped in a Tearable wrapper that splits writes
@ -21,6 +37,7 @@
 // performance
 // TODO handle prefix encoding
 // TODO (minor) remove cache access for removed node in merge function
+// TODO index+log hybrid - tinylsm key -> object location
 //
 // features
 // TODO multi-collection batch