bugfix: Use inline assembly in full_fence
This commit bumps the MSRV to 1.59
This commit is contained in:
parent
2d309371f8
commit
cbdf9e88e1
|
@ -57,7 +57,7 @@ jobs:
|
|||
matrix:
|
||||
# When updating this, remember to update the minimum supported
|
||||
# Rust version in Cargo.toml.
|
||||
rust: ['1.38']
|
||||
rust: ['1.59']
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Install Rust
|
||||
|
|
|
@ -10,7 +10,7 @@ authors = [
|
|||
"John Nunley <jtnunley01@gmail.com>"
|
||||
]
|
||||
edition = "2018"
|
||||
rust-version = "1.38"
|
||||
rust-version = "1.59"
|
||||
description = "Concurrent multi-producer multi-consumer queue"
|
||||
license = "Apache-2.0 OR MIT"
|
||||
repository = "https://github.com/smol-rs/concurrent-queue"
|
||||
|
|
35
src/lib.rs
35
src/lib.rs
|
@ -59,7 +59,7 @@ extern crate std;
|
|||
|
||||
use alloc::boxed::Box;
|
||||
use core::fmt;
|
||||
use sync::atomic::{self, AtomicUsize, Ordering};
|
||||
use sync::atomic::{self, Ordering};
|
||||
|
||||
#[cfg(feature = "std")]
|
||||
use std::error;
|
||||
|
@ -538,28 +538,31 @@ impl<T> fmt::Display for PushError<T> {
|
|||
/// Equivalent to `atomic::fence(Ordering::SeqCst)`, but in some cases faster.
|
||||
#[inline]
|
||||
fn full_fence() {
|
||||
if cfg!(all(
|
||||
any(target_arch = "x86", target_arch = "x86_64"),
|
||||
not(miri),
|
||||
not(loom)
|
||||
)) {
|
||||
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), not(miri), not(loom)))]
|
||||
{
|
||||
use core::{arch::asm, cell::UnsafeCell};
|
||||
// HACK(stjepang): On x86 architectures there are two different ways of executing
|
||||
// a `SeqCst` fence.
|
||||
//
|
||||
// 1. `atomic::fence(SeqCst)`, which compiles into a `mfence` instruction.
|
||||
// 2. `_.compare_exchange(_, _, SeqCst, SeqCst)`, which compiles into a `lock cmpxchg` instruction.
|
||||
// 2. A `lock <op>` instruction.
|
||||
//
|
||||
// Both instructions have the effect of a full barrier, but empirical benchmarks have shown
|
||||
// that the second one is sometimes a bit faster.
|
||||
//
|
||||
// The ideal solution here would be to use inline assembly, but we're instead creating a
|
||||
// temporary atomic variable and compare-and-exchanging its value. No sane compiler for
|
||||
// x86 platforms is going to optimize this away.
|
||||
atomic::compiler_fence(Ordering::SeqCst);
|
||||
let a = AtomicUsize::new(0);
|
||||
let _ = a.compare_exchange(0, 1, Ordering::SeqCst, Ordering::SeqCst);
|
||||
atomic::compiler_fence(Ordering::SeqCst);
|
||||
} else {
|
||||
let a = UnsafeCell::new(0_usize);
|
||||
// It is common to use `lock or` here, but when using a local variable, `lock not`, which
|
||||
// does not modify any CPU status flags, should be slightly more efficient.
|
||||
// Refs: https://www.felixcloutier.com/x86/not
|
||||
unsafe {
|
||||
#[cfg(target_pointer_width = "64")]
|
||||
asm!("lock not qword ptr [{0}]", in(reg) a.get(), options(nostack, preserves_flags));
|
||||
#[cfg(target_pointer_width = "32")]
|
||||
asm!("lock not dword ptr [{0:e}]", in(reg) a.get(), options(nostack, preserves_flags));
|
||||
}
|
||||
return;
|
||||
}
|
||||
#[allow(unreachable_code)]
|
||||
{
|
||||
atomic::fence(Ordering::SeqCst);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -33,7 +33,6 @@ mod sync_impl {
|
|||
pub(crate) use loom::cell;
|
||||
|
||||
pub(crate) mod atomic {
|
||||
pub(crate) use core::sync::atomic::compiler_fence;
|
||||
pub(crate) use loom::sync::atomic::*;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue