Skip to content

std::sync::mpsc::Receiver::try_recv can block forever if sending thread is blocked #112723

Open
@benhansen-io

Description

@benhansen-io

I tried this code:

use std::{
    sync::{mpsc, Arc},
    time::{Duration, Instant},
};

use thread_priority::ThreadPriority;

// Reproducer for `Receiver::try_recv` stalling for a long time.
//
// Two threads are pinned to the same CPU core: a minimum-priority sender that
// pushes values into an mpsc channel in a tight loop, and a maximum-priority
// receiver that drains it with `try_recv` and reports any single call that
// takes longer than one second. The main thread periodically prints how many
// values have been received so a stall is visible as a counter that stops
// advancing.
fn main() {
    // Both worker threads are pinned to this one core so that they compete
    // for the same CPU.
    const PINNED_CORE: usize = 2;

    let (sender, receiver) = mpsc::channel::<usize>();

    // Sending thread: lowest priority, pinned to PINNED_CORE, sends forever.
    std::thread::Builder::new()
        .name("sending".to_owned())
        .spawn(move || {
            thread_priority::set_current_thread_priority(ThreadPriority::Min).unwrap();
            // NOTE(review): the result of `set_for_current` (if any) is not
            // checked here — confirm pinning actually succeeded when
            // reproducing on another machine.
            core_affinity::set_for_current(core_affinity::CoreId { id: PINNED_CORE });

            loop {
                sender.send(42).unwrap();
            }
        })
        .unwrap();

    // Shared counter of successfully received values; written by the
    // receiving thread and read by the main thread below.
    let num_received = Arc::new(std::sync::atomic::AtomicUsize::new(0));

    // Receiving thread: highest priority, pinned to the same core as the
    // sender.
    std::thread::Builder::new()
        .name("receiving".to_owned())
        .spawn({
            // Clone the Arc outside the closure so the original handle stays
            // available to the main thread.
            let num_received = num_received.clone();
            move || {
                thread_priority::set_current_thread_priority(ThreadPriority::Max).unwrap();
                core_affinity::set_for_current(core_affinity::CoreId { id: PINNED_CORE });

                loop {
                    // Time only the `try_recv` call itself; it is documented
                    // as non-blocking, so anything over a second is reported.
                    let start = Instant::now();
                    let try_receive_result = receiver.try_recv();
                    let elapsed = start.elapsed();
                    if elapsed > Duration::from_secs(1) {
                        println!("try_recv blocked for {:.2} seconds", elapsed.as_secs_f32());
                    }
                    match try_receive_result {
                        Ok(_) => {
                            num_received.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
                        }
                        Err(mpsc::TryRecvError::Empty) => {
                            // Channel is empty: sleep before polling again.
                            // Presumably this is what lets the low-priority
                            // sender run on the shared core — TODO confirm.
                            std::thread::sleep(Duration::from_millis(200));
                        }
                        // The sending thread loops forever, so the reproducer
                        // treats a disconnected channel as impossible.
                        Err(mpsc::TryRecvError::Disconnected) => unreachable!(),
                    }
                }
            }
        })
        .unwrap();

    // Main thread: report progress every 500 ms, forever. No priority or
    // affinity is set for this thread in this function.
    loop {
        std::thread::sleep(Duration::from_millis(500));
        println!(
            "Receiving thread has received {}",
            num_received.load(std::sync::atomic::Ordering::SeqCst)
        )
    }
}

(full crate code available at https://github.com/benhansen-io/mpsc_deadlock_reproducer)

Based on the following documentation:

This method will never block the caller in order to wait for data to become available. Instead, this will always return immediately with a possible option of pending data on the channel.

I would not expect try_recv to ever block, but the output shows lines such as:

Receiving thread has received 740113466                                                                                                 
Receiving thread has received 740113466                                                                                                 
Receiving thread has received 740113466                                                                                                 
try_recv blocked for 26.77 seconds                                                                                                      
Receiving thread has received 740354709                                                                                                 
Receiving thread has received 747929297                                                                                                 
Receiving thread has received 754959588

When a deadlock is happening I get the following backtraces:

Backtrace of the sending thread:

#0  core::sync::atomic::atomic_or<usize> (dst=0x7fffec243ac8, val=1, order=<optimized out>) at /rustc/90c541806f23a127002de5b4038be731ba1458ca/library/core/src/sync/atomic.rs:3329
#1  core::sync::atomic::AtomicUsize::fetch_or (self=0x7fffec243ac8, val=1, order=<optimized out>) at /rustc/90c541806f23a127002de5b4038be731ba1458ca/library/core/src/sync/atomic.rs:2645
#2  std::sync::mpmc::list::Channel<usize>::write<usize> (self=0x5555555d4c00, token=0x7ffff7c8c910, msg=42) at /rustc/90c541806f23a127002de5b4038be731ba1458ca/library/std/src/sync/mpmc/list.rs:270
#3  0x0000555555572949 in std::sync::mpmc::list::Channel<usize>::send<usize> (self=0x5555555d4c00, msg=42, _deadline=...) at /rustc/90c541806f23a127002de5b4038be731ba1458ca/library/std/src/sync/mpmc/list.rs:403
#4  0x0000555555570015 in std::sync::mpmc::Sender<usize>::send<usize> (self=0x7ffff7c8cb10, msg=42) at /rustc/90c541806f23a127002de5b4038be731ba1458ca/library/std/src/sync/mpmc/mod.rs:128
#5  0x0000555555566ad3 in std::sync::mpsc::Sender<usize>::send<usize> (self=0x7ffff7c8cb10, t=42) at /rustc/90c541806f23a127002de5b4038be731ba1458ca/library/std/src/sync/mpsc/mod.rs:614
#6  0x0000555555573e8a in mpsc_deadlock_reproducer::main::{closure#0} () at src/main.rs:20
#7  0x000055555556ac23 in std::sys_common::backtrace::__rust_begin_short_backtrace<mpsc_deadlock_reproducer::main::{closure_env#0}, ()> (f=...) at /rustc/90c541806f23a127002de5b4038be731ba1458ca/library/std/src/sys_common/backtrace.rs:134
#8  0x000055555556ef63 in std::thread::{impl#0}::spawn_unchecked_::{closure#1}::{closure#0}<mpsc_deadlock_reproducer::main::{closure_env#0}, ()> () at /rustc/90c541806f23a127002de5b4038be731ba1458ca/library/std/src/thread/mod.rs:526
#9  0x0000555555572d93 in core::panic::unwind_safe::{impl#23}::call_once<(), std::thread::{impl#0}::spawn_unchecked_::{closure#1}::{closure_env#0}<mpsc_deadlock_reproducer::main::{closure_env#0}, ()>> (self=...)
    at /rustc/90c541806f23a127002de5b4038be731ba1458ca/library/core/src/panic/unwind_safe.rs:271
#10 0x0000555555564d86 in std::panicking::try::do_call<core::panic::unwind_safe::AssertUnwindSafe<std::thread::{impl#0}::spawn_unchecked_::{closure#1}::{closure_env#0}<mpsc_deadlock_reproducer::main::{closure_env#0}, ()>>, ()> (data=0x7ffff7c8cc90)
    at /rustc/90c541806f23a127002de5b4038be731ba1458ca/library/std/src/panicking.rs:485
#11 0x00005555555650fb in __rust_try ()
#12 0x0000555555564b0e in std::panicking::try<(), core::panic::unwind_safe::AssertUnwindSafe<std::thread::{impl#0}::spawn_unchecked_::{closure#1}::{closure_env#0}<mpsc_deadlock_reproducer::main::{closure_env#0}, ()>>> (f=...)
    at /rustc/90c541806f23a127002de5b4038be731ba1458ca/library/std/src/panicking.rs:449
#13 0x0000555555565343 in std::panic::catch_unwind<core::panic::unwind_safe::AssertUnwindSafe<std::thread::{impl#0}::spawn_unchecked_::{closure#1}::{closure_env#0}<mpsc_deadlock_reproducer::main::{closure_env#0}, ()>>, ()> (f=...)
    at /rustc/90c541806f23a127002de5b4038be731ba1458ca/library/std/src/panic.rs:140
#14 0x000055555556e9bc in std::thread::{impl#0}::spawn_unchecked_::{closure#1}<mpsc_deadlock_reproducer::main::{closure_env#0}, ()> () at /rustc/90c541806f23a127002de5b4038be731ba1458ca/library/std/src/thread/mod.rs:525
#15 0x00005555555611ee in core::ops::function::FnOnce::call_once<std::thread::{impl#0}::spawn_unchecked_::{closure_env#1}<mpsc_deadlock_reproducer::main::{closure_env#0}, ()>, ()> () at /rustc/90c541806f23a127002de5b4038be731ba1458ca/library/core/src/ops/function.rs:250
#16 0x0000555555597085 in alloc::boxed::{impl#45}::call_once<(), dyn core::ops::function::FnOnce<(), Output=()>, alloc::alloc::Global> () at library/alloc/src/boxed.rs:1973
#17 alloc::boxed::{impl#45}::call_once<(), alloc::boxed::Box<dyn core::ops::function::FnOnce<(), Output=()>, alloc::alloc::Global>, alloc::alloc::Global> () at library/alloc/src/boxed.rs:1973
#18 std::sys::unix::thread::{impl#2}::new::thread_start () at library/std/src/sys/unix/thread.rs:108
#19 0x00007ffff7d1844b in ?? () from /usr/lib/libc.so.6
#20 0x00007ffff7d9be40 in ?? () from /usr/lib/libc.so.6

Backtrace of the receiving thread:

#0  0x00007ffff7d804fb in sched_yield () from /usr/lib/libc.so.6
#1  0x0000555555570586 in std::sync::mpmc::utils::Backoff::spin_heavy (self=0x7ffff7a8b664)
    at /rustc/90c541806f23a127002de5b4038be731ba1458ca/library/std/src/sync/mpmc/utils.rs:130
#2  0x0000555555570a7f in std::sync::mpmc::list::Slot<usize>::wait_write<usize> (self=0x7fffec243ac0)
    at /rustc/90c541806f23a127002de5b4038be731ba1458ca/library/std/src/sync/mpmc/list.rs:49
#3  0x000055555557266a in std::sync::mpmc::list::Channel<usize>::read<usize> (self=0x5555555d4c00, token=0x7ffff7a8b800)
    at /rustc/90c541806f23a127002de5b4038be731ba1458ca/library/std/src/sync/mpmc/list.rs:373
#4  0x0000555555572cbc in std::sync::mpmc::list::Channel<usize>::try_recv<usize> (self=0x5555555d4c00)
    at /rustc/90c541806f23a127002de5b4038be731ba1458ca/library/std/src/sync/mpmc/list.rs:411
#5  0x00005555555701ba in std::sync::mpmc::Receiver<usize>::try_recv<usize> (self=0x7ffff7a8bba0)
    at /rustc/90c541806f23a127002de5b4038be731ba1458ca/library/std/src/sync/mpmc/mod.rs:290
#6  0x0000555555566af6 in std::sync::mpsc::Receiver<usize>::try_recv<usize> (self=0x7ffff7a8bba0)
    at /rustc/90c541806f23a127002de5b4038be731ba1458ca/library/std/src/sync/mpsc/mod.rs:801
#7  0x0000555555573f9c in mpsc_deadlock_reproducer::main::{closure#1} () at src/main.rs:37
#8  0x000055555556ac59 in std::sys_common::backtrace::__rust_begin_short_backtrace<mpsc_deadlock_reproducer::main::{closure_env#1}, ()>
    (f=...) at /rustc/90c541806f23a127002de5b4038be731ba1458ca/library/std/src/sys_common/backtrace.rs:134
#9  0x000055555556ef45 in std::thread::{impl#0}::spawn_unchecked_::{closure#1}::{closure#0}<mpsc_deadlock_reproducer::main::{closure_env#1}, ()> () at /rustc/90c541806f23a127002de5b4038be731ba1458ca/library/std/src/thread/mod.rs:526
#10 0x0000555555572d58 in core::panic::unwind_safe::{impl#23}::call_once<(), std::thread::{impl#0}::spawn_unchecked_::{closure#1}::{closure_env#0}<mpsc_deadlock_reproducer::main::{closure_env#1}, ()>> (self=...)
    at /rustc/90c541806f23a127002de5b4038be731ba1458ca/library/core/src/panic/unwind_safe.rs:271
#11 0x0000555555564dfe in std::panicking::try::do_call<core::panic::unwind_safe::AssertUnwindSafe<std::thread::{impl#0}::spawn_unchecked_::{closure#1}::{closure_env#0}<mpsc_deadlock_reproducer::main::{closure_env#1}, ()>>, ()> (data=0x7ffff7a8bc70)
    at /rustc/90c541806f23a127002de5b4038be731ba1458ca/library/std/src/panicking.rs:485
#12 0x00005555555650fb in __rust_try ()
#13 0x0000555555564bae in std::panicking::try<(), core::panic::unwind_safe::AssertUnwindSafe<std::thread::{impl#0}::spawn_unchecked_::{closure#1}::{closure_env#0}<mpsc_deadlock_reproducer::main::{closure_env#1}, ()>>> (f=...)
    at /rustc/90c541806f23a127002de5b4038be731ba1458ca/library/std/src/panicking.rs:449
#14 0x0000555555565366 in std::panic::catch_unwind<core::panic::unwind_safe::AssertUnwindSafe<std::thread::{impl#0}::spawn_unchecked_::{closure#1}::{closure_env#0}<mpsc_deadlock_reproducer::main::{closure_env#1}, ()>>, ()> (f=...)

try_recv calling read, which in turn calls wait_write and thereby makes try_recv wait on the sender, seems fundamentally wrong.

Meta

rustc --version --verbose:

rustc 1.70.0 (90c541806 2023-05-31)
binary: rustc
commit-hash: 90c541806f23a127002de5b4038be731ba1458ca
commit-date: 2023-05-31
host: x86_64-unknown-linux-gnu
release: 1.70.0
LLVM version: 16.0.2

Metadata

Metadata

Assignees

No one assigned

    Labels

    A-concurrency (Area: Concurrency) · C-bug (Category: This is a bug.) · T-libs (Relevant to the library team, which will review and decide on the PR/issue.)

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions