Re: deadlocks with intr NFS mounts and ^Z (or: PCATCH and sleepable locks)
- From: Kostik Belousov <kostikbel@xxxxxxxxx>
- Date: Sat, 20 Jun 2009 19:15:40 +0300
On Fri, Jun 19, 2009 at 06:23:28PM +0200, Jilles Tjoelker wrote:
I have been having trouble with deadlocks with NFS mounts for a while,
and I have found at least one way it can deadlock. It seems an issue
with the sleep/lock system.
NFS sleeps while holding a lockmgr lock, waiting for a reply from the
server. When the mount is set intr, this is an interruptible sleep, so
that interrupting signals can abort the sleep. However, this also means
that SIGSTOP etc will suspend the thread without waking it up first, so
it will be suspended with a lock held.
If it holds the wrong locks, it is possible that the shell will not be
able to run, and the process cannot be continued in the normal manner.
Due to some other things I do not understand, it is then possible that
the process cannot be continued at all (SIGCONT seems ignored), but in
simple cases SIGCONT works, and things go back to normal.
In any case, this situation is undesirable, as even 'umount -f' doesn't
work while the thread is suspended.
Of course, this reasoning applies to any code that goes to sleep
interruptibly while holding a lock (sx or lockmgr). Is this supposed to
be possible (likely useful)? If so, a third type of sleep would be
needed that is interrupted by signals but not suspended? If not,
something should check that it doesn't happen and NFS intr mounts may
need to check for signals periodically or find a way to avoid sleeping
with locks held.
The td_locks field is only accessible for the current thread, so it
cannot be used to check if suspending is safe.
Also, making SIGSTOP and the like interrupt/restart syscalls is not
acceptable unless you find some way to do it such that userland won't
notice. For example, a read of 10 megabytes from a regular file with
that much available must not return less then 10 megabytes.
Note that NFS does check for the signals during i/o, so you may get
short reads anyway.
I do think that the right solution both there and with SINGLE_NO_EXIT
case for thread_single is to stop at the usermode boundary instead of
suspending a thread in the interruptible sleep state.
I set error code returned from interrupted msleep() to ERESTART,
that seems to be the right thing, at least to restart the i/o that
transferred no data upon receiving SIGSTOP.
My current patch is below. It contains some not strictly related
changes, e.g. for wakeup().
diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
index 5c1d553..28f4f4f 100644
--- a/sys/kern/kern_sig.c
+++ b/sys/kern/kern_sig.c
@@ -2310,18 +2310,22 @@ static void
sig_suspend_threads(struct thread *td, struct proc *p, int sending)
{
struct thread *td2;
+ int wakeup_swapper;
PROC_LOCK_ASSERT(p, MA_OWNED);
PROC_SLOCK_ASSERT(p, MA_OWNED);
+ wakeup_swapper = 0;
FOREACH_THREAD_IN_PROC(p, td2) {
thread_lock(td2);
td2->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK;
if ((TD_IS_SLEEPING(td2) || TD_IS_SWAPPED(td2)) &&
- (td2->td_flags & TDF_SINTR) &&
- !TD_IS_SUSPENDED(td2)) {
- thread_suspend_one(td2);
- } else {
+ (td2->td_flags & TDF_SINTR)) {
+ if (TD_IS_SUSPENDED(td2))
+ wakeup_swapper |= thread_unsuspend_one(td2);
+ if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR))
+ wakeup_swapper |= sleepq_abort(td2, ERESTART);
+ } else if (!TD_IS_SUSPENDED(td2)) {
if (sending || td != td2)
td2->td_flags |= TDF_ASTPENDING;
#ifdef SMP
@@ -2331,6 +2335,8 @@ sig_suspend_threads(struct thread *td, struct proc *p, int sending)
}
thread_unlock(td2);
}
+ if (wakeup_swapper)
+ kick_proc0();
}
int
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index b91c1a5..d27d027 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -344,11 +344,16 @@ wakeup(void *ident)
{
int wakeup_swapper;
+ repeat:
sleepq_lock(ident);
wakeup_swapper = sleepq_broadcast(ident, SLEEPQ_SLEEP, 0, 0);
sleepq_release(ident);
- if (wakeup_swapper)
- kick_proc0();
+ if (wakeup_swapper) {
+ if (ident == &proc0)
+ goto repeat;
+ else
+ kick_proc0();
+ }
}
/*
@@ -361,11 +366,16 @@ wakeup_one(void *ident)
{
int wakeup_swapper;
+ repeat:
sleepq_lock(ident);
wakeup_swapper = sleepq_signal(ident, SLEEPQ_SLEEP, 0, 0);
sleepq_release(ident);
- if (wakeup_swapper)
- kick_proc0();
+ if (wakeup_swapper) {
+ if (ident == &proc0)
+ goto repeat;
+ else
+ kick_proc0();
+ }
}
static void
diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c
index bb8779b..800a1d1 100644
--- a/sys/kern/kern_thread.c
+++ b/sys/kern/kern_thread.c
@@ -504,6 +504,22 @@ thread_unlink(struct thread *td)
/* Must NOT clear links to proc! */
}
+static int
+recalc_remaining(struct proc *p, int mode)
+{
+ int remaining;
+
+ if (mode == SINGLE_EXIT)
+ remaining = p->p_numthreads;
+ else if (mode == SINGLE_BOUNDARY)
+ remaining = p->p_numthreads - p->p_boundary_count;
+ else if (mode == SINGLE_NO_EXIT)
+ remaining = p->p_numthreads - p->p_suspcount;
+ else
+ panic("recalc_remaining: wrong mode %d", mode);
+ return (remaining);
+}
+
/*
* Enforce single-threading.
*
@@ -551,12 +567,7 @@ thread_single(int mode)
p->p_flag |= P_STOPPED_SINGLE;
PROC_SLOCK(p);
p->p_singlethread = td;
- if (mode == SINGLE_EXIT)
- remaining = p->p_numthreads;
- else if (mode == SINGLE_BOUNDARY)
- remaining = p->p_numthreads - p->p_boundary_count;
- else
- remaining = p->p_numthreads - p->p_suspcount;
+ remaining = recalc_remaining(p, mode);
while (remaining != 1) {
if (P_SHOULDSTOP(p) != P_STOPPED_SINGLE)
goto stopme;
@@ -587,18 +598,17 @@ thread_single(int mode)
wakeup_swapper |=
sleepq_abort(td2, ERESTART);
break;
+ case SINGLE_NO_EXIT:
+ if (TD_IS_SUSPENDED(td2) &&
+ !(td2->td_flags & TDF_BOUNDARY))
+ wakeup_swapper |=
+ thread_unsuspend_one(td2);
+ if (TD_ON_SLEEPQ(td2) &&
+ (td2->td_flags & TDF_SINTR))
+ wakeup_swapper |=
+ sleepq_abort(td2, ERESTART);
+ break;
default:
- if (TD_IS_SUSPENDED(td2)) {
- thread_unlock(td2);
- continue;
- }
- /*
- * maybe other inhibited states too?
- */
- if ((td2->td_flags & TDF_SINTR) &&
- (td2->td_inhibitors &
- (TDI_SLEEPING | TDI_SWAPPED)))
- thread_suspend_one(td2);
break;
}
}
@@ -611,12 +621,7 @@ thread_single(int mode)
}
if (wakeup_swapper)
kick_proc0();
- if (mode == SINGLE_EXIT)
- remaining = p->p_numthreads;
- else if (mode == SINGLE_BOUNDARY)
- remaining = p->p_numthreads - p->p_boundary_count;
- else
- remaining = p->p_numthreads - p->p_suspcount;
+ remaining = recalc_remaining(p, mode);
/*
* Maybe we suspended some threads.. was it enough?
@@ -630,12 +635,7 @@ stopme:
* In the mean time we suspend as well.
*/
thread_suspend_switch(td);
- if (mode == SINGLE_EXIT)
- remaining = p->p_numthreads;
- else if (mode == SINGLE_BOUNDARY)
- remaining = p->p_numthreads - p->p_boundary_count;
- else
- remaining = p->p_numthreads - p->p_suspcount;
+ remaining = recalc_remaining(p, mode);
}
if (mode == SINGLE_EXIT) {
/*
Attachment:
pgp0PPqVi0M6T.pgp
Description: PGP signature
- Follow-Ups:
- Re: deadlocks with intr NFS mounts and ^Z (or: PCATCH and sleepable locks)
- From: Jilles Tjoelker
- Re: deadlocks with intr NFS mounts and ^Z (or: PCATCH and sleepable locks)
- References:
- deadlocks with intr NFS mounts and ^Z (or: PCATCH and sleepable locks)
- From: Jilles Tjoelker
- deadlocks with intr NFS mounts and ^Z (or: PCATCH and sleepable locks)
- Prev by Date: Re: deadlocks with intr NFS mounts and ^Z (or: PCATCH and sleepable locks)
- Next by Date: Re: deadlocks with intr NFS mounts and ^Z (or: PCATCH and sleepable locks)
- Previous by thread: Re: deadlocks with intr NFS mounts and ^Z (or: PCATCH and sleepable locks)
- Next by thread: Re: deadlocks with intr NFS mounts and ^Z (or: PCATCH and sleepable locks)
- Index(es):
Relevant Pages
|