From eaab2a6373909962fb66f24cd1a2de5063dd6c0e Mon Sep 17 00:00:00 2001 From: Joakim Hernberg Date: Thu, 30 Jun 2016 21:39:22 +0200 Subject: bump to 4.4.12_rt19-1 --- ...pletion-simple-wait-code_Nvidia-RT-160319.patch | 75 ++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 fix-race-in-PRT-wait-for-completion-simple-wait-code_Nvidia-RT-160319.patch (limited to 'fix-race-in-PRT-wait-for-completion-simple-wait-code_Nvidia-RT-160319.patch') diff --git a/fix-race-in-PRT-wait-for-completion-simple-wait-code_Nvidia-RT-160319.patch b/fix-race-in-PRT-wait-for-completion-simple-wait-code_Nvidia-RT-160319.patch new file mode 100644 index 0000000..7e9e9c8 --- /dev/null +++ b/fix-race-in-PRT-wait-for-completion-simple-wait-code_Nvidia-RT-160319.patch @@ -0,0 +1,75 @@ +From: Joakim Hernberg +Date: Sat, 19 Mar 2016 13:03:55 +0100 +Subject: [PATCH] Fix a race in the PRT wait for completion simple wait code + for NVIDIA on the rt patch + +-Note refactored again 160319. + +-NOTE: this patch is a rebase of John Blackwood's patch. On his kernel, he must be using +-an older simple wait patch - as his applies to kernel/sched/core.c, while the simple wait +-completion code lives in kernel/sched/completion.c ... I have ported this to test with +-nvidia, as i would like to see if it fixes the semaphore issues i have seen. + +-I've kept the original patch comment in tact; + +I'm not 100% sure that the patch below will fix your problem, but we +saw something that sounds pretty familiar to your issue involving the +nvidia driver and the preempt-rt patch. The nvidia driver uses the +completion support to create their own driver's notion of an internally +used semaphore. + +Fix a race in the PRT wait for completion simple wait code. + +A wait_for_completion() waiter task can be awoken by a task calling +complete(), but fail to consume the 'done' completion resource if it +looses a race with another task calling wait_for_completion() just as +it is waking up. + +In this case, the awoken task will call schedule_timeout() again +without being in the simple wait queue. + +So if the awoken task is unable to claim the 'done' completion resource, +check to see if it needs to be re-inserted into the wait list before +waiting again in schedule_timeout(). + +Fix-by: John Blackwood + +--- + kernel/sched/completion.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c +index b62cf64..c01bbd8 100644 +--- a/kernel/sched/completion.c ++++ b/kernel/sched/completion.c +@@ -61,11 +61,19 @@ static inline long __sched + do_wait_for_common(struct completion *x, + long (*action)(long), long timeout, int state) + { ++ int again = 0; ++ + if (!x->done) { + DECLARE_SWAITQUEUE(wait); + + __prepare_to_swait(&x->wait, &wait); + do { ++ /* Check to see if we lost race for 'done' and are ++ * no longer in the wait list. ++ */ ++ if (unlikely(again) && list_empty(&wait.task_list)) ++ __prepare_to_swait(&x->wait, &wait); ++ + if (signal_pending_state(state, current)) { + timeout = -ERESTARTSYS; + break; +@@ -74,6 +82,7 @@ do_wait_for_common(struct completion *x, + raw_spin_unlock_irq(&x->wait.lock); + timeout = action(timeout); + raw_spin_lock_irq(&x->wait.lock); ++ again = 1; + } while (!x->done && timeout); + __finish_swait(&x->wait, &wait); + if (!x->done) +-- +2.7.2 + -- cgit v1.2.3