You've already forked linux-apfs
mirror of
https://github.com/linux-apfs/linux-apfs.git
synced 2026-05-01 15:00:59 -07:00
Merge branches 'doc.2017.01.15b', 'dyntick.2017.01.23a', 'fixes.2017.01.23a', 'srcu.2017.01.25a' and 'torture.2017.01.15b' into HEAD
doc.2017.01.15b: Documentation updates; dyntick.2017.01.23a: Dyntick tracking consolidation; fixes.2017.01.23a: Miscellaneous fixes; srcu.2017.01.25a: SRCU rewrite, fixes, and verification; torture.2017.01.15b: Torture-test updates
This commit is contained in:
@@ -237,7 +237,7 @@ o "ktl" is the low-order 16 bits (in hexadecimal) of the count of
|
||||
|
||||
The output of "cat rcu/rcu_preempt/rcuexp" looks as follows:
|
||||
|
||||
s=21872 wd1=0 wd2=0 wd3=5 n=0 enq=0 sc=21872
|
||||
s=21872 wd1=0 wd2=0 wd3=5 enq=0 sc=21872
|
||||
|
||||
These fields are as follows:
|
||||
|
||||
@@ -249,9 +249,6 @@ o "wd1", "wd2", and "wd3" are the number of times that an attempt
|
||||
completed an expedited grace period that satisfies the attempted
|
||||
request. "Our work is done."
|
||||
|
||||
o "n" is number of times that a concurrent CPU-hotplug operation
|
||||
forced a fallback to a normal grace period.
|
||||
|
||||
o "enq" is the number of quiescent states still outstanding.
|
||||
|
||||
o "sc" is the number of times that the attempt to start a
|
||||
|
||||
+19
-14
@@ -3,28 +3,33 @@
|
||||
/*
|
||||
* Lock-less NULL terminated single linked list
|
||||
*
|
||||
* If there are multiple producers and multiple consumers, llist_add
|
||||
* can be used in producers and llist_del_all can be used in
|
||||
* consumers. They can work simultaneously without lock. But
|
||||
* llist_del_first can not be used here. Because llist_del_first
|
||||
* depends on list->first->next does not changed if list->first is not
|
||||
* changed during its operation, but llist_del_first, llist_add,
|
||||
* llist_add (or llist_del_all, llist_add, llist_add) sequence in
|
||||
* another consumer may violate that.
|
||||
* Cases where locking is not needed:
|
||||
* If there are multiple producers and multiple consumers, llist_add can be
|
||||
* used in producers and llist_del_all can be used in consumers simultaneously
|
||||
* without locking. Also a single consumer can use llist_del_first while
|
||||
* multiple producers simultaneously use llist_add, without any locking.
|
||||
*
|
||||
* If there are multiple producers and one consumer, llist_add can be
|
||||
* used in producers and llist_del_all or llist_del_first can be used
|
||||
* in the consumer.
|
||||
* Cases where locking is needed:
|
||||
* If we have multiple consumers with llist_del_first used in one consumer, and
|
||||
* llist_del_first or llist_del_all used in other consumers, then a lock is
|
||||
* needed. This is because llist_del_first depends on list->first->next not
|
||||
* changing, but without lock protection, there's no way to be sure about that
|
||||
* if a preemption happens in the middle of the delete operation and, on being
|
||||
* scheduled back in, list->first is the same as before, causing the cmpxchg in
|
||||
* llist_del_first to succeed. For example, while a llist_del_first operation
|
||||
* is in progress in one consumer, then a llist_del_first, llist_add,
|
||||
* llist_add (or llist_del_all, llist_add, llist_add) sequence in another
|
||||
* consumer may cause violations.
|
||||
*
|
||||
* This can be summarized as follow:
|
||||
* This can be summarized as follows:
|
||||
*
|
||||
* | add | del_first | del_all
|
||||
* add | - | - | -
|
||||
* del_first | | L | L
|
||||
* del_all | | | -
|
||||
*
|
||||
* Where "-" stands for no lock is needed, while "L" stands for lock
|
||||
* is needed.
|
||||
* Where a particular row's operation can happen concurrently with a column's
|
||||
* operation, "-" means no lock is needed, while "L" means a lock is needed.
|
||||
*
|
||||
* The list entries deleted via llist_del_all can be traversed with
|
||||
* traversing function such as llist_for_each etc. But the list
|
||||
|
||||
@@ -1161,5 +1161,17 @@ do { \
|
||||
ftrace_dump(oops_dump_mode); \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* Place this after a lock-acquisition primitive to guarantee that
|
||||
* an UNLOCK+LOCK pair acts as a full barrier. This guarantee applies
|
||||
* if the UNLOCK and LOCK are executed by the same CPU or if the
|
||||
* UNLOCK and LOCK operate on the same lock variable.
|
||||
*/
|
||||
#ifdef CONFIG_PPC
|
||||
#define smp_mb__after_unlock_lock() smp_mb() /* Full ordering for lock. */
|
||||
#else /* #ifdef CONFIG_PPC */
|
||||
#define smp_mb__after_unlock_lock() do { } while (0)
|
||||
#endif /* #else #ifdef CONFIG_PPC */
|
||||
|
||||
|
||||
#endif /* __LINUX_RCUPDATE_H */
|
||||
|
||||
@@ -27,6 +27,12 @@
|
||||
|
||||
#include <linux/cache.h>
|
||||
|
||||
struct rcu_dynticks;
|
||||
static inline int rcu_dynticks_snap(struct rcu_dynticks *rdtp)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline unsigned long get_state_synchronize_rcu(void)
|
||||
{
|
||||
return 0;
|
||||
|
||||
@@ -33,9 +33,9 @@
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/workqueue.h>
|
||||
|
||||
struct srcu_struct_array {
|
||||
unsigned long c[2];
|
||||
unsigned long seq[2];
|
||||
struct srcu_array {
|
||||
unsigned long lock_count[2];
|
||||
unsigned long unlock_count[2];
|
||||
};
|
||||
|
||||
struct rcu_batch {
|
||||
@@ -46,7 +46,7 @@ struct rcu_batch {
|
||||
|
||||
struct srcu_struct {
|
||||
unsigned long completed;
|
||||
struct srcu_struct_array __percpu *per_cpu_ref;
|
||||
struct srcu_array __percpu *per_cpu_ref;
|
||||
spinlock_t queue_lock; /* protect ->batch_queue, ->running */
|
||||
bool running;
|
||||
/* callbacks just queued */
|
||||
@@ -118,7 +118,7 @@ void process_srcu(struct work_struct *work);
|
||||
* See include/linux/percpu-defs.h for the rules on per-CPU variables.
|
||||
*/
|
||||
#define __DEFINE_SRCU(name, is_static) \
|
||||
static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\
|
||||
static DEFINE_PER_CPU(struct srcu_array, name##_srcu_array);\
|
||||
is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name)
|
||||
#define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */)
|
||||
#define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static)
|
||||
|
||||
@@ -385,11 +385,11 @@ TRACE_EVENT(rcu_quiescent_state_report,
|
||||
|
||||
/*
|
||||
* Tracepoint for quiescent states detected by force_quiescent_state().
|
||||
* These trace events include the type of RCU, the grace-period number
|
||||
* that was blocked by the CPU, the CPU itself, and the type of quiescent
|
||||
* state, which can be "dti" for dyntick-idle mode, "ofl" for CPU offline,
|
||||
* or "kick" when kicking a CPU that has been in dyntick-idle mode for
|
||||
* too long.
|
||||
* These trace events include the type of RCU, the grace-period number that
|
||||
* was blocked by the CPU, the CPU itself, and the type of quiescent state,
|
||||
* which can be "dti" for dyntick-idle mode, "ofl" for CPU offline, "kick"
|
||||
* when kicking a CPU that has been in dyntick-idle mode for too long, or
|
||||
* "rqc" if the CPU got a quiescent state via its rcu_qs_ctr.
|
||||
*/
|
||||
TRACE_EVENT(rcu_fqs,
|
||||
|
||||
|
||||
@@ -529,7 +529,6 @@ config SRCU
|
||||
config TASKS_RCU
|
||||
bool
|
||||
default n
|
||||
depends on !UML
|
||||
select SRCU
|
||||
help
|
||||
This option enables a task-based RCU implementation that uses
|
||||
@@ -781,19 +780,6 @@ config RCU_NOCB_CPU_ALL
|
||||
|
||||
endchoice
|
||||
|
||||
config RCU_EXPEDITE_BOOT
|
||||
bool
|
||||
default n
|
||||
help
|
||||
This option enables expedited grace periods at boot time,
|
||||
as if rcu_expedite_gp() had been invoked early in boot.
|
||||
The corresponding rcu_unexpedite_gp() is invoked from
|
||||
rcu_end_inkernel_boot(), which is intended to be invoked
|
||||
at the end of the kernel-only boot sequence, just before
|
||||
init is exec'ed.
|
||||
|
||||
Accept the default if unsure.
|
||||
|
||||
endmenu # "RCU Subsystem"
|
||||
|
||||
config BUILD_BIN2C
|
||||
|
||||
@@ -4412,13 +4412,13 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
|
||||
#endif /* #ifdef CONFIG_PROVE_RCU_REPEATEDLY */
|
||||
/* Note: the following can be executed concurrently, so be careful. */
|
||||
printk("\n");
|
||||
printk("===============================\n");
|
||||
printk("[ INFO: suspicious RCU usage. ]\n");
|
||||
pr_err("===============================\n");
|
||||
pr_err("[ ERR: suspicious RCU usage. ]\n");
|
||||
print_kernel_ident();
|
||||
printk("-------------------------------\n");
|
||||
printk("%s:%d %s!\n", file, line, s);
|
||||
printk("\nother info that might help us debug this:\n\n");
|
||||
printk("\n%srcu_scheduler_active = %d, debug_locks = %d\n",
|
||||
pr_err("-------------------------------\n");
|
||||
pr_err("%s:%d %s!\n", file, line, s);
|
||||
pr_err("\nother info that might help us debug this:\n\n");
|
||||
pr_err("\n%srcu_scheduler_active = %d, debug_locks = %d\n",
|
||||
!rcu_lockdep_current_cpu_online()
|
||||
? "RCU used illegally from offline CPU!\n"
|
||||
: !rcu_is_watching()
|
||||
|
||||
@@ -780,6 +780,10 @@ static void lock_torture_cleanup(void)
|
||||
else
|
||||
lock_torture_print_module_parms(cxt.cur_ops,
|
||||
"End of test: SUCCESS");
|
||||
|
||||
kfree(cxt.lwsa);
|
||||
kfree(cxt.lrsa);
|
||||
|
||||
end:
|
||||
torture_cleanup_end();
|
||||
}
|
||||
@@ -924,6 +928,8 @@ static int __init lock_torture_init(void)
|
||||
GFP_KERNEL);
|
||||
if (reader_tasks == NULL) {
|
||||
VERBOSE_TOROUT_ERRSTRING("reader_tasks: Out of memory");
|
||||
kfree(writer_tasks);
|
||||
writer_tasks = NULL;
|
||||
firsterr = -ENOMEM;
|
||||
goto unwind;
|
||||
}
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/membarrier.h>
|
||||
#include <linux/tick.h>
|
||||
|
||||
/*
|
||||
* Bitmask made from an "or" of all commands within enum membarrier_cmd,
|
||||
@@ -51,6 +52,9 @@
|
||||
*/
|
||||
SYSCALL_DEFINE2(membarrier, int, cmd, int, flags)
|
||||
{
|
||||
/* MEMBARRIER_CMD_SHARED is not compatible with nohz_full. */
|
||||
if (tick_nohz_full_enabled())
|
||||
return -ENOSYS;
|
||||
if (unlikely(flags))
|
||||
return -EINVAL;
|
||||
switch (cmd) {
|
||||
|
||||
+17
-2
@@ -564,10 +564,25 @@ static void srcu_torture_stats(void)
|
||||
pr_alert("%s%s per-CPU(idx=%d):",
|
||||
torture_type, TORTURE_FLAG, idx);
|
||||
for_each_possible_cpu(cpu) {
|
||||
unsigned long l0, l1;
|
||||
unsigned long u0, u1;
|
||||
long c0, c1;
|
||||
struct srcu_array *counts = per_cpu_ptr(srcu_ctlp->per_cpu_ref, cpu);
|
||||
|
||||
c0 = (long)per_cpu_ptr(srcu_ctlp->per_cpu_ref, cpu)->c[!idx];
|
||||
c1 = (long)per_cpu_ptr(srcu_ctlp->per_cpu_ref, cpu)->c[idx];
|
||||
u0 = counts->unlock_count[!idx];
|
||||
u1 = counts->unlock_count[idx];
|
||||
|
||||
/*
|
||||
* Make sure that a lock is always counted if the corresponding
|
||||
* unlock is counted.
|
||||
*/
|
||||
smp_rmb();
|
||||
|
||||
l0 = counts->lock_count[!idx];
|
||||
l1 = counts->lock_count[idx];
|
||||
|
||||
c0 = l0 - u0;
|
||||
c1 = l1 - u1;
|
||||
pr_cont(" %d(%ld,%ld)", cpu, c0, c1);
|
||||
}
|
||||
pr_cont("\n");
|
||||
|
||||
+62
-81
@@ -106,7 +106,7 @@ static int init_srcu_struct_fields(struct srcu_struct *sp)
|
||||
rcu_batch_init(&sp->batch_check1);
|
||||
rcu_batch_init(&sp->batch_done);
|
||||
INIT_DELAYED_WORK(&sp->work, process_srcu);
|
||||
sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array);
|
||||
sp->per_cpu_ref = alloc_percpu(struct srcu_array);
|
||||
return sp->per_cpu_ref ? 0 : -ENOMEM;
|
||||
}
|
||||
|
||||
@@ -141,114 +141,77 @@ EXPORT_SYMBOL_GPL(init_srcu_struct);
|
||||
#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
|
||||
|
||||
/*
|
||||
* Returns approximate total of the readers' ->seq[] values for the
|
||||
* Returns approximate total of the readers' ->lock_count[] values for the
|
||||
* rank of per-CPU counters specified by idx.
|
||||
*/
|
||||
static unsigned long srcu_readers_seq_idx(struct srcu_struct *sp, int idx)
|
||||
static unsigned long srcu_readers_lock_idx(struct srcu_struct *sp, int idx)
|
||||
{
|
||||
int cpu;
|
||||
unsigned long sum = 0;
|
||||
unsigned long t;
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
t = READ_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->seq[idx]);
|
||||
sum += t;
|
||||
struct srcu_array *cpuc = per_cpu_ptr(sp->per_cpu_ref, cpu);
|
||||
|
||||
sum += READ_ONCE(cpuc->lock_count[idx]);
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns approximate number of readers active on the specified rank
|
||||
* of the per-CPU ->c[] counters.
|
||||
* Returns approximate total of the readers' ->unlock_count[] values for the
|
||||
* rank of per-CPU counters specified by idx.
|
||||
*/
|
||||
static unsigned long srcu_readers_active_idx(struct srcu_struct *sp, int idx)
|
||||
static unsigned long srcu_readers_unlock_idx(struct srcu_struct *sp, int idx)
|
||||
{
|
||||
int cpu;
|
||||
unsigned long sum = 0;
|
||||
unsigned long t;
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
t = READ_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[idx]);
|
||||
sum += t;
|
||||
struct srcu_array *cpuc = per_cpu_ptr(sp->per_cpu_ref, cpu);
|
||||
|
||||
sum += READ_ONCE(cpuc->unlock_count[idx]);
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return true if the number of pre-existing readers is determined to
|
||||
* be stably zero. An example unstable zero can occur if the call
|
||||
* to srcu_readers_active_idx() misses an __srcu_read_lock() increment,
|
||||
* but due to task migration, sees the corresponding __srcu_read_unlock()
|
||||
* decrement. This can happen because srcu_readers_active_idx() takes
|
||||
* time to sum the array, and might in fact be interrupted or preempted
|
||||
* partway through the summation.
|
||||
* be zero.
|
||||
*/
|
||||
static bool srcu_readers_active_idx_check(struct srcu_struct *sp, int idx)
|
||||
{
|
||||
unsigned long seq;
|
||||
unsigned long unlocks;
|
||||
|
||||
seq = srcu_readers_seq_idx(sp, idx);
|
||||
unlocks = srcu_readers_unlock_idx(sp, idx);
|
||||
|
||||
/*
|
||||
* The following smp_mb() A pairs with the smp_mb() B located in
|
||||
* __srcu_read_lock(). This pairing ensures that if an
|
||||
* __srcu_read_lock() increments its counter after the summation
|
||||
* in srcu_readers_active_idx(), then the corresponding SRCU read-side
|
||||
* critical section will see any changes made prior to the start
|
||||
* of the current SRCU grace period.
|
||||
* Make sure that a lock is always counted if the corresponding unlock
|
||||
* is counted. Needs to be a smp_mb() as the read side may contain a
|
||||
* read from a variable that is written to before the synchronize_srcu()
|
||||
* in the write side. In this case smp_mb()s A and B act like the store
|
||||
* buffering pattern.
|
||||
*
|
||||
* Also, if the above call to srcu_readers_seq_idx() saw the
|
||||
* increment of ->seq[], then the call to srcu_readers_active_idx()
|
||||
* must see the increment of ->c[].
|
||||
* This smp_mb() also pairs with smp_mb() C to prevent accesses after the
|
||||
* synchronize_srcu() from being executed before the grace period ends.
|
||||
*/
|
||||
smp_mb(); /* A */
|
||||
|
||||
/*
|
||||
* Note that srcu_readers_active_idx() can incorrectly return
|
||||
* zero even though there is a pre-existing reader throughout.
|
||||
* To see this, suppose that task A is in a very long SRCU
|
||||
* read-side critical section that started on CPU 0, and that
|
||||
* no other reader exists, so that the sum of the counters
|
||||
* is equal to one. Then suppose that task B starts executing
|
||||
* srcu_readers_active_idx(), summing up to CPU 1, and then that
|
||||
* task C starts reading on CPU 0, so that its increment is not
|
||||
* summed, but finishes reading on CPU 2, so that its decrement
|
||||
* -is- summed. Then when task B completes its sum, it will
|
||||
* incorrectly get zero, despite the fact that task A has been
|
||||
* in its SRCU read-side critical section the whole time.
|
||||
* If the locks are the same as the unlocks, then there must have
|
||||
* been no readers on this index at some time in between. This does not
|
||||
* mean that there are no more readers, as one could have read the
|
||||
* current index but not have incremented the lock counter yet.
|
||||
*
|
||||
* We therefore do a validation step should srcu_readers_active_idx()
|
||||
* return zero.
|
||||
* Possible bug: There is no guarantee that there haven't been ULONG_MAX
|
||||
* increments of ->lock_count[] since the unlocks were counted, meaning
|
||||
* that this could return true even if there are still active readers.
|
||||
* Since there are no memory barriers around srcu_flip(), the CPU is not
|
||||
* required to increment ->completed before running
|
||||
* srcu_readers_unlock_idx(), which means that there could be an
|
||||
* arbitrarily large number of critical sections that execute after
|
||||
* srcu_readers_unlock_idx() but use the old value of ->completed.
|
||||
*/
|
||||
if (srcu_readers_active_idx(sp, idx) != 0)
|
||||
return false;
|
||||
|
||||
/*
|
||||
* The remainder of this function is the validation step.
|
||||
* The following smp_mb() D pairs with the smp_mb() C in
|
||||
* __srcu_read_unlock(). If the __srcu_read_unlock() was seen
|
||||
* by srcu_readers_active_idx() above, then any destructive
|
||||
* operation performed after the grace period will happen after
|
||||
* the corresponding SRCU read-side critical section.
|
||||
*
|
||||
* Note that there can be at most NR_CPUS worth of readers using
|
||||
* the old index, which is not enough to overflow even a 32-bit
|
||||
* integer. (Yes, this does mean that systems having more than
|
||||
* a billion or so CPUs need to be 64-bit systems.) Therefore,
|
||||
* the sum of the ->seq[] counters cannot possibly overflow.
|
||||
* Therefore, the only way that the return values of the two
|
||||
* calls to srcu_readers_seq_idx() can be equal is if there were
|
||||
* no increments of the corresponding rank of ->seq[] counts
|
||||
* in the interim. But the missed-increment scenario laid out
|
||||
* above includes an increment of the ->seq[] counter by
|
||||
* the corresponding __srcu_read_lock(). Therefore, if this
|
||||
* scenario occurs, the return values from the two calls to
|
||||
* srcu_readers_seq_idx() will differ, and thus the validation
|
||||
* step below suffices.
|
||||
*/
|
||||
smp_mb(); /* D */
|
||||
|
||||
return srcu_readers_seq_idx(sp, idx) == seq;
|
||||
return srcu_readers_lock_idx(sp, idx) == unlocks;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -266,8 +229,12 @@ static bool srcu_readers_active(struct srcu_struct *sp)
|
||||
unsigned long sum = 0;
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
sum += READ_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[0]);
|
||||
sum += READ_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[1]);
|
||||
struct srcu_array *cpuc = per_cpu_ptr(sp->per_cpu_ref, cpu);
|
||||
|
||||
sum += READ_ONCE(cpuc->lock_count[0]);
|
||||
sum += READ_ONCE(cpuc->lock_count[1]);
|
||||
sum -= READ_ONCE(cpuc->unlock_count[0]);
|
||||
sum -= READ_ONCE(cpuc->unlock_count[1]);
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
@@ -298,9 +265,8 @@ int __srcu_read_lock(struct srcu_struct *sp)
|
||||
int idx;
|
||||
|
||||
idx = READ_ONCE(sp->completed) & 0x1;
|
||||
__this_cpu_inc(sp->per_cpu_ref->c[idx]);
|
||||
__this_cpu_inc(sp->per_cpu_ref->lock_count[idx]);
|
||||
smp_mb(); /* B */ /* Avoid leaking the critical section. */
|
||||
__this_cpu_inc(sp->per_cpu_ref->seq[idx]);
|
||||
return idx;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__srcu_read_lock);
|
||||
@@ -314,7 +280,7 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock);
|
||||
void __srcu_read_unlock(struct srcu_struct *sp, int idx)
|
||||
{
|
||||
smp_mb(); /* C */ /* Avoid leaking the critical section. */
|
||||
this_cpu_dec(sp->per_cpu_ref->c[idx]);
|
||||
this_cpu_inc(sp->per_cpu_ref->unlock_count[idx]);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__srcu_read_unlock);
|
||||
|
||||
@@ -349,12 +315,21 @@ static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount)
|
||||
|
||||
/*
|
||||
* Increment the ->completed counter so that future SRCU readers will
|
||||
* use the other rank of the ->c[] and ->seq[] arrays. This allows
|
||||
* use the other rank of the ->(un)lock_count[] arrays. This allows
|
||||
* us to wait for pre-existing readers in a starvation-free manner.
|
||||
*/
|
||||
static void srcu_flip(struct srcu_struct *sp)
|
||||
{
|
||||
sp->completed++;
|
||||
WRITE_ONCE(sp->completed, sp->completed + 1);
|
||||
|
||||
/*
|
||||
* Ensure that if the updater misses an __srcu_read_unlock()
|
||||
* increment, that task's next __srcu_read_lock() will see the
|
||||
* above counter update. Note that both this memory barrier
|
||||
* and the one in srcu_readers_active_idx_check() provide the
|
||||
* guarantee for __srcu_read_lock().
|
||||
*/
|
||||
smp_mb(); /* D */ /* Pairs with C. */
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -392,6 +367,7 @@ void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
|
||||
head->next = NULL;
|
||||
head->func = func;
|
||||
spin_lock_irqsave(&sp->queue_lock, flags);
|
||||
smp_mb__after_unlock_lock(); /* Caller's prior accesses before GP. */
|
||||
rcu_batch_queue(&sp->batch_queue, head);
|
||||
if (!sp->running) {
|
||||
sp->running = true;
|
||||
@@ -425,6 +401,7 @@ static void __synchronize_srcu(struct srcu_struct *sp, int trycount)
|
||||
head->next = NULL;
|
||||
head->func = wakeme_after_rcu;
|
||||
spin_lock_irq(&sp->queue_lock);
|
||||
smp_mb__after_unlock_lock(); /* Caller's prior accesses before GP. */
|
||||
if (!sp->running) {
|
||||
/* steal the processing owner */
|
||||
sp->running = true;
|
||||
@@ -444,8 +421,11 @@ static void __synchronize_srcu(struct srcu_struct *sp, int trycount)
|
||||
spin_unlock_irq(&sp->queue_lock);
|
||||
}
|
||||
|
||||
if (!done)
|
||||
if (!done) {
|
||||
wait_for_completion(&rcu.completion);
|
||||
smp_mb(); /* Caller's later accesses after GP. */
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -613,7 +593,8 @@ static void srcu_advance_batches(struct srcu_struct *sp, int trycount)
|
||||
/*
|
||||
* Invoke a limited number of SRCU callbacks that have passed through
|
||||
* their grace period. If there are more to do, SRCU will reschedule
|
||||
* the workqueue.
|
||||
* the workqueue. Note that needed memory barriers have been executed
|
||||
* in this task's context by srcu_readers_active_idx_check().
|
||||
*/
|
||||
static void srcu_invoke_callbacks(struct srcu_struct *sp)
|
||||
{
|
||||
|
||||
@@ -41,8 +41,6 @@
|
||||
|
||||
/* Forward declarations for tiny_plugin.h. */
|
||||
struct rcu_ctrlblk;
|
||||
static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp);
|
||||
static void rcu_process_callbacks(struct softirq_action *unused);
|
||||
static void __call_rcu(struct rcu_head *head,
|
||||
rcu_callback_t func,
|
||||
struct rcu_ctrlblk *rcp);
|
||||
|
||||
+177
-85
@@ -281,6 +281,116 @@ static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
|
||||
#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
|
||||
};
|
||||
|
||||
/*
|
||||
* Record entry into an extended quiescent state. This is only to be
|
||||
* called when not already in an extended quiescent state.
|
||||
*/
|
||||
static void rcu_dynticks_eqs_enter(void)
|
||||
{
|
||||
struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
|
||||
int special;
|
||||
|
||||
/*
|
||||
* CPUs seeing atomic_inc_return() must see prior RCU read-side
|
||||
* critical sections, and we also must force ordering with the
|
||||
* next idle sojourn.
|
||||
*/
|
||||
special = atomic_inc_return(&rdtp->dynticks);
|
||||
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && special & 0x1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Record exit from an extended quiescent state. This is only to be
|
||||
* called from an extended quiescent state.
|
||||
*/
|
||||
static void rcu_dynticks_eqs_exit(void)
|
||||
{
|
||||
struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
|
||||
int special;
|
||||
|
||||
/*
|
||||
* CPUs seeing atomic_inc_return() must see prior idle sojourns,
|
||||
* and we also must force ordering with the next RCU read-side
|
||||
* critical section.
|
||||
*/
|
||||
special = atomic_inc_return(&rdtp->dynticks);
|
||||
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !(special & 0x1));
|
||||
}
|
||||
|
||||
/*
|
||||
* Reset the current CPU's ->dynticks counter to indicate that the
|
||||
* newly onlined CPU is no longer in an extended quiescent state.
|
||||
* This will either leave the counter unchanged, or increment it
|
||||
* to the next non-quiescent value.
|
||||
*
|
||||
* The non-atomic test/increment sequence works because the upper bits
|
||||
* of the ->dynticks counter are manipulated only by the corresponding CPU,
|
||||
* or when the corresponding CPU is offline.
|
||||
*/
|
||||
static void rcu_dynticks_eqs_online(void)
|
||||
{
|
||||
struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
|
||||
|
||||
if (atomic_read(&rdtp->dynticks) & 0x1)
|
||||
return;
|
||||
atomic_add(0x1, &rdtp->dynticks);
|
||||
}
|
||||
|
||||
/*
|
||||
* Is the current CPU in an extended quiescent state?
|
||||
*
|
||||
* No ordering, as we are sampling CPU-local information.
|
||||
*/
|
||||
bool rcu_dynticks_curr_cpu_in_eqs(void)
|
||||
{
|
||||
struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
|
||||
|
||||
return !(atomic_read(&rdtp->dynticks) & 0x1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Snapshot the ->dynticks counter with full ordering so as to allow
|
||||
* stable comparison of this counter with past and future snapshots.
|
||||
*/
|
||||
int rcu_dynticks_snap(struct rcu_dynticks *rdtp)
|
||||
{
|
||||
int snap = atomic_add_return(0, &rdtp->dynticks);
|
||||
|
||||
return snap;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return true if the snapshot returned from rcu_dynticks_snap()
|
||||
* indicates that RCU is in an extended quiescent state.
|
||||
*/
|
||||
static bool rcu_dynticks_in_eqs(int snap)
|
||||
{
|
||||
return !(snap & 0x1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return true if the CPU corresponding to the specified rcu_dynticks
|
||||
* structure has spent some time in an extended quiescent state since
|
||||
* rcu_dynticks_snap() returned the specified snapshot.
|
||||
*/
|
||||
static bool rcu_dynticks_in_eqs_since(struct rcu_dynticks *rdtp, int snap)
|
||||
{
|
||||
return snap != rcu_dynticks_snap(rdtp);
|
||||
}
|
||||
|
||||
/*
|
||||
* Do a double-increment of the ->dynticks counter to emulate a
|
||||
* momentary idle-CPU quiescent state.
|
||||
*/
|
||||
static void rcu_dynticks_momentary_idle(void)
|
||||
{
|
||||
struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
|
||||
int special = atomic_add_return(2, &rdtp->dynticks);
|
||||
|
||||
/* It is illegal to call this from idle state. */
|
||||
WARN_ON_ONCE(!(special & 0x1));
|
||||
}
|
||||
|
||||
DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, rcu_qs_ctr);
|
||||
EXPORT_PER_CPU_SYMBOL_GPL(rcu_qs_ctr);
|
||||
|
||||
@@ -300,7 +410,6 @@ EXPORT_PER_CPU_SYMBOL_GPL(rcu_qs_ctr);
|
||||
static void rcu_momentary_dyntick_idle(void)
|
||||
{
|
||||
struct rcu_data *rdp;
|
||||
struct rcu_dynticks *rdtp;
|
||||
int resched_mask;
|
||||
struct rcu_state *rsp;
|
||||
|
||||
@@ -327,10 +436,7 @@ static void rcu_momentary_dyntick_idle(void)
|
||||
* quiescent state, with no need for this CPU to do anything
|
||||
* further.
|
||||
*/
|
||||
rdtp = this_cpu_ptr(&rcu_dynticks);
|
||||
smp_mb__before_atomic(); /* Earlier stuff before QS. */
|
||||
atomic_add(2, &rdtp->dynticks); /* QS. */
|
||||
smp_mb__after_atomic(); /* Later stuff after QS. */
|
||||
rcu_dynticks_momentary_idle();
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -611,7 +717,7 @@ static int
|
||||
cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp)
|
||||
{
|
||||
return &rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL] &&
|
||||
rdp->nxttail[RCU_DONE_TAIL] != NULL;
|
||||
rdp->nxttail[RCU_NEXT_TAIL] != NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -673,7 +779,7 @@ static void rcu_eqs_enter_common(long long oldval, bool user)
|
||||
{
|
||||
struct rcu_state *rsp;
|
||||
struct rcu_data *rdp;
|
||||
struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
|
||||
RCU_TRACE(struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);)
|
||||
|
||||
trace_rcu_dyntick(TPS("Start"), oldval, rdtp->dynticks_nesting);
|
||||
if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
|
||||
@@ -692,12 +798,7 @@ static void rcu_eqs_enter_common(long long oldval, bool user)
|
||||
do_nocb_deferred_wakeup(rdp);
|
||||
}
|
||||
rcu_prepare_for_idle();
|
||||
/* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
|
||||
smp_mb__before_atomic(); /* See above. */
|
||||
atomic_inc(&rdtp->dynticks);
|
||||
smp_mb__after_atomic(); /* Force ordering with next sojourn. */
|
||||
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
|
||||
atomic_read(&rdtp->dynticks) & 0x1);
|
||||
rcu_dynticks_eqs_enter();
|
||||
rcu_dynticks_task_enter();
|
||||
|
||||
/*
|
||||
@@ -826,15 +927,10 @@ void rcu_irq_exit_irqson(void)
|
||||
*/
|
||||
static void rcu_eqs_exit_common(long long oldval, int user)
|
||||
{
|
||||
struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
|
||||
RCU_TRACE(struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);)
|
||||
|
||||
rcu_dynticks_task_exit();
|
||||
smp_mb__before_atomic(); /* Force ordering w/previous sojourn. */
|
||||
atomic_inc(&rdtp->dynticks);
|
||||
/* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
|
||||
smp_mb__after_atomic(); /* See above. */
|
||||
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
|
||||
!(atomic_read(&rdtp->dynticks) & 0x1));
|
||||
rcu_dynticks_eqs_exit();
|
||||
rcu_cleanup_after_idle();
|
||||
trace_rcu_dyntick(TPS("End"), oldval, rdtp->dynticks_nesting);
|
||||
if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
|
||||
@@ -980,12 +1076,8 @@ void rcu_nmi_enter(void)
|
||||
* to be in the outermost NMI handler that interrupted an RCU-idle
|
||||
* period (observation due to Andy Lutomirski).
|
||||
*/
|
||||
if (!(atomic_read(&rdtp->dynticks) & 0x1)) {
|
||||
smp_mb__before_atomic(); /* Force delay from prior write. */
|
||||
atomic_inc(&rdtp->dynticks);
|
||||
/* atomic_inc() before later RCU read-side crit sects */
|
||||
smp_mb__after_atomic(); /* See above. */
|
||||
WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
|
||||
if (rcu_dynticks_curr_cpu_in_eqs()) {
|
||||
rcu_dynticks_eqs_exit();
|
||||
incby = 1;
|
||||
}
|
||||
rdtp->dynticks_nmi_nesting += incby;
|
||||
@@ -1010,7 +1102,7 @@ void rcu_nmi_exit(void)
|
||||
* to us!)
|
||||
*/
|
||||
WARN_ON_ONCE(rdtp->dynticks_nmi_nesting <= 0);
|
||||
WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
|
||||
WARN_ON_ONCE(rcu_dynticks_curr_cpu_in_eqs());
|
||||
|
||||
/*
|
||||
* If the nesting level is not 1, the CPU wasn't RCU-idle, so
|
||||
@@ -1023,11 +1115,7 @@ void rcu_nmi_exit(void)
|
||||
|
||||
/* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */
|
||||
rdtp->dynticks_nmi_nesting = 0;
|
||||
/* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
|
||||
smp_mb__before_atomic(); /* See above. */
|
||||
atomic_inc(&rdtp->dynticks);
|
||||
smp_mb__after_atomic(); /* Force delay to next write. */
|
||||
WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
|
||||
rcu_dynticks_eqs_enter();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1040,7 +1128,7 @@ void rcu_nmi_exit(void)
|
||||
*/
|
||||
bool notrace __rcu_is_watching(void)
|
||||
{
|
||||
return atomic_read(this_cpu_ptr(&rcu_dynticks.dynticks)) & 0x1;
|
||||
return !rcu_dynticks_curr_cpu_in_eqs();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1123,9 +1211,9 @@ static int rcu_is_cpu_rrupt_from_idle(void)
|
||||
static int dyntick_save_progress_counter(struct rcu_data *rdp,
|
||||
bool *isidle, unsigned long *maxj)
|
||||
{
|
||||
rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks);
|
||||
rdp->dynticks_snap = rcu_dynticks_snap(rdp->dynticks);
|
||||
rcu_sysidle_check_cpu(rdp, isidle, maxj);
|
||||
if ((rdp->dynticks_snap & 0x1) == 0) {
|
||||
if (rcu_dynticks_in_eqs(rdp->dynticks_snap)) {
|
||||
trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti"));
|
||||
if (ULONG_CMP_LT(READ_ONCE(rdp->gpnum) + ULONG_MAX / 4,
|
||||
rdp->mynode->gpnum))
|
||||
@@ -1144,12 +1232,10 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp,
|
||||
static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
|
||||
bool *isidle, unsigned long *maxj)
|
||||
{
|
||||
unsigned int curr;
|
||||
unsigned long jtsq;
|
||||
int *rcrmp;
|
||||
unsigned int snap;
|
||||
|
||||
curr = (unsigned int)atomic_add_return(0, &rdp->dynticks->dynticks);
|
||||
snap = (unsigned int)rdp->dynticks_snap;
|
||||
unsigned long rjtsc;
|
||||
struct rcu_node *rnp;
|
||||
|
||||
/*
|
||||
* If the CPU passed through or entered a dynticks idle phase with
|
||||
@@ -1159,27 +1245,39 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
|
||||
* read-side critical section that started before the beginning
|
||||
* of the current RCU grace period.
|
||||
*/
|
||||
if ((curr & 0x1) == 0 || UINT_CMP_GE(curr, snap + 2)) {
|
||||
if (rcu_dynticks_in_eqs_since(rdp->dynticks, rdp->dynticks_snap)) {
|
||||
trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti"));
|
||||
rdp->dynticks_fqs++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Compute and saturate jiffies_till_sched_qs. */
|
||||
jtsq = jiffies_till_sched_qs;
|
||||
rjtsc = rcu_jiffies_till_stall_check();
|
||||
if (jtsq > rjtsc / 2) {
|
||||
WRITE_ONCE(jiffies_till_sched_qs, rjtsc);
|
||||
jtsq = rjtsc / 2;
|
||||
} else if (jtsq < 1) {
|
||||
WRITE_ONCE(jiffies_till_sched_qs, 1);
|
||||
jtsq = 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check for the CPU being offline, but only if the grace period
|
||||
* is old enough. We don't need to worry about the CPU changing
|
||||
* state: If we see it offline even once, it has been through a
|
||||
* quiescent state.
|
||||
*
|
||||
* The reason for insisting that the grace period be at least
|
||||
* one jiffy old is that CPUs that are not quite online and that
|
||||
* have just gone offline can still execute RCU read-side critical
|
||||
* sections.
|
||||
* Has this CPU encountered a cond_resched_rcu_qs() since the
|
||||
* beginning of the grace period? For this to be the case,
|
||||
* the CPU has to have noticed the current grace period. This
|
||||
* might not be the case for nohz_full CPUs looping in the kernel.
|
||||
*/
|
||||
if (ULONG_CMP_GE(rdp->rsp->gp_start + 2, jiffies))
|
||||
return 0; /* Grace period is not old enough. */
|
||||
barrier();
|
||||
if (cpu_is_offline(rdp->cpu)) {
|
||||
rnp = rdp->mynode;
|
||||
if (time_after(jiffies, rdp->rsp->gp_start + jtsq) &&
|
||||
READ_ONCE(rdp->rcu_qs_ctr_snap) != per_cpu(rcu_qs_ctr, rdp->cpu) &&
|
||||
READ_ONCE(rdp->gpnum) == rnp->gpnum && !rdp->gpwrap) {
|
||||
trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("rqc"));
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Check for the CPU being offline. */
|
||||
if (!(rdp->grpmask & rcu_rnp_online_cpus(rnp))) {
|
||||
trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("ofl"));
|
||||
rdp->offline_fqs++;
|
||||
return 1;
|
||||
@@ -1207,9 +1305,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
|
||||
* warning delay.
|
||||
*/
|
||||
rcrmp = &per_cpu(rcu_sched_qs_mask, rdp->cpu);
|
||||
if (ULONG_CMP_GE(jiffies,
|
||||
rdp->rsp->gp_start + jiffies_till_sched_qs) ||
|
||||
ULONG_CMP_GE(jiffies, rdp->rsp->jiffies_resched)) {
|
||||
if (time_after(jiffies, rdp->rsp->gp_start + jtsq) ||
|
||||
time_after(jiffies, rdp->rsp->jiffies_resched)) {
|
||||
if (!(READ_ONCE(*rcrmp) & rdp->rsp->flavor_mask)) {
|
||||
WRITE_ONCE(rdp->cond_resched_completed,
|
||||
READ_ONCE(rdp->mynode->completed));
|
||||
@@ -1220,11 +1317,12 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
|
||||
rdp->rsp->jiffies_resched += 5; /* Re-enable beating. */
|
||||
}
|
||||
|
||||
/* And if it has been a really long time, kick the CPU as well. */
|
||||
if (ULONG_CMP_GE(jiffies,
|
||||
rdp->rsp->gp_start + 2 * jiffies_till_sched_qs) ||
|
||||
ULONG_CMP_GE(jiffies, rdp->rsp->gp_start + jiffies_till_sched_qs))
|
||||
resched_cpu(rdp->cpu); /* Force CPU into scheduler. */
|
||||
/*
|
||||
* If more than halfway to RCU CPU stall-warning time, do
|
||||
* a resched_cpu() to try to loosen things up a bit.
|
||||
*/
|
||||
if (jiffies - rdp->rsp->gp_start > rcu_jiffies_till_stall_check() / 2)
|
||||
resched_cpu(rdp->cpu);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1277,7 +1375,10 @@ static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp)
|
||||
}
|
||||
|
||||
/*
|
||||
* Dump stacks of all tasks running on stalled CPUs.
|
||||
* Dump stacks of all tasks running on stalled CPUs. First try using
|
||||
* NMIs, but fall back to manual remote stack tracing on architectures
|
||||
* that don't support NMI-based stack dumps. The NMI-triggered stack
|
||||
* traces are more accurate because they are printed by the target CPU.
|
||||
*/
|
||||
static void rcu_dump_cpu_stacks(struct rcu_state *rsp)
|
||||
{
|
||||
@@ -1287,11 +1388,10 @@ static void rcu_dump_cpu_stacks(struct rcu_state *rsp)
|
||||
|
||||
rcu_for_each_leaf_node(rsp, rnp) {
|
||||
raw_spin_lock_irqsave_rcu_node(rnp, flags);
|
||||
if (rnp->qsmask != 0) {
|
||||
for_each_leaf_node_possible_cpu(rnp, cpu)
|
||||
if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu))
|
||||
for_each_leaf_node_possible_cpu(rnp, cpu)
|
||||
if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu))
|
||||
if (!trigger_single_cpu_backtrace(cpu))
|
||||
dump_cpu_task(cpu);
|
||||
}
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
}
|
||||
}
|
||||
@@ -1379,6 +1479,9 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)
|
||||
(long)rsp->gpnum, (long)rsp->completed, totqlen);
|
||||
if (ndetected) {
|
||||
rcu_dump_cpu_stacks(rsp);
|
||||
|
||||
/* Complain about tasks blocking the grace period. */
|
||||
rcu_print_detail_task_stall(rsp);
|
||||
} else {
|
||||
if (READ_ONCE(rsp->gpnum) != gpnum ||
|
||||
READ_ONCE(rsp->completed) == gpnum) {
|
||||
@@ -1395,9 +1498,6 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)
|
||||
}
|
||||
}
|
||||
|
||||
/* Complain about tasks blocking the grace period. */
|
||||
rcu_print_detail_task_stall(rsp);
|
||||
|
||||
rcu_check_gp_kthread_starvation(rsp);
|
||||
|
||||
panic_on_rcu_stall();
|
||||
@@ -2467,10 +2567,8 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
|
||||
|
||||
rnp = rdp->mynode;
|
||||
raw_spin_lock_irqsave_rcu_node(rnp, flags);
|
||||
if ((rdp->cpu_no_qs.b.norm &&
|
||||
rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) ||
|
||||
rdp->gpnum != rnp->gpnum || rnp->completed == rnp->gpnum ||
|
||||
rdp->gpwrap) {
|
||||
if (rdp->cpu_no_qs.b.norm || rdp->gpnum != rnp->gpnum ||
|
||||
rnp->completed == rnp->gpnum || rdp->gpwrap) {
|
||||
|
||||
/*
|
||||
* The grace period in which this quiescent state was
|
||||
@@ -2525,8 +2623,7 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
|
||||
* Was there a quiescent state since the beginning of the grace
|
||||
* period? If no, then exit and wait for the next call.
|
||||
*/
|
||||
if (rdp->cpu_no_qs.b.norm &&
|
||||
rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr))
|
||||
if (rdp->cpu_no_qs.b.norm)
|
||||
return;
|
||||
|
||||
/*
|
||||
@@ -3480,9 +3577,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
|
||||
rdp->core_needs_qs && rdp->cpu_no_qs.b.norm &&
|
||||
rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) {
|
||||
rdp->n_rp_core_needs_qs++;
|
||||
} else if (rdp->core_needs_qs &&
|
||||
(!rdp->cpu_no_qs.b.norm ||
|
||||
rdp->rcu_qs_ctr_snap != __this_cpu_read(rcu_qs_ctr))) {
|
||||
} else if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm) {
|
||||
rdp->n_rp_report_qs++;
|
||||
return 1;
|
||||
}
|
||||
@@ -3748,7 +3843,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
|
||||
rdp->grpmask = leaf_node_cpu_bit(rdp->mynode, cpu);
|
||||
rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
|
||||
WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE);
|
||||
WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
|
||||
WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp->dynticks)));
|
||||
rdp->cpu = cpu;
|
||||
rdp->rsp = rsp;
|
||||
rcu_boot_init_nocb_percpu_data(rdp);
|
||||
@@ -3765,7 +3860,6 @@ static void
|
||||
rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
|
||||
{
|
||||
unsigned long flags;
|
||||
unsigned long mask;
|
||||
struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
|
||||
struct rcu_node *rnp = rcu_get_root(rsp);
|
||||
|
||||
@@ -3778,8 +3872,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
|
||||
init_callback_list(rdp); /* Re-enable callbacks on this CPU. */
|
||||
rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
|
||||
rcu_sysidle_init_percpu_data(rdp->dynticks);
|
||||
atomic_set(&rdp->dynticks->dynticks,
|
||||
(atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
|
||||
rcu_dynticks_eqs_online();
|
||||
raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
|
||||
|
||||
/*
|
||||
@@ -3788,7 +3881,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
|
||||
* of the next grace period.
|
||||
*/
|
||||
rnp = rdp->mynode;
|
||||
mask = rdp->grpmask;
|
||||
raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
|
||||
if (!rdp->beenonline)
|
||||
WRITE_ONCE(rsp->ncpus, READ_ONCE(rsp->ncpus) + 1);
|
||||
@@ -3872,7 +3964,7 @@ void rcu_cpu_starting(unsigned int cpu)
|
||||
struct rcu_state *rsp;
|
||||
|
||||
for_each_rcu_flavor(rsp) {
|
||||
rdp = this_cpu_ptr(rsp->rda);
|
||||
rdp = per_cpu_ptr(rsp->rda, cpu);
|
||||
rnp = rdp->mynode;
|
||||
mask = rdp->grpmask;
|
||||
raw_spin_lock_irqsave_rcu_node(rnp, flags);
|
||||
|
||||
+2
-13
@@ -521,7 +521,6 @@ struct rcu_state {
|
||||
struct mutex exp_mutex; /* Serialize expedited GP. */
|
||||
struct mutex exp_wake_mutex; /* Serialize wakeup. */
|
||||
unsigned long expedited_sequence; /* Take a ticket. */
|
||||
atomic_long_t expedited_normal; /* # fallbacks to normal. */
|
||||
atomic_t expedited_need_qs; /* # CPUs left to check in. */
|
||||
struct swait_queue_head expedited_wq; /* Wait for check-ins. */
|
||||
int ncpus_snap; /* # CPUs seen last time. */
|
||||
@@ -595,6 +594,8 @@ extern struct rcu_state rcu_bh_state;
|
||||
extern struct rcu_state rcu_preempt_state;
|
||||
#endif /* #ifdef CONFIG_PREEMPT_RCU */
|
||||
|
||||
int rcu_dynticks_snap(struct rcu_dynticks *rdtp);
|
||||
|
||||
#ifdef CONFIG_RCU_BOOST
|
||||
DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
|
||||
DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu);
|
||||
@@ -687,18 +688,6 @@ static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll)
|
||||
}
|
||||
#endif /* #ifdef CONFIG_RCU_TRACE */
|
||||
|
||||
/*
|
||||
* Place this after a lock-acquisition primitive to guarantee that
|
||||
* an UNLOCK+LOCK pair act as a full barrier. This guarantee applies
|
||||
* if the UNLOCK and LOCK are executed by the same CPU or if the
|
||||
* UNLOCK and LOCK operate on the same lock variable.
|
||||
*/
|
||||
#ifdef CONFIG_PPC
|
||||
#define smp_mb__after_unlock_lock() smp_mb() /* Full ordering for lock. */
|
||||
#else /* #ifdef CONFIG_PPC */
|
||||
#define smp_mb__after_unlock_lock() do { } while (0)
|
||||
#endif /* #else #ifdef CONFIG_PPC */
|
||||
|
||||
/*
|
||||
* Wrappers for the rcu_node::lock acquire and release.
|
||||
*
|
||||
|
||||
+31
-7
@@ -20,16 +20,26 @@
|
||||
* Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
|
||||
*/
|
||||
|
||||
/* Wrapper functions for expedited grace periods. */
|
||||
/*
|
||||
* Record the start of an expedited grace period.
|
||||
*/
|
||||
static void rcu_exp_gp_seq_start(struct rcu_state *rsp)
|
||||
{
|
||||
rcu_seq_start(&rsp->expedited_sequence);
|
||||
}
|
||||
|
||||
/*
|
||||
* Record the end of an expedited grace period.
|
||||
*/
|
||||
static void rcu_exp_gp_seq_end(struct rcu_state *rsp)
|
||||
{
|
||||
rcu_seq_end(&rsp->expedited_sequence);
|
||||
smp_mb(); /* Ensure that consecutive grace periods serialize. */
|
||||
}
|
||||
|
||||
/*
|
||||
* Take a snapshot of the expedited-grace-period counter.
|
||||
*/
|
||||
static unsigned long rcu_exp_gp_seq_snap(struct rcu_state *rsp)
|
||||
{
|
||||
unsigned long s;
|
||||
@@ -39,6 +49,12 @@ static unsigned long rcu_exp_gp_seq_snap(struct rcu_state *rsp)
|
||||
trace_rcu_exp_grace_period(rsp->name, s, TPS("snap"));
|
||||
return s;
|
||||
}
|
||||
|
||||
/*
|
||||
* Given a counter snapshot from rcu_exp_gp_seq_snap(), return true
|
||||
* if a full expedited grace period has elapsed since that snapshot
|
||||
* was taken.
|
||||
*/
|
||||
static bool rcu_exp_gp_seq_done(struct rcu_state *rsp, unsigned long s)
|
||||
{
|
||||
return rcu_seq_done(&rsp->expedited_sequence, s);
|
||||
@@ -356,12 +372,11 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
|
||||
mask_ofl_test = 0;
|
||||
for_each_leaf_node_possible_cpu(rnp, cpu) {
|
||||
struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
|
||||
struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
|
||||
|
||||
rdp->exp_dynticks_snap =
|
||||
atomic_add_return(0, &rdtp->dynticks);
|
||||
rcu_dynticks_snap(rdp->dynticks);
|
||||
if (raw_smp_processor_id() == cpu ||
|
||||
!(rdp->exp_dynticks_snap & 0x1) ||
|
||||
rcu_dynticks_in_eqs(rdp->exp_dynticks_snap) ||
|
||||
!(rnp->qsmaskinitnext & rdp->grpmask))
|
||||
mask_ofl_test |= rdp->grpmask;
|
||||
}
|
||||
@@ -380,13 +395,12 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
|
||||
for_each_leaf_node_possible_cpu(rnp, cpu) {
|
||||
unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
|
||||
struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
|
||||
struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
|
||||
|
||||
if (!(mask_ofl_ipi & mask))
|
||||
continue;
|
||||
retry_ipi:
|
||||
if (atomic_add_return(0, &rdtp->dynticks) !=
|
||||
rdp->exp_dynticks_snap) {
|
||||
if (rcu_dynticks_in_eqs_since(rdp->dynticks,
|
||||
rdp->exp_dynticks_snap)) {
|
||||
mask_ofl_test |= mask;
|
||||
continue;
|
||||
}
|
||||
@@ -623,6 +637,11 @@ void synchronize_sched_expedited(void)
|
||||
{
|
||||
struct rcu_state *rsp = &rcu_sched_state;
|
||||
|
||||
RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
|
||||
lock_is_held(&rcu_lock_map) ||
|
||||
lock_is_held(&rcu_sched_lock_map),
|
||||
"Illegal synchronize_sched_expedited() in RCU read-side critical section");
|
||||
|
||||
/* If only one CPU, this is automatically a grace period. */
|
||||
if (rcu_blocking_is_gp())
|
||||
return;
|
||||
@@ -692,6 +711,11 @@ void synchronize_rcu_expedited(void)
|
||||
{
|
||||
struct rcu_state *rsp = rcu_state_p;
|
||||
|
||||
RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
|
||||
lock_is_held(&rcu_lock_map) ||
|
||||
lock_is_held(&rcu_sched_lock_map),
|
||||
"Illegal synchronize_rcu_expedited() in RCU read-side critical section");
|
||||
|
||||
if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE)
|
||||
return;
|
||||
_synchronize_rcu_expedited(rsp, sync_rcu_exp_handler);
|
||||
|
||||
@@ -1643,7 +1643,7 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
|
||||
"o."[!!(rdp->grpmask & rdp->mynode->qsmaskinit)],
|
||||
"N."[!!(rdp->grpmask & rdp->mynode->qsmaskinitnext)],
|
||||
ticks_value, ticks_title,
|
||||
atomic_read(&rdtp->dynticks) & 0xfff,
|
||||
rcu_dynticks_snap(rdtp) & 0xfff,
|
||||
rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting,
|
||||
rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu),
|
||||
READ_ONCE(rsp->n_force_qs) - rsp->n_force_qs_gpstart,
|
||||
@@ -2366,8 +2366,9 @@ static void __init rcu_organize_nocb_kthreads(struct rcu_state *rsp)
|
||||
}
|
||||
|
||||
/*
|
||||
* Each pass through this loop sets up one rcu_data structure and
|
||||
* spawns one rcu_nocb_kthread().
|
||||
* Each pass through this loop sets up one rcu_data structure.
|
||||
* Should the corresponding CPU come online in the future, then
|
||||
* we will spawn the needed set of rcu_nocb_kthread() kthreads.
|
||||
*/
|
||||
for_each_cpu(cpu, rcu_nocb_mask) {
|
||||
rdp = per_cpu_ptr(rsp->rda, cpu);
|
||||
|
||||
@@ -124,7 +124,7 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
|
||||
rdp->rcu_qs_ctr_snap == per_cpu(rcu_qs_ctr, rdp->cpu),
|
||||
rdp->core_needs_qs);
|
||||
seq_printf(m, " dt=%d/%llx/%d df=%lu",
|
||||
atomic_read(&rdp->dynticks->dynticks),
|
||||
rcu_dynticks_snap(rdp->dynticks),
|
||||
rdp->dynticks->dynticks_nesting,
|
||||
rdp->dynticks->dynticks_nmi_nesting,
|
||||
rdp->dynticks_fqs);
|
||||
@@ -194,9 +194,8 @@ static int show_rcuexp(struct seq_file *m, void *v)
|
||||
s2 += atomic_long_read(&rdp->exp_workdone2);
|
||||
s3 += atomic_long_read(&rdp->exp_workdone3);
|
||||
}
|
||||
seq_printf(m, "s=%lu wd0=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
|
||||
seq_printf(m, "s=%lu wd0=%lu wd1=%lu wd2=%lu wd3=%lu enq=%d sc=%lu\n",
|
||||
rsp->expedited_sequence, s0, s1, s2, s3,
|
||||
atomic_long_read(&rsp->expedited_normal),
|
||||
atomic_read(&rsp->expedited_need_qs),
|
||||
rsp->expedited_sequence / 2);
|
||||
return 0;
|
||||
|
||||
+2
-4
@@ -132,8 +132,7 @@ bool rcu_gp_is_normal(void)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcu_gp_is_normal);
|
||||
|
||||
static atomic_t rcu_expedited_nesting =
|
||||
ATOMIC_INIT(IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT) ? 1 : 0);
|
||||
static atomic_t rcu_expedited_nesting = ATOMIC_INIT(1);
|
||||
|
||||
/*
|
||||
* Should normal grace-period primitives be expedited? Intended for
|
||||
@@ -182,8 +181,7 @@ EXPORT_SYMBOL_GPL(rcu_unexpedite_gp);
|
||||
*/
|
||||
void rcu_end_inkernel_boot(void)
|
||||
{
|
||||
if (IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT))
|
||||
rcu_unexpedite_gp();
|
||||
rcu_unexpedite_gp();
|
||||
if (rcu_normal_after_boot)
|
||||
WRITE_ONCE(rcu_normal, 1);
|
||||
}
|
||||
|
||||
@@ -1450,6 +1450,7 @@ config RCU_CPU_STALL_TIMEOUT
|
||||
config RCU_TRACE
|
||||
bool "Enable tracing for RCU"
|
||||
depends on DEBUG_KERNEL
|
||||
default y if TREE_RCU
|
||||
select TRACE_CLOCK
|
||||
help
|
||||
This option provides tracing in RCU which presents stats
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user