You've already forked linux-apfs
mirror of
https://github.com/linux-apfs/linux-apfs.git
synced 2026-05-01 15:00:59 -07:00
Merge branch 'sched/for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'sched/for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (76 commits)
sched_clock: and multiplier for TSC to gtod drift
sched_clock: record TSC after gtod
sched_clock: only update deltas with local reads.
sched_clock: fix calculation of other CPU
sched_clock: stop maximum check on NO HZ
sched_clock: widen the max and min time
sched_clock: record from last tick
sched: fix accounting in task delay accounting & migration
sched: add avg-overlap support to RT tasks
sched: terminate newidle balancing once at least one task has moved over
sched: fix warning
sched: build fix
sched: sched_clock_cpu() based cpu_clock(), lockdep fix
sched: export cpu_clock
sched: make sched_{rt,fair}.c ifdefs more readable
sched: bias effective_load() error towards failing wake_affine().
sched: incremental effective_load()
sched: correct wakeup weight calculations
sched: fix mult overflow
sched: update shares on wakeup
...
This commit is contained in:
@@ -61,10 +61,7 @@ builder by #define'ing ARCH_HASH_SCHED_DOMAIN, and exporting your
|
||||
arch_init_sched_domains function. This function will attach domains to all
|
||||
CPUs using cpu_attach_domain.
|
||||
|
||||
Implementors should change the line
|
||||
#undef SCHED_DOMAIN_DEBUG
|
||||
to
|
||||
#define SCHED_DOMAIN_DEBUG
|
||||
in kernel/sched.c as this enables an error checking parse of the sched domains
|
||||
The sched-domains debugging infrastructure can be enabled by enabling
|
||||
CONFIG_SCHED_DEBUG. This enables an error checking parse of the sched domains
|
||||
which should catch most possible errors (described above). It also prints out
|
||||
the domain structure in a visual format.
|
||||
|
||||
@@ -51,9 +51,9 @@ needs only about 3% CPU time to do so, it can do with a 0.03 * 0.005s =
|
||||
0.00015s. So this group can be scheduled with a period of 0.005s and a run time
|
||||
of 0.00015s.
|
||||
|
||||
The remaining CPU time will be used for user input and other tass. Because
|
||||
The remaining CPU time will be used for user input and other tasks. Because
|
||||
realtime tasks have explicitly allocated the CPU time they need to perform
|
||||
their tasks, buffer underruns in the graphocs or audio can be eliminated.
|
||||
their tasks, buffer underruns in the graphics or audio can be eliminated.
|
||||
|
||||
NOTE: the above example is not fully implemented as of yet (2.6.25). We still
|
||||
lack an EDF scheduler to make non-uniform periods usable.
|
||||
|
||||
+30
-29
@@ -134,7 +134,6 @@ extern unsigned long nr_running(void);
|
||||
extern unsigned long nr_uninterruptible(void);
|
||||
extern unsigned long nr_active(void);
|
||||
extern unsigned long nr_iowait(void);
|
||||
extern unsigned long weighted_cpuload(const int cpu);
|
||||
|
||||
struct seq_file;
|
||||
struct cfs_rq;
|
||||
@@ -784,6 +783,8 @@ struct sched_domain {
|
||||
unsigned int balance_interval; /* initialise to 1. units in ms. */
|
||||
unsigned int nr_balance_failed; /* initialise to 0 */
|
||||
|
||||
u64 last_update;
|
||||
|
||||
#ifdef CONFIG_SCHEDSTATS
|
||||
/* load_balance() stats */
|
||||
unsigned int lb_count[CPU_MAX_IDLE_TYPES];
|
||||
@@ -823,23 +824,6 @@ extern int arch_reinit_sched_domains(void);
|
||||
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
/*
|
||||
* A runqueue laden with a single nice 0 task scores a weighted_cpuload of
|
||||
* SCHED_LOAD_SCALE. This function returns 1 if any cpu is laden with a
|
||||
* task of nice 0 or enough lower priority tasks to bring up the
|
||||
* weighted_cpuload
|
||||
*/
|
||||
static inline int above_background_load(void)
|
||||
{
|
||||
unsigned long cpu;
|
||||
|
||||
for_each_online_cpu(cpu) {
|
||||
if (weighted_cpuload(cpu) >= SCHED_LOAD_SCALE)
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct io_context; /* See blkdev.h */
|
||||
#define NGROUPS_SMALL 32
|
||||
#define NGROUPS_PER_BLOCK ((unsigned int)(PAGE_SIZE / sizeof(gid_t)))
|
||||
@@ -921,8 +905,8 @@ struct sched_class {
|
||||
void (*set_cpus_allowed)(struct task_struct *p,
|
||||
const cpumask_t *newmask);
|
||||
|
||||
void (*join_domain)(struct rq *rq);
|
||||
void (*leave_domain)(struct rq *rq);
|
||||
void (*rq_online)(struct rq *rq);
|
||||
void (*rq_offline)(struct rq *rq);
|
||||
|
||||
void (*switched_from) (struct rq *this_rq, struct task_struct *task,
|
||||
int running);
|
||||
@@ -1039,6 +1023,7 @@ struct task_struct {
|
||||
#endif
|
||||
|
||||
int prio, static_prio, normal_prio;
|
||||
unsigned int rt_priority;
|
||||
const struct sched_class *sched_class;
|
||||
struct sched_entity se;
|
||||
struct sched_rt_entity rt;
|
||||
@@ -1122,7 +1107,6 @@ struct task_struct {
|
||||
int __user *set_child_tid; /* CLONE_CHILD_SETTID */
|
||||
int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */
|
||||
|
||||
unsigned int rt_priority;
|
||||
cputime_t utime, stime, utimescaled, stimescaled;
|
||||
cputime_t gtime;
|
||||
cputime_t prev_utime, prev_stime;
|
||||
@@ -1141,12 +1125,12 @@ struct task_struct {
|
||||
gid_t gid,egid,sgid,fsgid;
|
||||
struct group_info *group_info;
|
||||
kernel_cap_t cap_effective, cap_inheritable, cap_permitted, cap_bset;
|
||||
unsigned securebits;
|
||||
struct user_struct *user;
|
||||
unsigned securebits;
|
||||
#ifdef CONFIG_KEYS
|
||||
unsigned char jit_keyring; /* default keyring to attach requested keys to */
|
||||
struct key *request_key_auth; /* assumed request_key authority */
|
||||
struct key *thread_keyring; /* keyring private to this thread */
|
||||
unsigned char jit_keyring; /* default keyring to attach requested keys to */
|
||||
#endif
|
||||
char comm[TASK_COMM_LEN]; /* executable name excluding path
|
||||
- access with [gs]et_task_comm (which lock
|
||||
@@ -1233,8 +1217,8 @@ struct task_struct {
|
||||
# define MAX_LOCK_DEPTH 48UL
|
||||
u64 curr_chain_key;
|
||||
int lockdep_depth;
|
||||
struct held_lock held_locks[MAX_LOCK_DEPTH];
|
||||
unsigned int lockdep_recursion;
|
||||
struct held_lock held_locks[MAX_LOCK_DEPTH];
|
||||
#endif
|
||||
|
||||
/* journalling filesystem info */
|
||||
@@ -1262,10 +1246,6 @@ struct task_struct {
|
||||
u64 acct_vm_mem1; /* accumulated virtual memory usage */
|
||||
cputime_t acct_stimexpd;/* stime since last update */
|
||||
#endif
|
||||
#ifdef CONFIG_NUMA
|
||||
struct mempolicy *mempolicy;
|
||||
short il_next;
|
||||
#endif
|
||||
#ifdef CONFIG_CPUSETS
|
||||
nodemask_t mems_allowed;
|
||||
int cpuset_mems_generation;
|
||||
@@ -1284,6 +1264,10 @@ struct task_struct {
|
||||
#endif
|
||||
struct list_head pi_state_list;
|
||||
struct futex_pi_state *pi_state_cache;
|
||||
#endif
|
||||
#ifdef CONFIG_NUMA
|
||||
struct mempolicy *mempolicy;
|
||||
short il_next;
|
||||
#endif
|
||||
atomic_t fs_excl; /* holding fs exclusive resources */
|
||||
struct rcu_head rcu;
|
||||
@@ -1504,6 +1488,7 @@ static inline void put_task_struct(struct task_struct *t)
|
||||
#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */
|
||||
#define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */
|
||||
#define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */
|
||||
#define PF_THREAD_BOUND 0x04000000 /* Thread bound to specific cpu */
|
||||
#define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */
|
||||
#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */
|
||||
#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */
|
||||
@@ -1573,13 +1558,28 @@ static inline void sched_clock_idle_sleep_event(void)
|
||||
static inline void sched_clock_idle_wakeup_event(u64 delta_ns)
|
||||
{
|
||||
}
|
||||
#else
|
||||
|
||||
#ifdef CONFIG_NO_HZ
|
||||
static inline void sched_clock_tick_stop(int cpu)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void sched_clock_tick_start(int cpu)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
|
||||
extern void sched_clock_init(void);
|
||||
extern u64 sched_clock_cpu(int cpu);
|
||||
extern void sched_clock_tick(void);
|
||||
extern void sched_clock_idle_sleep_event(void);
|
||||
extern void sched_clock_idle_wakeup_event(u64 delta_ns);
|
||||
#ifdef CONFIG_NO_HZ
|
||||
extern void sched_clock_tick_stop(int cpu);
|
||||
extern void sched_clock_tick_start(int cpu);
|
||||
#endif
|
||||
#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
|
||||
|
||||
/*
|
||||
* For kernel-internal use: high-speed (but slightly incorrect) per-cpu
|
||||
@@ -1622,6 +1622,7 @@ extern unsigned int sysctl_sched_child_runs_first;
|
||||
extern unsigned int sysctl_sched_features;
|
||||
extern unsigned int sysctl_sched_migration_cost;
|
||||
extern unsigned int sysctl_sched_nr_migrate;
|
||||
extern unsigned int sysctl_sched_shares_ratelimit;
|
||||
|
||||
int sched_nr_latency_handler(struct ctl_table *table, int write,
|
||||
struct file *file, void __user *buffer, size_t *length,
|
||||
|
||||
+3
-2
@@ -3,7 +3,7 @@
|
||||
#
|
||||
|
||||
obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
|
||||
exit.o itimer.o time.o softirq.o resource.o \
|
||||
cpu.o exit.o itimer.o time.o softirq.o resource.o \
|
||||
sysctl.o capability.o ptrace.o timer.o user.o \
|
||||
signal.o sys.o kmod.o workqueue.o pid.o \
|
||||
rcupdate.o extable.o params.o posix-timers.o \
|
||||
@@ -27,7 +27,7 @@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
|
||||
obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
|
||||
obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
|
||||
obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
|
||||
obj-$(CONFIG_SMP) += cpu.o spinlock.o
|
||||
obj-$(CONFIG_SMP) += spinlock.o
|
||||
obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
|
||||
obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
|
||||
obj-$(CONFIG_UID16) += uid16.o
|
||||
@@ -69,6 +69,7 @@ obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
|
||||
obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
|
||||
obj-$(CONFIG_MARKERS) += marker.o
|
||||
obj-$(CONFIG_LATENCYTOP) += latencytop.o
|
||||
obj-$(CONFIG_SMP) += sched_cpupri.o
|
||||
|
||||
ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
|
||||
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
|
||||
|
||||
@@ -15,6 +15,28 @@
|
||||
#include <linux/stop_machine.h>
|
||||
#include <linux/mutex.h>
|
||||
|
||||
/*
|
||||
* Represents all cpu's present in the system
|
||||
* In systems capable of hotplug, this map could dynamically grow
|
||||
* as new cpu's are detected in the system via any platform specific
|
||||
* method, such as ACPI.
|
||||
*/
|
||||
cpumask_t cpu_present_map __read_mostly;
|
||||
EXPORT_SYMBOL(cpu_present_map);
|
||||
|
||||
#ifndef CONFIG_SMP
|
||||
|
||||
/*
|
||||
* Represents all cpu's that are currently online.
|
||||
*/
|
||||
cpumask_t cpu_online_map __read_mostly = CPU_MASK_ALL;
|
||||
EXPORT_SYMBOL(cpu_online_map);
|
||||
|
||||
cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL;
|
||||
EXPORT_SYMBOL(cpu_possible_map);
|
||||
|
||||
#else /* CONFIG_SMP */
|
||||
|
||||
/* Serializes the updates to cpu_online_map, cpu_present_map */
|
||||
static DEFINE_MUTEX(cpu_add_remove_lock);
|
||||
|
||||
@@ -403,3 +425,5 @@ out:
|
||||
cpu_maps_update_done();
|
||||
}
|
||||
#endif /* CONFIG_PM_SLEEP_SMP */
|
||||
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
+13
-1
@@ -1194,6 +1194,15 @@ static int cpuset_can_attach(struct cgroup_subsys *ss,
|
||||
|
||||
if (cpus_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
|
||||
return -ENOSPC;
|
||||
if (tsk->flags & PF_THREAD_BOUND) {
|
||||
cpumask_t mask;
|
||||
|
||||
mutex_lock(&callback_mutex);
|
||||
mask = cs->cpus_allowed;
|
||||
mutex_unlock(&callback_mutex);
|
||||
if (!cpus_equal(tsk->cpus_allowed, mask))
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return security_task_setscheduler(tsk, 0, NULL);
|
||||
}
|
||||
@@ -1207,11 +1216,14 @@ static void cpuset_attach(struct cgroup_subsys *ss,
|
||||
struct mm_struct *mm;
|
||||
struct cpuset *cs = cgroup_cs(cont);
|
||||
struct cpuset *oldcs = cgroup_cs(oldcont);
|
||||
int err;
|
||||
|
||||
mutex_lock(&callback_mutex);
|
||||
guarantee_online_cpus(cs, &cpus);
|
||||
set_cpus_allowed_ptr(tsk, &cpus);
|
||||
err = set_cpus_allowed_ptr(tsk, &cpus);
|
||||
mutex_unlock(&callback_mutex);
|
||||
if (err)
|
||||
return;
|
||||
|
||||
from = oldcs->mems_allowed;
|
||||
to = cs->mems_allowed;
|
||||
|
||||
@@ -180,6 +180,7 @@ void kthread_bind(struct task_struct *k, unsigned int cpu)
|
||||
set_task_cpu(k, cpu);
|
||||
k->cpus_allowed = cpumask_of_cpu(cpu);
|
||||
k->rt.nr_cpus_allowed = 1;
|
||||
k->flags |= PF_THREAD_BOUND;
|
||||
}
|
||||
EXPORT_SYMBOL(kthread_bind);
|
||||
|
||||
|
||||
+491
-232
File diff suppressed because it is too large
Load Diff
+118
-19
@@ -3,6 +3,9 @@
|
||||
*
|
||||
* Copyright (C) 2008 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
|
||||
*
|
||||
* Updates and enhancements:
|
||||
* Copyright (C) 2008 Red Hat, Inc. Steven Rostedt <srostedt@redhat.com>
|
||||
*
|
||||
* Based on code by:
|
||||
* Ingo Molnar <mingo@redhat.com>
|
||||
* Guillaume Chazarain <guichaz@gmail.com>
|
||||
@@ -32,6 +35,11 @@
|
||||
|
||||
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
|
||||
|
||||
#define MULTI_SHIFT 15
|
||||
/* Max is double, Min is 1/2 */
|
||||
#define MAX_MULTI (2LL << MULTI_SHIFT)
|
||||
#define MIN_MULTI (1LL << (MULTI_SHIFT-1))
|
||||
|
||||
struct sched_clock_data {
|
||||
/*
|
||||
* Raw spinlock - this is a special case: this might be called
|
||||
@@ -40,11 +48,15 @@ struct sched_clock_data {
|
||||
*/
|
||||
raw_spinlock_t lock;
|
||||
|
||||
unsigned long prev_jiffies;
|
||||
unsigned long tick_jiffies;
|
||||
u64 prev_raw;
|
||||
u64 tick_raw;
|
||||
u64 tick_gtod;
|
||||
u64 clock;
|
||||
s64 multi;
|
||||
#ifdef CONFIG_NO_HZ
|
||||
int check_max;
|
||||
#endif
|
||||
};
|
||||
|
||||
static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data);
|
||||
@@ -71,41 +83,91 @@ void sched_clock_init(void)
|
||||
struct sched_clock_data *scd = cpu_sdc(cpu);
|
||||
|
||||
scd->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
|
||||
scd->prev_jiffies = now_jiffies;
|
||||
scd->tick_jiffies = now_jiffies;
|
||||
scd->prev_raw = 0;
|
||||
scd->tick_raw = 0;
|
||||
scd->tick_gtod = ktime_now;
|
||||
scd->clock = ktime_now;
|
||||
scd->multi = 1 << MULTI_SHIFT;
|
||||
#ifdef CONFIG_NO_HZ
|
||||
scd->check_max = 1;
|
||||
#endif
|
||||
}
|
||||
|
||||
sched_clock_running = 1;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NO_HZ
|
||||
/*
|
||||
* Dynamic ticks make the delta jiffies inaccurate. This
|
||||
* prevents us from checking the maximum time update.
|
||||
* Disable the maximum check during stopped ticks.
|
||||
*/
|
||||
void sched_clock_tick_stop(int cpu)
|
||||
{
|
||||
struct sched_clock_data *scd = cpu_sdc(cpu);
|
||||
|
||||
scd->check_max = 0;
|
||||
}
|
||||
|
||||
void sched_clock_tick_start(int cpu)
|
||||
{
|
||||
struct sched_clock_data *scd = cpu_sdc(cpu);
|
||||
|
||||
scd->check_max = 1;
|
||||
}
|
||||
|
||||
static int check_max(struct sched_clock_data *scd)
|
||||
{
|
||||
return scd->check_max;
|
||||
}
|
||||
#else
|
||||
static int check_max(struct sched_clock_data *scd)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
#endif /* CONFIG_NO_HZ */
|
||||
|
||||
/*
|
||||
* update the percpu scd from the raw @now value
|
||||
*
|
||||
* - filter out backward motion
|
||||
* - use jiffies to generate a min,max window to clip the raw values
|
||||
*/
|
||||
static void __update_sched_clock(struct sched_clock_data *scd, u64 now)
|
||||
static void __update_sched_clock(struct sched_clock_data *scd, u64 now, u64 *time)
|
||||
{
|
||||
unsigned long now_jiffies = jiffies;
|
||||
long delta_jiffies = now_jiffies - scd->prev_jiffies;
|
||||
long delta_jiffies = now_jiffies - scd->tick_jiffies;
|
||||
u64 clock = scd->clock;
|
||||
u64 min_clock, max_clock;
|
||||
s64 delta = now - scd->prev_raw;
|
||||
|
||||
WARN_ON_ONCE(!irqs_disabled());
|
||||
min_clock = scd->tick_gtod + delta_jiffies * TICK_NSEC;
|
||||
|
||||
/*
|
||||
* At schedule tick the clock can be just under the gtod. We don't
|
||||
* want to push it too prematurely.
|
||||
*/
|
||||
min_clock = scd->tick_gtod + (delta_jiffies * TICK_NSEC);
|
||||
if (min_clock > TICK_NSEC)
|
||||
min_clock -= TICK_NSEC / 2;
|
||||
|
||||
if (unlikely(delta < 0)) {
|
||||
clock++;
|
||||
goto out;
|
||||
}
|
||||
|
||||
max_clock = min_clock + TICK_NSEC;
|
||||
/*
|
||||
* The clock must stay within a jiffy of the gtod.
|
||||
* But since we may be at the start of a jiffy or the end of one
|
||||
* we add another jiffy buffer.
|
||||
*/
|
||||
max_clock = scd->tick_gtod + (2 + delta_jiffies) * TICK_NSEC;
|
||||
|
||||
if (unlikely(clock + delta > max_clock)) {
|
||||
delta *= scd->multi;
|
||||
delta >>= MULTI_SHIFT;
|
||||
|
||||
if (unlikely(clock + delta > max_clock) && check_max(scd)) {
|
||||
if (clock < max_clock)
|
||||
clock = max_clock;
|
||||
else
|
||||
@@ -118,9 +180,12 @@ static void __update_sched_clock(struct sched_clock_data *scd, u64 now)
|
||||
if (unlikely(clock < min_clock))
|
||||
clock = min_clock;
|
||||
|
||||
scd->prev_raw = now;
|
||||
scd->prev_jiffies = now_jiffies;
|
||||
scd->clock = clock;
|
||||
if (time)
|
||||
*time = clock;
|
||||
else {
|
||||
scd->prev_raw = now;
|
||||
scd->clock = clock;
|
||||
}
|
||||
}
|
||||
|
||||
static void lock_double_clock(struct sched_clock_data *data1,
|
||||
@@ -160,25 +225,30 @@ u64 sched_clock_cpu(int cpu)
|
||||
now -= my_scd->tick_raw;
|
||||
now += scd->tick_raw;
|
||||
|
||||
now -= my_scd->tick_gtod;
|
||||
now += scd->tick_gtod;
|
||||
now += my_scd->tick_gtod;
|
||||
now -= scd->tick_gtod;
|
||||
|
||||
__raw_spin_unlock(&my_scd->lock);
|
||||
|
||||
__update_sched_clock(scd, now, &clock);
|
||||
|
||||
__raw_spin_unlock(&scd->lock);
|
||||
|
||||
} else {
|
||||
__raw_spin_lock(&scd->lock);
|
||||
__update_sched_clock(scd, now, NULL);
|
||||
clock = scd->clock;
|
||||
__raw_spin_unlock(&scd->lock);
|
||||
}
|
||||
|
||||
__update_sched_clock(scd, now);
|
||||
clock = scd->clock;
|
||||
|
||||
__raw_spin_unlock(&scd->lock);
|
||||
|
||||
return clock;
|
||||
}
|
||||
|
||||
void sched_clock_tick(void)
|
||||
{
|
||||
struct sched_clock_data *scd = this_scd();
|
||||
unsigned long now_jiffies = jiffies;
|
||||
s64 mult, delta_gtod, delta_raw;
|
||||
u64 now, now_gtod;
|
||||
|
||||
if (unlikely(!sched_clock_running))
|
||||
@@ -186,18 +256,33 @@ void sched_clock_tick(void)
|
||||
|
||||
WARN_ON_ONCE(!irqs_disabled());
|
||||
|
||||
now = sched_clock();
|
||||
now_gtod = ktime_to_ns(ktime_get());
|
||||
now = sched_clock();
|
||||
|
||||
__raw_spin_lock(&scd->lock);
|
||||
__update_sched_clock(scd, now);
|
||||
__update_sched_clock(scd, now, NULL);
|
||||
/*
|
||||
* update tick_gtod after __update_sched_clock() because that will
|
||||
* already observe 1 new jiffy; adding a new tick_gtod to that would
|
||||
* increase the clock 2 jiffies.
|
||||
*/
|
||||
delta_gtod = now_gtod - scd->tick_gtod;
|
||||
delta_raw = now - scd->tick_raw;
|
||||
|
||||
if ((long)delta_raw > 0) {
|
||||
mult = delta_gtod << MULTI_SHIFT;
|
||||
do_div(mult, delta_raw);
|
||||
scd->multi = mult;
|
||||
if (scd->multi > MAX_MULTI)
|
||||
scd->multi = MAX_MULTI;
|
||||
else if (scd->multi < MIN_MULTI)
|
||||
scd->multi = MIN_MULTI;
|
||||
} else
|
||||
scd->multi = 1 << MULTI_SHIFT;
|
||||
|
||||
scd->tick_raw = now;
|
||||
scd->tick_gtod = now_gtod;
|
||||
scd->tick_jiffies = now_jiffies;
|
||||
__raw_spin_unlock(&scd->lock);
|
||||
}
|
||||
|
||||
@@ -227,6 +312,7 @@ void sched_clock_idle_wakeup_event(u64 delta_ns)
|
||||
__raw_spin_lock(&scd->lock);
|
||||
scd->prev_raw = now;
|
||||
scd->clock += delta_ns;
|
||||
scd->multi = 1 << MULTI_SHIFT;
|
||||
__raw_spin_unlock(&scd->lock);
|
||||
|
||||
touch_softlockup_watchdog();
|
||||
@@ -244,3 +330,16 @@ unsigned long long __attribute__((weak)) sched_clock(void)
|
||||
{
|
||||
return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ);
|
||||
}
|
||||
|
||||
/*
 * cpu_clock - read the scheduler clock of @cpu
 *
 * Wraps sched_clock_cpu() so that the sample is taken with local
 * interrupts masked; the previous interrupt state is restored before
 * returning the value.
 */
unsigned long long cpu_clock(int cpu)
{
	unsigned long irq_state;
	unsigned long long now;

	local_irq_save(irq_state);
	now = sched_clock_cpu(cpu);
	local_irq_restore(irq_state);

	return now;
}
EXPORT_SYMBOL_GPL(cpu_clock);
|
||||
|
||||
@@ -0,0 +1,174 @@
|
||||
/*
|
||||
* kernel/sched_cpupri.c
|
||||
*
|
||||
* CPU priority management
|
||||
*
|
||||
* Copyright (C) 2007-2008 Novell
|
||||
*
|
||||
* Author: Gregory Haskins <ghaskins@novell.com>
|
||||
*
|
||||
* This code tracks the priority of each CPU so that global migration
|
||||
* decisions are easy to calculate. Each CPU can be in a state as follows:
|
||||
*
|
||||
* (INVALID), IDLE, NORMAL, RT1, ... RT99
|
||||
*
|
||||
* going from the lowest priority to the highest. CPUs in the INVALID state
|
||||
* are not eligible for routing. The system maintains this state with
|
||||
* a 2 dimensional bitmap (the first for priority class, the second for cpus
|
||||
* in that class). Therefore a typical application without affinity
|
||||
* restrictions can find a suitable CPU with O(1) complexity (e.g. two bit
|
||||
* searches). For tasks with affinity restrictions, the algorithm has a
|
||||
* worst case complexity of O(min(102, nr_domcpus)), though the scenario that
|
||||
* yields the worst case search is fairly contrived.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; version 2
|
||||
* of the License.
|
||||
*/
|
||||
|
||||
#include "sched_cpupri.h"
|
||||
|
||||
/* Convert between a 140 based task->prio, and our 102 based cpupri */
|
||||
static int convert_prio(int prio)
|
||||
{
|
||||
int cpupri;
|
||||
|
||||
if (prio == CPUPRI_INVALID)
|
||||
cpupri = CPUPRI_INVALID;
|
||||
else if (prio == MAX_PRIO)
|
||||
cpupri = CPUPRI_IDLE;
|
||||
else if (prio >= MAX_RT_PRIO)
|
||||
cpupri = CPUPRI_NORMAL;
|
||||
else
|
||||
cpupri = MAX_RT_PRIO - prio + 1;
|
||||
|
||||
return cpupri;
|
||||
}
|
||||
|
||||
#define for_each_cpupri_active(array, idx) \
|
||||
for (idx = find_first_bit(array, CPUPRI_NR_PRIORITIES); \
|
||||
idx < CPUPRI_NR_PRIORITIES; \
|
||||
idx = find_next_bit(array, CPUPRI_NR_PRIORITIES, idx+1))
|
||||
|
||||
/**
|
||||
* cpupri_find - find the best (lowest-pri) CPU in the system
|
||||
* @cp: The cpupri context
|
||||
* @p: The task
|
||||
* @lowest_mask: A mask to fill in with selected CPUs
|
||||
*
|
||||
* Note: This function returns the recommended CPUs as calculated during the
|
||||
* current invocation. By the time the call returns, the CPUs may have in
|
||||
* fact changed priorities any number of times. While not ideal, it is not
|
||||
* an issue of correctness since the normal rebalancer logic will correct
|
||||
* any discrepancies created by racing against the uncertainty of the current
|
||||
* priority configuration.
|
||||
*
|
||||
* Returns: (int)bool - CPUs were found
|
||||
*/
|
||||
int cpupri_find(struct cpupri *cp, struct task_struct *p,
|
||||
cpumask_t *lowest_mask)
|
||||
{
|
||||
int idx = 0;
|
||||
int task_pri = convert_prio(p->prio);
|
||||
|
||||
for_each_cpupri_active(cp->pri_active, idx) {
|
||||
struct cpupri_vec *vec = &cp->pri_to_cpu[idx];
|
||||
cpumask_t mask;
|
||||
|
||||
if (idx >= task_pri)
|
||||
break;
|
||||
|
||||
cpus_and(mask, p->cpus_allowed, vec->mask);
|
||||
|
||||
if (cpus_empty(mask))
|
||||
continue;
|
||||
|
||||
*lowest_mask = mask;
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* cpupri_set - update the cpu priority setting
|
||||
* @cp: The cpupri context
|
||||
* @cpu: The target cpu
|
||||
* @newpri: The priority (INVALID-RT99) to assign to this CPU
|
||||
*
|
||||
* Note: Assumes cpu_rq(cpu)->lock is locked
|
||||
*
|
||||
* Returns: (void)
|
||||
*/
|
||||
void cpupri_set(struct cpupri *cp, int cpu, int newpri)
|
||||
{
|
||||
int *currpri = &cp->cpu_to_pri[cpu];
|
||||
int oldpri = *currpri;
|
||||
unsigned long flags;
|
||||
|
||||
newpri = convert_prio(newpri);
|
||||
|
||||
BUG_ON(newpri >= CPUPRI_NR_PRIORITIES);
|
||||
|
||||
if (newpri == oldpri)
|
||||
return;
|
||||
|
||||
/*
|
||||
* If the cpu was currently mapped to a different value, we
|
||||
* first need to unmap the old value
|
||||
*/
|
||||
if (likely(oldpri != CPUPRI_INVALID)) {
|
||||
struct cpupri_vec *vec = &cp->pri_to_cpu[oldpri];
|
||||
|
||||
spin_lock_irqsave(&vec->lock, flags);
|
||||
|
||||
vec->count--;
|
||||
if (!vec->count)
|
||||
clear_bit(oldpri, cp->pri_active);
|
||||
cpu_clear(cpu, vec->mask);
|
||||
|
||||
spin_unlock_irqrestore(&vec->lock, flags);
|
||||
}
|
||||
|
||||
if (likely(newpri != CPUPRI_INVALID)) {
|
||||
struct cpupri_vec *vec = &cp->pri_to_cpu[newpri];
|
||||
|
||||
spin_lock_irqsave(&vec->lock, flags);
|
||||
|
||||
cpu_set(cpu, vec->mask);
|
||||
vec->count++;
|
||||
if (vec->count == 1)
|
||||
set_bit(newpri, cp->pri_active);
|
||||
|
||||
spin_unlock_irqrestore(&vec->lock, flags);
|
||||
}
|
||||
|
||||
*currpri = newpri;
|
||||
}
|
||||
|
||||
/**
|
||||
* cpupri_init - initialize the cpupri structure
|
||||
* @cp: The cpupri context
|
||||
*
|
||||
* Returns: (void)
|
||||
*/
|
||||
void cpupri_init(struct cpupri *cp)
|
||||
{
|
||||
int i;
|
||||
|
||||
memset(cp, 0, sizeof(*cp));
|
||||
|
||||
for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) {
|
||||
struct cpupri_vec *vec = &cp->pri_to_cpu[i];
|
||||
|
||||
spin_lock_init(&vec->lock);
|
||||
vec->count = 0;
|
||||
cpus_clear(vec->mask);
|
||||
}
|
||||
|
||||
for_each_possible_cpu(i)
|
||||
cp->cpu_to_pri[i] = CPUPRI_INVALID;
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,36 @@
|
||||
#ifndef _LINUX_CPUPRI_H
|
||||
#define _LINUX_CPUPRI_H
|
||||
|
||||
#include <linux/sched.h>
|
||||
|
||||
#define CPUPRI_NR_PRIORITIES (MAX_RT_PRIO + 2)
|
||||
#define CPUPRI_NR_PRI_WORDS BITS_TO_LONGS(CPUPRI_NR_PRIORITIES)
|
||||
|
||||
#define CPUPRI_INVALID -1
|
||||
#define CPUPRI_IDLE 0
|
||||
#define CPUPRI_NORMAL 1
|
||||
/* values 2-101 are RT priorities 0-99 */
|
||||
|
||||
struct cpupri_vec {
|
||||
spinlock_t lock;
|
||||
int count;
|
||||
cpumask_t mask;
|
||||
};
|
||||
|
||||
struct cpupri {
|
||||
struct cpupri_vec pri_to_cpu[CPUPRI_NR_PRIORITIES];
|
||||
long pri_active[CPUPRI_NR_PRI_WORDS];
|
||||
int cpu_to_pri[NR_CPUS];
|
||||
};
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
int cpupri_find(struct cpupri *cp,
|
||||
struct task_struct *p, cpumask_t *lowest_mask);
|
||||
void cpupri_set(struct cpupri *cp, int cpu, int pri);
|
||||
void cpupri_init(struct cpupri *cp);
|
||||
#else
|
||||
#define cpupri_set(cp, cpu, pri) do { } while (0)
|
||||
/*
 * !CONFIG_SMP stub for cpupri_init(struct cpupri *cp): expands to an empty
 * statement. It takes (and ignores) the context argument so that callers
 * written against the real prototype still build; an invocation with empty
 * parentheses remains valid as well.
 */
#define cpupri_init(cp) do { } while (0)
|
||||
#endif
|
||||
|
||||
#endif /* _LINUX_CPUPRI_H */
|
||||
+59
-5
@@ -119,9 +119,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
|
||||
struct sched_entity *last;
|
||||
unsigned long flags;
|
||||
|
||||
#if !defined(CONFIG_CGROUP_SCHED) || !defined(CONFIG_USER_SCHED)
|
||||
SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu);
|
||||
#else
|
||||
#if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED)
|
||||
char path[128] = "";
|
||||
struct cgroup *cgroup = NULL;
|
||||
struct task_group *tg = cfs_rq->tg;
|
||||
@@ -133,6 +131,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
|
||||
cgroup_path(cgroup, path, sizeof(path));
|
||||
|
||||
SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, path);
|
||||
#else
|
||||
SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu);
|
||||
#endif
|
||||
|
||||
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock",
|
||||
@@ -162,11 +162,64 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
|
||||
SEQ_printf(m, " .%-30s: %ld\n", "nr_running", cfs_rq->nr_running);
|
||||
SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight);
|
||||
#ifdef CONFIG_SCHEDSTATS
|
||||
SEQ_printf(m, " .%-30s: %d\n", "bkl_count",
|
||||
rq->bkl_count);
|
||||
#define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, rq->n);
|
||||
|
||||
P(yld_exp_empty);
|
||||
P(yld_act_empty);
|
||||
P(yld_both_empty);
|
||||
P(yld_count);
|
||||
|
||||
P(sched_switch);
|
||||
P(sched_count);
|
||||
P(sched_goidle);
|
||||
|
||||
P(ttwu_count);
|
||||
P(ttwu_local);
|
||||
|
||||
P(bkl_count);
|
||||
|
||||
#undef P
|
||||
#endif
|
||||
SEQ_printf(m, " .%-30s: %ld\n", "nr_spread_over",
|
||||
cfs_rq->nr_spread_over);
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
#ifdef CONFIG_SMP
|
||||
SEQ_printf(m, " .%-30s: %lu\n", "shares", cfs_rq->shares);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
|
||||
{
|
||||
#if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_RT_GROUP_SCHED)
|
||||
char path[128] = "";
|
||||
struct cgroup *cgroup = NULL;
|
||||
struct task_group *tg = rt_rq->tg;
|
||||
|
||||
if (tg)
|
||||
cgroup = tg->css.cgroup;
|
||||
|
||||
if (cgroup)
|
||||
cgroup_path(cgroup, path, sizeof(path));
|
||||
|
||||
SEQ_printf(m, "\nrt_rq[%d]:%s\n", cpu, path);
|
||||
#else
|
||||
SEQ_printf(m, "\nrt_rq[%d]:\n", cpu);
|
||||
#endif
|
||||
|
||||
|
||||
#define P(x) \
|
||||
SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(rt_rq->x))
|
||||
#define PN(x) \
|
||||
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rt_rq->x))
|
||||
|
||||
P(rt_nr_running);
|
||||
P(rt_throttled);
|
||||
PN(rt_time);
|
||||
PN(rt_runtime);
|
||||
|
||||
#undef PN
|
||||
#undef P
|
||||
}
|
||||
|
||||
static void print_cpu(struct seq_file *m, int cpu)
|
||||
@@ -208,6 +261,7 @@ static void print_cpu(struct seq_file *m, int cpu)
|
||||
#undef PN
|
||||
|
||||
print_cfs_stats(m, cpu);
|
||||
print_rt_stats(m, cpu);
|
||||
|
||||
print_rq(m, rq, cpu);
|
||||
}
|
||||
|
||||
+290
-123
File diff suppressed because it is too large
Load Diff
@@ -1,4 +1,5 @@
|
||||
SCHED_FEAT(NEW_FAIR_SLEEPERS, 1)
|
||||
SCHED_FEAT(NORMALIZED_SLEEPER, 1)
|
||||
SCHED_FEAT(WAKEUP_PREEMPT, 1)
|
||||
SCHED_FEAT(START_DEBIT, 1)
|
||||
SCHED_FEAT(AFFINE_WAKEUPS, 1)
|
||||
@@ -6,5 +7,7 @@ SCHED_FEAT(CACHE_HOT_BUDDY, 1)
|
||||
SCHED_FEAT(SYNC_WAKEUPS, 1)
|
||||
SCHED_FEAT(HRTICK, 1)
|
||||
SCHED_FEAT(DOUBLE_TICK, 0)
|
||||
SCHED_FEAT(NORMALIZED_SLEEPER, 1)
|
||||
SCHED_FEAT(DEADLINE, 1)
|
||||
SCHED_FEAT(ASYM_GRAN, 1)
|
||||
SCHED_FEAT(LB_BIAS, 0)
|
||||
SCHED_FEAT(LB_WAKEUP_UPDATE, 1)
|
||||
SCHED_FEAT(ASYM_EFF_LOAD, 1)
|
||||
|
||||
+264
-141
File diff suppressed because it is too large
Load Diff
+33
-9
@@ -118,6 +118,13 @@ rq_sched_info_depart(struct rq *rq, unsigned long long delta)
|
||||
if (rq)
|
||||
rq->rq_sched_info.cpu_time += delta;
|
||||
}
|
||||
|
||||
static inline void
|
||||
rq_sched_info_dequeued(struct rq *rq, unsigned long long delta)
|
||||
{
|
||||
if (rq)
|
||||
rq->rq_sched_info.run_delay += delta;
|
||||
}
|
||||
# define schedstat_inc(rq, field) do { (rq)->field++; } while (0)
|
||||
# define schedstat_add(rq, field, amt) do { (rq)->field += (amt); } while (0)
|
||||
# define schedstat_set(var, val) do { var = (val); } while (0)
|
||||
@@ -126,6 +133,9 @@ static inline void
|
||||
rq_sched_info_arrive(struct rq *rq, unsigned long long delta)
|
||||
{}
|
||||
static inline void
|
||||
rq_sched_info_dequeued(struct rq *rq, unsigned long long delta)
|
||||
{}
|
||||
static inline void
|
||||
rq_sched_info_depart(struct rq *rq, unsigned long long delta)
|
||||
{}
|
||||
# define schedstat_inc(rq, field) do { } while (0)
|
||||
@@ -134,6 +144,11 @@ rq_sched_info_depart(struct rq *rq, unsigned long long delta)
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
|
||||
static inline void sched_info_reset_dequeued(struct task_struct *t)
|
||||
{
|
||||
t->sched_info.last_queued = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Called when a process is dequeued from the active array and given
|
||||
* the cpu. We should note that with the exception of interactive
|
||||
@@ -143,15 +158,22 @@ rq_sched_info_depart(struct rq *rq, unsigned long long delta)
|
||||
* active queue, thus delaying tasks in the expired queue from running;
|
||||
* see scheduler_tick()).
|
||||
*
|
||||
* This function is only called from sched_info_arrive(), rather than
|
||||
* dequeue_task(). Even though a task may be queued and dequeued multiple
|
||||
* times as it is shuffled about, we're really interested in knowing how
|
||||
* long it was from the *first* time it was queued to the time that it
|
||||
* finally hit a cpu.
|
||||
* Though we are interested in knowing how long it was from the *first* time a
|
||||
* task was queued to the time that it finally hit a cpu, we call this routine
|
||||
* from dequeue_task() to account for possible rq->clock skew across cpus. The
|
||||
* delta taken on each cpu would annul the skew.
|
||||
*/
|
||||
static inline void sched_info_dequeued(struct task_struct *t)
|
||||
{
|
||||
t->sched_info.last_queued = 0;
|
||||
unsigned long long now = task_rq(t)->clock, delta = 0;
|
||||
|
||||
if (unlikely(sched_info_on()))
|
||||
if (t->sched_info.last_queued)
|
||||
delta = now - t->sched_info.last_queued;
|
||||
sched_info_reset_dequeued(t);
|
||||
t->sched_info.run_delay += delta;
|
||||
|
||||
rq_sched_info_dequeued(task_rq(t), delta);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -165,7 +187,7 @@ static void sched_info_arrive(struct task_struct *t)
|
||||
|
||||
if (t->sched_info.last_queued)
|
||||
delta = now - t->sched_info.last_queued;
|
||||
sched_info_dequeued(t);
|
||||
sched_info_reset_dequeued(t);
|
||||
t->sched_info.run_delay += delta;
|
||||
t->sched_info.last_arrival = now;
|
||||
t->sched_info.pcount++;
|
||||
@@ -242,7 +264,9 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next)
|
||||
__sched_info_switch(prev, next);
|
||||
}
|
||||
#else
|
||||
#define sched_info_queued(t) do { } while (0)
|
||||
#define sched_info_switch(t, next) do { } while (0)
|
||||
#define sched_info_queued(t) do { } while (0)
|
||||
#define sched_info_reset_dequeued(t) do { } while (0)
|
||||
#define sched_info_dequeued(t) do { } while (0)
|
||||
#define sched_info_switch(t, next) do { } while (0)
|
||||
#endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */
|
||||
|
||||
|
||||
@@ -264,6 +264,14 @@ static struct ctl_table kern_table[] = {
|
||||
.extra1 = &min_wakeup_granularity_ns,
|
||||
.extra2 = &max_wakeup_granularity_ns,
|
||||
},
|
||||
{
|
||||
.ctl_name = CTL_UNNUMBERED,
|
||||
.procname = "sched_shares_ratelimit",
|
||||
.data = &sysctl_sched_shares_ratelimit,
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.mode = 0644,
|
||||
.proc_handler = &proc_dointvec,
|
||||
},
|
||||
{
|
||||
.ctl_name = CTL_UNNUMBERED,
|
||||
.procname = "sched_child_runs_first",
|
||||
|
||||
@@ -276,6 +276,7 @@ void tick_nohz_stop_sched_tick(void)
|
||||
ts->tick_stopped = 1;
|
||||
ts->idle_jiffies = last_jiffies;
|
||||
rcu_enter_nohz();
|
||||
sched_clock_tick_stop(cpu);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -375,6 +376,7 @@ void tick_nohz_restart_sched_tick(void)
|
||||
select_nohz_load_balancer(0);
|
||||
now = ktime_get();
|
||||
tick_do_update_jiffies64(now);
|
||||
sched_clock_tick_start(cpu);
|
||||
cpu_clear(cpu, nohz_cpu_mask);
|
||||
|
||||
/*
|
||||
|
||||
Reference in New Issue
Block a user