Merge tag 'v4.13-rc7' into for-4.14/block-postmerge

Linux 4.13-rc7

Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Jens Axboe
2017-08-28 13:00:44 -06:00
1034 changed files with 15419 additions and 18261 deletions
+8 -6
View File
@@ -66,7 +66,7 @@ static struct fsnotify_group *audit_watch_group;
/* fsnotify events we care about. */
#define AUDIT_FS_WATCH (FS_MOVE | FS_CREATE | FS_DELETE | FS_DELETE_SELF |\
FS_MOVE_SELF | FS_EVENT_ON_CHILD)
FS_MOVE_SELF | FS_EVENT_ON_CHILD | FS_UNMOUNT)
static void audit_free_parent(struct audit_parent *parent)
{
@@ -457,13 +457,15 @@ void audit_remove_watch_rule(struct audit_krule *krule)
list_del(&krule->rlist);
if (list_empty(&watch->rules)) {
/*
* audit_remove_watch() drops our reference to 'parent' which
* can get freed. Grab our own reference to be safe.
*/
audit_get_parent(parent);
audit_remove_watch(watch);
if (list_empty(&parent->watches)) {
audit_get_parent(parent);
if (list_empty(&parent->watches))
fsnotify_destroy_mark(&parent->mark, audit_watch_group);
audit_put_parent(parent);
}
audit_put_parent(parent);
}
}
+2 -2
View File
@@ -1289,7 +1289,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
info_len = min_t(u32, sizeof(info), info_len);
if (copy_from_user(&info, uinfo, info_len))
return err;
return -EFAULT;
info.type = prog->type;
info.id = prog->aux->id;
@@ -1312,7 +1312,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
}
ulen = info.xlated_prog_len;
info.xlated_prog_len = bpf_prog_size(prog->len);
info.xlated_prog_len = bpf_prog_insn_size(prog);
if (info.xlated_prog_len && ulen) {
uinsns = u64_to_user_ptr(info.xlated_prog_insns);
ulen = min_t(u32, info.xlated_prog_len, ulen);
+15 -6
View File
@@ -1865,10 +1865,12 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
* do our normal operations to the register, we need to set the values
* to the min/max since they are undefined.
*/
if (min_val == BPF_REGISTER_MIN_RANGE)
dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
if (max_val == BPF_REGISTER_MAX_RANGE)
dst_reg->max_value = BPF_REGISTER_MAX_RANGE;
if (opcode != BPF_SUB) {
if (min_val == BPF_REGISTER_MIN_RANGE)
dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
if (max_val == BPF_REGISTER_MAX_RANGE)
dst_reg->max_value = BPF_REGISTER_MAX_RANGE;
}
switch (opcode) {
case BPF_ADD:
@@ -1879,10 +1881,17 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
dst_reg->min_align = min(src_align, dst_align);
break;
case BPF_SUB:
/* If one of our values was at the end of our ranges, then the
* _opposite_ value in the dst_reg goes to the end of our range.
*/
if (min_val == BPF_REGISTER_MIN_RANGE)
dst_reg->max_value = BPF_REGISTER_MAX_RANGE;
if (max_val == BPF_REGISTER_MAX_RANGE)
dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
dst_reg->min_value -= min_val;
dst_reg->min_value -= max_val;
if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
dst_reg->max_value -= max_val;
dst_reg->max_value -= min_val;
dst_reg->min_align = min(src_align, dst_align);
break;
case BPF_MUL:
+3
View File
@@ -33,6 +33,9 @@ struct cgroup_taskset {
struct list_head src_csets;
struct list_head dst_csets;
/* the number of tasks in the set */
int nr_tasks;
/* the subsys currently being processed */
int ssid;
+37 -29
View File
@@ -2007,6 +2007,8 @@ static void cgroup_migrate_add_task(struct task_struct *task,
if (!cset->mg_src_cgrp)
return;
mgctx->tset.nr_tasks++;
list_move_tail(&task->cg_list, &cset->mg_tasks);
if (list_empty(&cset->mg_node))
list_add_tail(&cset->mg_node,
@@ -2095,21 +2097,19 @@ static int cgroup_migrate_execute(struct cgroup_mgctx *mgctx)
struct css_set *cset, *tmp_cset;
int ssid, failed_ssid, ret;
/* methods shouldn't be called if no task is actually migrating */
if (list_empty(&tset->src_csets))
return 0;
/* check that we can legitimately attach to the cgroup */
do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
if (ss->can_attach) {
tset->ssid = ssid;
ret = ss->can_attach(tset);
if (ret) {
failed_ssid = ssid;
goto out_cancel_attach;
if (tset->nr_tasks) {
do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
if (ss->can_attach) {
tset->ssid = ssid;
ret = ss->can_attach(tset);
if (ret) {
failed_ssid = ssid;
goto out_cancel_attach;
}
}
}
} while_each_subsys_mask();
} while_each_subsys_mask();
}
/*
* Now that we're guaranteed success, proceed to move all tasks to
@@ -2138,25 +2138,29 @@ static int cgroup_migrate_execute(struct cgroup_mgctx *mgctx)
*/
tset->csets = &tset->dst_csets;
do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
if (ss->attach) {
tset->ssid = ssid;
ss->attach(tset);
}
} while_each_subsys_mask();
if (tset->nr_tasks) {
do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
if (ss->attach) {
tset->ssid = ssid;
ss->attach(tset);
}
} while_each_subsys_mask();
}
ret = 0;
goto out_release_tset;
out_cancel_attach:
do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
if (ssid == failed_ssid)
break;
if (ss->cancel_attach) {
tset->ssid = ssid;
ss->cancel_attach(tset);
}
} while_each_subsys_mask();
if (tset->nr_tasks) {
do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
if (ssid == failed_ssid)
break;
if (ss->cancel_attach) {
tset->ssid = ssid;
ss->cancel_attach(tset);
}
} while_each_subsys_mask();
}
out_release_tset:
spin_lock_irq(&css_set_lock);
list_splice_init(&tset->dst_csets, &tset->src_csets);
@@ -2998,11 +3002,11 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
cgrp->subtree_control &= ~disable;
ret = cgroup_apply_control(cgrp);
cgroup_finalize_control(cgrp, ret);
if (ret)
goto out_unlock;
kernfs_activate(cgrp->kn);
ret = 0;
out_unlock:
cgroup_kn_unlock(of->kn);
return ret ?: nbytes;
@@ -4670,6 +4674,10 @@ int __init cgroup_init(void)
if (ss->bind)
ss->bind(init_css_set.subsys[ssid]);
mutex_lock(&cgroup_mutex);
css_populate_dir(init_css_set.subsys[ssid]);
mutex_unlock(&cgroup_mutex);
}
/* init_css_set.subsys[] has been updated, re-hash */
+1
View File
@@ -63,6 +63,7 @@
#include <linux/cgroup.h>
#include <linux/wait.h>
DEFINE_STATIC_KEY_FALSE(cpusets_pre_enable_key);
DEFINE_STATIC_KEY_FALSE(cpusets_enabled_key);
/* See "Frequency meter" comments, below. */
+58 -28
View File
@@ -2217,6 +2217,33 @@ static int group_can_go_on(struct perf_event *event,
return can_add_hw;
}
/*
* Complement to update_event_times(). This computes the tstamp_* values to
* continue 'enabled' state from @now, and effectively discards the time
* between the prior tstamp_stopped and now (as we were in the OFF state, or
* just switched (context) time base).
*
* This further assumes '@event->state == INACTIVE' (we just came from OFF) and
* cannot have been scheduled in yet. And going into INACTIVE state means
* '@event->tstamp_stopped = @now'.
*
* Thus given the rules of update_event_times():
*
* total_time_enabled = tstamp_stopped - tstamp_enabled
* total_time_running = tstamp_stopped - tstamp_running
*
* We can insert 'tstamp_stopped == now' and reverse them to compute new
* tstamp_* values.
*/
static void __perf_event_enable_time(struct perf_event *event, u64 now)
{
WARN_ON_ONCE(event->state != PERF_EVENT_STATE_INACTIVE);
event->tstamp_stopped = now;
event->tstamp_enabled = now - event->total_time_enabled;
event->tstamp_running = now - event->total_time_running;
}
static void add_event_to_ctx(struct perf_event *event,
struct perf_event_context *ctx)
{
@@ -2224,9 +2251,12 @@ static void add_event_to_ctx(struct perf_event *event,
list_add_event(event, ctx);
perf_group_attach(event);
event->tstamp_enabled = tstamp;
event->tstamp_running = tstamp;
event->tstamp_stopped = tstamp;
/*
* We can be called with event->state == STATE_OFF when we create with
* .disabled = 1. In that case the IOC_ENABLE will call this function.
*/
if (event->state == PERF_EVENT_STATE_INACTIVE)
__perf_event_enable_time(event, tstamp);
}
static void ctx_sched_out(struct perf_event_context *ctx,
@@ -2471,10 +2501,11 @@ static void __perf_event_mark_enabled(struct perf_event *event)
u64 tstamp = perf_event_time(event);
event->state = PERF_EVENT_STATE_INACTIVE;
event->tstamp_enabled = tstamp - event->total_time_enabled;
__perf_event_enable_time(event, tstamp);
list_for_each_entry(sub, &event->sibling_list, group_entry) {
/* XXX should not be > INACTIVE if event isn't */
if (sub->state >= PERF_EVENT_STATE_INACTIVE)
sub->tstamp_enabled = tstamp - sub->total_time_enabled;
__perf_event_enable_time(sub, tstamp);
}
}
@@ -5090,7 +5121,7 @@ static void perf_mmap_open(struct vm_area_struct *vma)
atomic_inc(&event->rb->aux_mmap_count);
if (event->pmu->event_mapped)
event->pmu->event_mapped(event);
event->pmu->event_mapped(event, vma->vm_mm);
}
static void perf_pmu_output_stop(struct perf_event *event);
@@ -5113,7 +5144,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
unsigned long size = perf_data_size(rb);
if (event->pmu->event_unmapped)
event->pmu->event_unmapped(event);
event->pmu->event_unmapped(event, vma->vm_mm);
/*
* rb->aux_mmap_count will always drop before rb->mmap_count and
@@ -5411,7 +5442,7 @@ aux_unlock:
vma->vm_ops = &perf_mmap_vmops;
if (event->pmu->event_mapped)
event->pmu->event_mapped(event);
event->pmu->event_mapped(event, vma->vm_mm);
return ret;
}
@@ -10001,28 +10032,27 @@ SYSCALL_DEFINE5(perf_event_open,
goto err_context;
/*
* Do not allow to attach to a group in a different
* task or CPU context:
* Make sure we're both events for the same CPU;
* grouping events for different CPUs is broken; since
* you can never concurrently schedule them anyhow.
*/
if (move_group) {
/*
* Make sure we're both on the same task, or both
* per-cpu events.
*/
if (group_leader->ctx->task != ctx->task)
goto err_context;
if (group_leader->cpu != event->cpu)
goto err_context;
/*
* Make sure we're both events for the same CPU;
* grouping events for different CPUs is broken; since
* you can never concurrently schedule them anyhow.
*/
if (group_leader->cpu != event->cpu)
goto err_context;
} else {
if (group_leader->ctx != ctx)
goto err_context;
}
/*
* Make sure we're both on the same task, or both
* per-CPU events.
*/
if (group_leader->ctx->task != ctx->task)
goto err_context;
/*
* Do not allow to attach to a group in a different task
* or CPU context. If we're moving SW events, we'll fix
* this up later, so allow that.
*/
if (!move_group && group_leader->ctx != ctx)
goto err_context;
/*
* Only a group leader can be exclusive or pinned
+2 -1
View File
@@ -806,8 +806,9 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
mm_init_cpumask(mm);
mm_init_aio(mm);
mm_init_owner(mm, p);
RCU_INIT_POINTER(mm->exe_file, NULL);
mmu_notifier_mm_init(mm);
clear_tlb_flush_pending(mm);
init_tlb_flush_pending(mm);
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
mm->pmd_huge_pte = NULL;
#endif
+3 -2
View File
@@ -670,13 +670,14 @@ again:
* this reference was taken by ihold under the page lock
* pinning the inode in place so i_lock was unnecessary. The
* only way for this check to fail is if the inode was
* truncated in parallel so warn for now if this happens.
* truncated in parallel which is almost certainly an
* application bug. In such a case, just retry.
*
* We are not calling into get_futex_key_refs() in file-backed
* cases, therefore a successful atomic_inc return below will
* guarantee that get_futex_key() will still imply smp_mb(); (B).
*/
if (WARN_ON_ONCE(!atomic_inc_not_zero(&inode->i_count))) {
if (!atomic_inc_not_zero(&inode->i_count)) {
rcu_read_unlock();
put_page(page);
+8 -2
View File
@@ -1000,7 +1000,7 @@ EXPORT_SYMBOL_GPL(irq_set_chip_and_handler_name);
void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set)
{
unsigned long flags;
unsigned long flags, trigger, tmp;
struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0);
if (!desc)
@@ -1014,6 +1014,8 @@ void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set)
irq_settings_clr_and_set(desc, clr, set);
trigger = irqd_get_trigger_type(&desc->irq_data);
irqd_clear(&desc->irq_data, IRQD_NO_BALANCING | IRQD_PER_CPU |
IRQD_TRIGGER_MASK | IRQD_LEVEL | IRQD_MOVE_PCNTXT);
if (irq_settings_has_no_balance_set(desc))
@@ -1025,7 +1027,11 @@ void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set)
if (irq_settings_is_level(desc))
irqd_set(&desc->irq_data, IRQD_LEVEL);
irqd_set(&desc->irq_data, irq_settings_get_trigger_mask(desc));
tmp = irq_settings_get_trigger_mask(desc);
if (tmp != IRQ_TYPE_NONE)
trigger = tmp;
irqd_set(&desc->irq_data, trigger);
irq_put_desc_unlock(desc, flags);
}
+7 -2
View File
@@ -95,8 +95,13 @@ static bool migrate_one_irq(struct irq_desc *desc)
affinity = cpu_online_mask;
brokeaff = true;
}
err = irq_do_set_affinity(d, affinity, true);
/*
* Do not set the force argument of irq_do_set_affinity() as this
* disables the masking of offline CPUs from the supplied affinity
* mask and therefore might keep/reassign the irq to the outgoing
* CPU.
*/
err = irq_do_set_affinity(d, affinity, false);
if (err) {
pr_warn_ratelimited("IRQ%u: set affinity failed(%d).\n",
d->irq, err);
+2 -2
View File
@@ -165,7 +165,7 @@ irq_hw_number_t ipi_get_hwirq(unsigned int irq, unsigned int cpu)
struct irq_data *data = irq_get_irq_data(irq);
struct cpumask *ipimask = data ? irq_data_get_affinity_mask(data) : NULL;
if (!data || !ipimask || cpu > nr_cpu_ids)
if (!data || !ipimask || cpu >= nr_cpu_ids)
return INVALID_HWIRQ;
if (!cpumask_test_cpu(cpu, ipimask))
@@ -195,7 +195,7 @@ static int ipi_send_verify(struct irq_chip *chip, struct irq_data *data,
if (!chip->ipi_send_single && !chip->ipi_send_mask)
return -EINVAL;
if (cpu > nr_cpu_ids)
if (cpu >= nr_cpu_ids)
return -EINVAL;
if (dest) {
+23 -2
View File
@@ -70,6 +70,18 @@ static DECLARE_RWSEM(umhelper_sem);
static atomic_t kmod_concurrent_max = ATOMIC_INIT(MAX_KMOD_CONCURRENT);
static DECLARE_WAIT_QUEUE_HEAD(kmod_wq);
/*
* This is a restriction on having *all* MAX_KMOD_CONCURRENT threads
* running at the same time without returning. When this happens we
* believe you've somehow ended up with a recursive module dependency
* creating a loop.
*
* We have no option but to fail.
*
* Userspace should proactively try to detect and prevent these.
*/
#define MAX_KMOD_ALL_BUSY_TIMEOUT 5
/*
modprobe_path is set via /proc/sys.
*/
@@ -167,8 +179,17 @@ int __request_module(bool wait, const char *fmt, ...)
pr_warn_ratelimited("request_module: kmod_concurrent_max (%u) close to 0 (max_modprobes: %u), for module %s, throttling...",
atomic_read(&kmod_concurrent_max),
MAX_KMOD_CONCURRENT, module_name);
wait_event_interruptible(kmod_wq,
atomic_dec_if_positive(&kmod_concurrent_max) >= 0);
ret = wait_event_killable_timeout(kmod_wq,
atomic_dec_if_positive(&kmod_concurrent_max) >= 0,
MAX_KMOD_ALL_BUSY_TIMEOUT * HZ);
if (!ret) {
pr_warn_ratelimited("request_module: modprobe %s cannot be processed, kmod busy with %d threads for more than %d seconds now",
module_name, MAX_KMOD_CONCURRENT, MAX_KMOD_ALL_BUSY_TIMEOUT);
return -ETIME;
} else if (ret == -ERESTARTSYS) {
pr_warn_ratelimited("request_module: sigkill sent for modprobe %s, giving up", module_name);
return ret;
}
}
trace_module_request(module_name, wait, _RET_IP_);
+4 -10
View File
@@ -527,8 +527,11 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
if (!ns)
ns = task_active_pid_ns(current);
if (likely(pid_alive(task))) {
if (type != PIDTYPE_PID)
if (type != PIDTYPE_PID) {
if (type == __PIDTYPE_TGID)
type = PIDTYPE_PID;
task = task->group_leader;
}
nr = pid_nr_ns(rcu_dereference(task->pids[type].pid), ns);
}
rcu_read_unlock();
@@ -537,12 +540,6 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
}
EXPORT_SYMBOL(__task_pid_nr_ns);
pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
{
return pid_nr_ns(task_tgid(tsk), ns);
}
EXPORT_SYMBOL(task_tgid_nr_ns);
struct pid_namespace *task_active_pid_ns(struct task_struct *tsk)
{
return ns_of_pid(task_pid(tsk));
@@ -575,13 +572,10 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
*/
void __init pidhash_init(void)
{
unsigned int pidhash_size;
pid_hash = alloc_large_system_hash("PID", sizeof(*pid_hash), 0, 18,
HASH_EARLY | HASH_SMALL | HASH_ZERO,
&pidhash_shift, NULL,
0, 4096);
pidhash_size = 1U << pidhash_shift;
}
void __init pidmap_init(void)
+1 -1
View File
@@ -1650,7 +1650,7 @@ static unsigned long minimum_image_size(unsigned long saveable)
{
unsigned long size;
size = global_page_state(NR_SLAB_RECLAIMABLE)
size = global_node_page_state(NR_SLAB_RECLAIMABLE)
+ global_node_page_state(NR_ACTIVE_ANON)
+ global_node_page_state(NR_INACTIVE_ANON)
+ global_node_page_state(NR_ACTIVE_FILE)
+1 -1
View File
@@ -2069,7 +2069,7 @@ out:
/**
* try_to_wake_up_local - try to wake up a local task with rq lock held
* @p: the thread to be awakened
* @cookie: context's cookie for pinning
* @rf: request-queue flags for pinning
*
* Put @p on the run-queue if it's not already there. The caller must
* ensure that this_rq() is locked, @p is bound to this_rq() and not
+4 -3
View File
@@ -70,9 +70,10 @@ static void __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode,
list_for_each_entry_safe(curr, next, &wq_head->head, entry) {
unsigned flags = curr->flags;
if (curr->func(curr, mode, wake_flags, key) &&
(flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
int ret = curr->func(curr, mode, wake_flags, key);
if (ret < 0)
break;
if (ret && (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
break;
}
}
+12 -5
View File
@@ -1194,7 +1194,11 @@ force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
recalc_sigpending_and_wake(t);
}
}
if (action->sa.sa_handler == SIG_DFL)
/*
* Don't clear SIGNAL_UNKILLABLE for traced tasks, users won't expect
* debugging to leave init killable.
*/
if (action->sa.sa_handler == SIG_DFL && !t->ptrace)
t->signal->flags &= ~SIGNAL_UNKILLABLE;
ret = specific_send_sig_info(sig, info, t);
spin_unlock_irqrestore(&t->sighand->siglock, flags);
@@ -3303,12 +3307,15 @@ SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, set)
#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE1(sigpending, compat_old_sigset_t __user *, set32)
{
#ifdef __BIG_ENDIAN
sigset_t set;
int err = do_sigpending(&set, sizeof(old_sigset_t));
if (err == 0)
if (copy_to_user(set32, &set, sizeof(old_sigset_t)))
err = -EFAULT;
int err = do_sigpending(&set, sizeof(set.sig[0]));
if (!err)
err = put_user(set.sig[0], set32);
return err;
#else
return sys_rt_sigpending((sigset_t __user *)set32, sizeof(*set32));
#endif
}
#endif
+42 -10
View File
@@ -203,6 +203,7 @@ struct timer_base {
bool migration_enabled;
bool nohz_active;
bool is_idle;
bool must_forward_clk;
DECLARE_BITMAP(pending_map, WHEEL_SIZE);
struct hlist_head vectors[WHEEL_SIZE];
} ____cacheline_aligned;
@@ -856,13 +857,19 @@ get_target_base(struct timer_base *base, unsigned tflags)
static inline void forward_timer_base(struct timer_base *base)
{
unsigned long jnow = READ_ONCE(jiffies);
unsigned long jnow;
/*
* We only forward the base when it's idle and we have a delta between
* base clock and jiffies.
* We only forward the base when we are idle or have just come out of
* idle (must_forward_clk logic), and have a delta between base clock
* and jiffies. In the common case, run_timers will take care of it.
*/
if (!base->is_idle || (long) (jnow - base->clk) < 2)
if (likely(!base->must_forward_clk))
return;
jnow = READ_ONCE(jiffies);
base->must_forward_clk = base->is_idle;
if ((long)(jnow - base->clk) < 2)
return;
/*
@@ -938,6 +945,11 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
* same array bucket then just return:
*/
if (timer_pending(timer)) {
/*
* The downside of this optimization is that it can result in
* larger granularity than you would get from adding a new
* timer with this expiry.
*/
if (timer->expires == expires)
return 1;
@@ -948,6 +960,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
* dequeue/enqueue dance.
*/
base = lock_timer_base(timer, &flags);
forward_timer_base(base);
clk = base->clk;
idx = calc_wheel_index(expires, clk);
@@ -964,6 +977,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
}
} else {
base = lock_timer_base(timer, &flags);
forward_timer_base(base);
}
ret = detach_if_pending(timer, base, false);
@@ -991,12 +1005,10 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
raw_spin_lock(&base->lock);
WRITE_ONCE(timer->flags,
(timer->flags & ~TIMER_BASEMASK) | base->cpu);
forward_timer_base(base);
}
}
/* Try to forward a stale timer base clock */
forward_timer_base(base);
timer->expires = expires;
/*
* If 'idx' was calculated above and the base time did not advance
@@ -1112,6 +1124,7 @@ void add_timer_on(struct timer_list *timer, int cpu)
WRITE_ONCE(timer->flags,
(timer->flags & ~TIMER_BASEMASK) | cpu);
}
forward_timer_base(base);
debug_activate(timer, timer->expires);
internal_add_timer(base, timer);
@@ -1495,12 +1508,18 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
base->is_idle = false;
} else {
if (!is_max_delta)
expires = basem + (nextevt - basej) * TICK_NSEC;
expires = basem + (u64)(nextevt - basej) * TICK_NSEC;
/*
* If we expect to sleep more than a tick, mark the base idle:
* If we expect to sleep more than a tick, mark the base idle.
* Also the tick is stopped so any added timer must forward
* the base clk itself to keep granularity small. This idle
* logic is only maintained for the BASE_STD base, deferrable
* timers may still see large granularity skew (by design).
*/
if ((expires - basem) > TICK_NSEC)
if ((expires - basem) > TICK_NSEC) {
base->must_forward_clk = true;
base->is_idle = true;
}
}
raw_spin_unlock(&base->lock);
@@ -1611,6 +1630,19 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h)
{
struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
/*
* must_forward_clk must be cleared before running timers so that any
* timer functions that call mod_timer will not try to forward the
* base. idle trcking / clock forwarding logic is only used with
* BASE_STD timers.
*
* The deferrable base does not do idle tracking at all, so we do
* not forward it. This can result in very large variations in
* granularity for deferrable timers, but they can be deferred for
* long periods due to idle.
*/
base->must_forward_clk = false;
__run_timers(base);
if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active)
__run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]));
+30 -4
View File
@@ -204,10 +204,36 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
fmt_cnt++;
}
return __trace_printk(1/* fake ip will not be printed */, fmt,
mod[0] == 2 ? arg1 : mod[0] == 1 ? (long) arg1 : (u32) arg1,
mod[1] == 2 ? arg2 : mod[1] == 1 ? (long) arg2 : (u32) arg2,
mod[2] == 2 ? arg3 : mod[2] == 1 ? (long) arg3 : (u32) arg3);
/* Horrid workaround for getting va_list handling working with different
* argument type combinations generically for 32 and 64 bit archs.
*/
#define __BPF_TP_EMIT() __BPF_ARG3_TP()
#define __BPF_TP(...) \
__trace_printk(1 /* Fake ip will not be printed. */, \
fmt, ##__VA_ARGS__)
#define __BPF_ARG1_TP(...) \
((mod[0] == 2 || (mod[0] == 1 && __BITS_PER_LONG == 64)) \
? __BPF_TP(arg1, ##__VA_ARGS__) \
: ((mod[0] == 1 || (mod[0] == 0 && __BITS_PER_LONG == 32)) \
? __BPF_TP((long)arg1, ##__VA_ARGS__) \
: __BPF_TP((u32)arg1, ##__VA_ARGS__)))
#define __BPF_ARG2_TP(...) \
((mod[1] == 2 || (mod[1] == 1 && __BITS_PER_LONG == 64)) \
? __BPF_ARG1_TP(arg2, ##__VA_ARGS__) \
: ((mod[1] == 1 || (mod[1] == 0 && __BITS_PER_LONG == 32)) \
? __BPF_ARG1_TP((long)arg2, ##__VA_ARGS__) \
: __BPF_ARG1_TP((u32)arg2, ##__VA_ARGS__)))
#define __BPF_ARG3_TP(...) \
((mod[2] == 2 || (mod[2] == 1 && __BITS_PER_LONG == 64)) \
? __BPF_ARG2_TP(arg3, ##__VA_ARGS__) \
: ((mod[2] == 1 || (mod[2] == 0 && __BITS_PER_LONG == 32)) \
? __BPF_ARG2_TP((long)arg3, ##__VA_ARGS__) \
: __BPF_ARG2_TP((u32)arg3, ##__VA_ARGS__)))
return __BPF_TP_EMIT();
}
static const struct bpf_func_proto bpf_trace_printk_proto = {

Some files were not shown because too many files have changed in this diff Show More