Merge tag 'sched-urgent-2024-05-18' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes from Ingo Molnar:

 - Fix a sched_balance_newidle setting bug

 - Fix bug in the setting of /sys/fs/cgroup/test/cpu.max.burst

 - Fix variable-shadowing build warning

 - Extend sched-domains debug output

 - Fix documentation

 - Fix comments

* tag 'sched-urgent-2024-05-18' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/core: Fix incorrect initialization of the 'burst' parameter in cpu_max_write()
  sched/fair: Remove stale FREQUENCY_UTIL comment
  sched/fair: Fix initial util_avg calculation
  docs: cgroup-v1: Clarify that domain levels are system-specific
  sched/debug: Dump domains' level
  sched/fair: Allow disabling sched_balance_newidle with sched_relax_domain_level
  arch/topology: Fix variable naming to avoid shadowing
This commit is contained in:
Linus Torvalds
2024-05-19 11:38:15 -07:00
7 changed files with 19 additions and 12 deletions

View File

@@ -568,7 +568,7 @@ on the next tick. For some applications in special situation, waiting
The 'cpuset.sched_relax_domain_level' file allows you to request changing
this searching range as you like. This file takes int value which
-indicates size of searching range in levels ideally as follows,
+indicates size of searching range in levels approximately as follows,
otherwise initial value -1 that indicates the cpuset has no request.
====== ===========================================================
@@ -581,6 +581,11 @@ otherwise initial value -1 that indicates the cpuset has no request.
5 search system wide [on NUMA system]
====== ===========================================================
+Not all levels can be present and values can change depending on the
+system architecture and kernel configuration. Check
+/sys/kernel/debug/sched/domains/cpu*/domain*/ for system-specific
+details.
The system default is architecture dependent. The system default
can be changed using the relax_domain_level= boot parameter.

View File

@@ -179,7 +179,7 @@ DEFINE_PER_CPU(unsigned long, hw_pressure);
void topology_update_hw_pressure(const struct cpumask *cpus,
unsigned long capped_freq)
{
-unsigned long max_capacity, capacity, hw_pressure;
+unsigned long max_capacity, capacity, pressure;
u32 max_freq;
int cpu;
@@ -196,12 +196,12 @@ void topology_update_hw_pressure(const struct cpumask *cpus,
else
capacity = mult_frac(max_capacity, capped_freq, max_freq);
-hw_pressure = max_capacity - capacity;
+pressure = max_capacity - capacity;
-trace_hw_pressure_update(cpu, hw_pressure);
+trace_hw_pressure_update(cpu, pressure);
for_each_cpu(cpu, cpus)
-WRITE_ONCE(per_cpu(hw_pressure, cpu), hw_pressure);
+WRITE_ONCE(per_cpu(hw_pressure, cpu), pressure);
}
EXPORT_SYMBOL_GPL(topology_update_hw_pressure);

View File

@@ -2941,7 +2941,7 @@ bool current_cpuset_is_being_rebound(void)
static int update_relax_domain_level(struct cpuset *cs, s64 val)
{
#ifdef CONFIG_SMP
-if (val < -1 || val >= sched_domain_level_max)
+if (val < -1 || val > sched_domain_level_max + 1)
return -EINVAL;
#endif

View File

@@ -11401,7 +11401,7 @@ static ssize_t cpu_max_write(struct kernfs_open_file *of,
{
struct task_group *tg = css_tg(of_css(of));
u64 period = tg_get_cfs_period(tg);
-u64 burst = tg_get_cfs_burst(tg);
+u64 burst = tg->cfs_bandwidth.burst;
u64 quota;
int ret;

View File

@@ -425,6 +425,7 @@ static void register_sd(struct sched_domain *sd, struct dentry *parent)
debugfs_create_file("flags", 0444, parent, &sd->flags, &sd_flags_fops);
debugfs_create_file("groups_flags", 0444, parent, &sd->groups->flags, &sd_flags_fops);
+debugfs_create_u32("level", 0444, parent, (u32 *)&sd->level);
}
void update_sched_domain_debugfs(void)

View File

@@ -1030,7 +1030,8 @@ void init_entity_runnable_average(struct sched_entity *se)
* With new tasks being created, their initial util_avgs are extrapolated
* based on the cfs_rq's current util_avg:
*
-* util_avg = cfs_rq->util_avg / (cfs_rq->load_avg + 1) * se.load.weight
+* util_avg = cfs_rq->avg.util_avg / (cfs_rq->avg.load_avg + 1)
+* * se_weight(se)
*
* However, in many cases, the above util_avg does not give a desired
* value. Moreover, the sum of the util_avgs may be divergent, such
@@ -1077,7 +1078,7 @@ void post_init_entity_util_avg(struct task_struct *p)
if (cap > 0) {
if (cfs_rq->avg.util_avg != 0) {
-sa->util_avg = cfs_rq->avg.util_avg * se->load.weight;
+sa->util_avg = cfs_rq->avg.util_avg * se_weight(se);
sa->util_avg /= (cfs_rq->avg.load_avg + 1);
if (sa->util_avg > cap)
@@ -7898,8 +7899,8 @@ eenv_pd_max_util(struct energy_env *eenv, struct cpumask *pd_cpus,
* Performance domain frequency: utilization clamping
* must be considered since it affects the selection
* of the performance domain frequency.
-* NOTE: in case RT tasks are running, by default the
-* FREQUENCY_UTIL's utilization can be max OPP.
+* NOTE: in case RT tasks are running, by default the min
+* utilization can be max OPP.
*/
eff_util = effective_cpu_util(cpu, util, &min, &max);

View File

@@ -1474,7 +1474,7 @@ static void set_domain_attribute(struct sched_domain *sd,
} else
request = attr->relax_domain_level;
-if (sd->level > request) {
+if (sd->level >= request) {
/* Turn off idle balance on this domain: */
sd->flags &= ~(SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE);
}