You've already forked linux-apfs
mirror of
https://github.com/linux-apfs/linux-apfs.git
synced 2026-05-01 15:00:59 -07:00
Merge back earlier cpufreq changes for v4.11.
This commit is contained in:
@@ -8,6 +8,8 @@
|
||||
|
||||
Dominik Brodowski <linux@brodo.de>
|
||||
David Kimdon <dwhedon@debian.org>
|
||||
Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
||||
Viresh Kumar <viresh.kumar@linaro.org>
|
||||
|
||||
|
||||
|
||||
@@ -36,10 +38,11 @@ speed limits (like LCD drivers on ARM architecture). Additionally, the
|
||||
kernel "constant" loops_per_jiffy is updated on frequency changes
|
||||
here.
|
||||
|
||||
Reference counting is done by cpufreq_get_cpu and cpufreq_put_cpu,
|
||||
which make sure that the cpufreq processor driver is correctly
|
||||
registered with the core, and will not be unloaded until
|
||||
cpufreq_put_cpu is called.
|
||||
Reference counting of the cpufreq policies is done by cpufreq_cpu_get
|
||||
and cpufreq_cpu_put, which make sure that the cpufreq driver is
|
||||
correctly registered with the core, and will not be unloaded until
|
||||
cpufreq_put_cpu is called. That also ensures that the respective cpufreq
|
||||
policy doesn't get freed while being used.
|
||||
|
||||
2. CPUFreq notifiers
|
||||
====================
|
||||
@@ -69,18 +72,16 @@ CPUFreq policy notifier is called twice for a policy transition:
|
||||
The phase is specified in the second argument to the notifier.
|
||||
|
||||
The third argument, a void *pointer, points to a struct cpufreq_policy
|
||||
consisting of five values: cpu, min, max, policy and max_cpu_freq. min
|
||||
and max are the lower and upper frequencies (in kHz) of the new
|
||||
policy, policy the new policy, cpu the number of the affected CPU; and
|
||||
max_cpu_freq the maximum supported CPU frequency. This value is given
|
||||
for informational purposes only.
|
||||
consisting of several values, including min, max (the lower and upper
|
||||
frequencies (in kHz) of the new policy).
|
||||
|
||||
|
||||
2.2 CPUFreq transition notifiers
|
||||
--------------------------------
|
||||
|
||||
These are notified twice when the CPUfreq driver switches the CPU core
|
||||
frequency and this change has any external implications.
|
||||
These are notified twice for each online CPU in the policy, when the
|
||||
CPUfreq driver switches the CPU core frequency and this change has no
|
||||
any external implications.
|
||||
|
||||
The second argument specifies the phase - CPUFREQ_PRECHANGE or
|
||||
CPUFREQ_POSTCHANGE.
|
||||
@@ -90,6 +91,7 @@ values:
|
||||
cpu - number of the affected CPU
|
||||
old - old frequency
|
||||
new - new frequency
|
||||
flags - flags of the cpufreq driver
|
||||
|
||||
3. CPUFreq Table Generation with Operating Performance Point (OPP)
|
||||
==================================================================
|
||||
|
||||
@@ -9,6 +9,8 @@
|
||||
|
||||
|
||||
Dominik Brodowski <linux@brodo.de>
|
||||
Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
||||
Viresh Kumar <viresh.kumar@linaro.org>
|
||||
|
||||
|
||||
|
||||
@@ -49,49 +51,65 @@ using cpufreq_register_driver()
|
||||
|
||||
What shall this struct cpufreq_driver contain?
|
||||
|
||||
cpufreq_driver.name - The name of this driver.
|
||||
.name - The name of this driver.
|
||||
|
||||
cpufreq_driver.init - A pointer to the per-CPU initialization
|
||||
function.
|
||||
.init - A pointer to the per-policy initialization function.
|
||||
|
||||
cpufreq_driver.verify - A pointer to a "verification" function.
|
||||
.verify - A pointer to a "verification" function.
|
||||
|
||||
cpufreq_driver.setpolicy _or_
|
||||
cpufreq_driver.target/
|
||||
target_index - See below on the differences.
|
||||
.setpolicy _or_ .fast_switch _or_ .target _or_ .target_index - See
|
||||
below on the differences.
|
||||
|
||||
And optionally
|
||||
|
||||
cpufreq_driver.exit - A pointer to a per-CPU cleanup
|
||||
function called during CPU_POST_DEAD
|
||||
phase of cpu hotplug process.
|
||||
.flags - Hints for the cpufreq core.
|
||||
|
||||
cpufreq_driver.stop_cpu - A pointer to a per-CPU stop function
|
||||
called during CPU_DOWN_PREPARE phase of
|
||||
cpu hotplug process.
|
||||
.driver_data - cpufreq driver specific data.
|
||||
|
||||
cpufreq_driver.resume - A pointer to a per-CPU resume function
|
||||
which is called with interrupts disabled
|
||||
and _before_ the pre-suspend frequency
|
||||
and/or policy is restored by a call to
|
||||
->target/target_index or ->setpolicy.
|
||||
.resolve_freq - Returns the most appropriate frequency for a target
|
||||
frequency. Doesn't change the frequency though.
|
||||
|
||||
cpufreq_driver.attr - A pointer to a NULL-terminated list of
|
||||
"struct freq_attr" which allow to
|
||||
export values to sysfs.
|
||||
.get_intermediate and target_intermediate - Used to switch to stable
|
||||
frequency while changing CPU frequency.
|
||||
|
||||
cpufreq_driver.get_intermediate
|
||||
and target_intermediate Used to switch to stable frequency while
|
||||
changing CPU frequency.
|
||||
.get - Returns current frequency of the CPU.
|
||||
|
||||
.bios_limit - Returns HW/BIOS max frequency limitations for the CPU.
|
||||
|
||||
.exit - A pointer to a per-policy cleanup function called during
|
||||
CPU_POST_DEAD phase of cpu hotplug process.
|
||||
|
||||
.stop_cpu - A pointer to a per-policy stop function called during
|
||||
CPU_DOWN_PREPARE phase of cpu hotplug process.
|
||||
|
||||
.suspend - A pointer to a per-policy suspend function which is called
|
||||
with interrupts disabled and _after_ the governor is stopped for the
|
||||
policy.
|
||||
|
||||
.resume - A pointer to a per-policy resume function which is called
|
||||
with interrupts disabled and _before_ the governor is started again.
|
||||
|
||||
.ready - A pointer to a per-policy ready function which is called after
|
||||
the policy is fully initialized.
|
||||
|
||||
.attr - A pointer to a NULL-terminated list of "struct freq_attr" which
|
||||
allow to export values to sysfs.
|
||||
|
||||
.boost_enabled - If set, boost frequencies are enabled.
|
||||
|
||||
.set_boost - A pointer to a per-policy function to enable/disable boost
|
||||
frequencies.
|
||||
|
||||
|
||||
1.2 Per-CPU Initialization
|
||||
--------------------------
|
||||
|
||||
Whenever a new CPU is registered with the device model, or after the
|
||||
cpufreq driver registers itself, the per-CPU initialization function
|
||||
cpufreq_driver.init is called. It takes a struct cpufreq_policy
|
||||
*policy as argument. What to do now?
|
||||
cpufreq driver registers itself, the per-policy initialization function
|
||||
cpufreq_driver.init is called if no cpufreq policy existed for the CPU.
|
||||
Note that the .init() and .exit() routines are called only once for the
|
||||
policy and not for each CPU managed by the policy. It takes a struct
|
||||
cpufreq_policy *policy as argument. What to do now?
|
||||
|
||||
If necessary, activate the CPUfreq support on your CPU.
|
||||
|
||||
@@ -117,47 +135,45 @@ policy->governor must contain the "default policy" for
|
||||
cpufreq_driver.setpolicy or
|
||||
cpufreq_driver.target/target_index is called
|
||||
with these values.
|
||||
policy->cpus Update this with the masks of the
|
||||
(online + offline) CPUs that do DVFS
|
||||
along with this CPU (i.e. that share
|
||||
clock/voltage rails with it).
|
||||
|
||||
For setting some of these values (cpuinfo.min[max]_freq, policy->min[max]), the
|
||||
frequency table helpers might be helpful. See the section 2 for more information
|
||||
on them.
|
||||
|
||||
SMP systems normally have same clock source for a group of cpus. For these the
|
||||
.init() would be called only once for the first online cpu. Here the .init()
|
||||
routine must initialize policy->cpus with mask of all possible cpus (Online +
|
||||
Offline) that share the clock. Then the core would copy this mask onto
|
||||
policy->related_cpus and will reset policy->cpus to carry only online cpus.
|
||||
|
||||
|
||||
1.3 verify
|
||||
------------
|
||||
----------
|
||||
|
||||
When the user decides a new policy (consisting of
|
||||
"policy,governor,min,max") shall be set, this policy must be validated
|
||||
so that incompatible values can be corrected. For verifying these
|
||||
values, a frequency table helper and/or the
|
||||
cpufreq_verify_within_limits(struct cpufreq_policy *policy, unsigned
|
||||
int min_freq, unsigned int max_freq) function might be helpful. See
|
||||
section 2 for details on frequency table helpers.
|
||||
values cpufreq_verify_within_limits(struct cpufreq_policy *policy,
|
||||
unsigned int min_freq, unsigned int max_freq) function might be helpful.
|
||||
See section 2 for details on frequency table helpers.
|
||||
|
||||
You need to make sure that at least one valid frequency (or operating
|
||||
range) is within policy->min and policy->max. If necessary, increase
|
||||
policy->max first, and only if this is no solution, decrease policy->min.
|
||||
|
||||
|
||||
1.4 target/target_index or setpolicy?
|
||||
----------------------------
|
||||
1.4 target or target_index or setpolicy or fast_switch?
|
||||
-------------------------------------------------------
|
||||
|
||||
Most cpufreq drivers or even most cpu frequency scaling algorithms
|
||||
only allow the CPU to be set to one frequency. For these, you use the
|
||||
->target/target_index call.
|
||||
only allow the CPU frequency to be set to predefined fixed values. For
|
||||
these, you use the ->target(), ->target_index() or ->fast_switch()
|
||||
callbacks.
|
||||
|
||||
Some cpufreq-capable processors switch the frequency between certain
|
||||
limits on their own. These shall use the ->setpolicy call
|
||||
Some cpufreq capable processors switch the frequency between certain
|
||||
limits on their own. These shall use the ->setpolicy() callback.
|
||||
|
||||
|
||||
1.5. target/target_index
|
||||
-------------
|
||||
------------------------
|
||||
|
||||
The target_index call has two arguments: struct cpufreq_policy *policy,
|
||||
and unsigned int index (into the exposed frequency table).
|
||||
@@ -186,9 +202,20 @@ actual frequency must be determined using the following rules:
|
||||
Here again the frequency table helper might assist you - see section 2
|
||||
for details.
|
||||
|
||||
1.6. fast_switch
|
||||
----------------
|
||||
|
||||
1.6 setpolicy
|
||||
---------------
|
||||
This function is used for frequency switching from scheduler's context.
|
||||
Not all drivers are expected to implement it, as sleeping from within
|
||||
this callback isn't allowed. This callback must be highly optimized to
|
||||
do switching as fast as possible.
|
||||
|
||||
This function has two arguments: struct cpufreq_policy *policy and
|
||||
unsigned int target_frequency.
|
||||
|
||||
|
||||
1.7 setpolicy
|
||||
-------------
|
||||
|
||||
The setpolicy call only takes a struct cpufreq_policy *policy as
|
||||
argument. You need to set the lower limit of the in-processor or
|
||||
@@ -198,7 +225,7 @@ setting when policy->policy is CPUFREQ_POLICY_PERFORMANCE, and a
|
||||
powersaving-oriented setting when CPUFREQ_POLICY_POWERSAVE. Also check
|
||||
the reference implementation in drivers/cpufreq/longrun.c
|
||||
|
||||
1.7 get_intermediate and target_intermediate
|
||||
1.8 get_intermediate and target_intermediate
|
||||
--------------------------------------------
|
||||
|
||||
Only for drivers with target_index() and CPUFREQ_ASYNC_NOTIFICATION unset.
|
||||
@@ -222,42 +249,36 @@ failures as core would send notifications for that.
|
||||
|
||||
As most cpufreq processors only allow for being set to a few specific
|
||||
frequencies, a "frequency table" with some functions might assist in
|
||||
some work of the processor driver. Such a "frequency table" consists
|
||||
of an array of struct cpufreq_frequency_table entries, with any value in
|
||||
"driver_data" you want to use, and the corresponding frequency in
|
||||
"frequency". At the end of the table, you need to add a
|
||||
cpufreq_frequency_table entry with frequency set to CPUFREQ_TABLE_END. And
|
||||
if you want to skip one entry in the table, set the frequency to
|
||||
CPUFREQ_ENTRY_INVALID. The entries don't need to be in ascending
|
||||
order.
|
||||
some work of the processor driver. Such a "frequency table" consists of
|
||||
an array of struct cpufreq_frequency_table entries, with driver specific
|
||||
values in "driver_data", the corresponding frequency in "frequency" and
|
||||
flags set. At the end of the table, you need to add a
|
||||
cpufreq_frequency_table entry with frequency set to CPUFREQ_TABLE_END.
|
||||
And if you want to skip one entry in the table, set the frequency to
|
||||
CPUFREQ_ENTRY_INVALID. The entries don't need to be in sorted in any
|
||||
particular order, but if they are cpufreq core will do DVFS a bit
|
||||
quickly for them as search for best match is faster.
|
||||
|
||||
By calling cpufreq_table_validate_and_show(struct cpufreq_policy *policy,
|
||||
struct cpufreq_frequency_table *table);
|
||||
the cpuinfo.min_freq and cpuinfo.max_freq values are detected, and
|
||||
policy->min and policy->max are set to the same values. This is
|
||||
helpful for the per-CPU initialization stage.
|
||||
By calling cpufreq_table_validate_and_show(), the cpuinfo.min_freq and
|
||||
cpuinfo.max_freq values are detected, and policy->min and policy->max
|
||||
are set to the same values. This is helpful for the per-CPU
|
||||
initialization stage.
|
||||
|
||||
int cpufreq_frequency_table_verify(struct cpufreq_policy *policy,
|
||||
struct cpufreq_frequency_table *table);
|
||||
assures that at least one valid frequency is within policy->min and
|
||||
policy->max, and all other criteria are met. This is helpful for the
|
||||
->verify call.
|
||||
cpufreq_frequency_table_verify() assures that at least one valid
|
||||
frequency is within policy->min and policy->max, and all other criteria
|
||||
are met. This is helpful for the ->verify call.
|
||||
|
||||
int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
|
||||
unsigned int target_freq,
|
||||
unsigned int relation);
|
||||
|
||||
is the corresponding frequency table helper for the ->target
|
||||
stage. Just pass the values to this function, and this function
|
||||
returns the number of the frequency table entry which contains
|
||||
the frequency the CPU shall be set to.
|
||||
cpufreq_frequency_table_target() is the corresponding frequency table
|
||||
helper for the ->target stage. Just pass the values to this function,
|
||||
and this function returns the of the frequency table entry which
|
||||
contains the frequency the CPU shall be set to.
|
||||
|
||||
The following macros can be used as iterators over cpufreq_frequency_table:
|
||||
|
||||
cpufreq_for_each_entry(pos, table) - iterates over all entries of frequency
|
||||
table.
|
||||
|
||||
cpufreq-for_each_valid_entry(pos, table) - iterates over all entries,
|
||||
cpufreq_for_each_valid_entry(pos, table) - iterates over all entries,
|
||||
excluding CPUFREQ_ENTRY_INVALID frequencies.
|
||||
Use arguments "pos" - a cpufreq_frequency_table * as a loop cursor and
|
||||
"table" - the cpufreq_frequency_table * you want to iterate over.
|
||||
|
||||
@@ -34,10 +34,10 @@ cpufreq stats provides following statistics (explained in detail below).
|
||||
- total_trans
|
||||
- trans_table
|
||||
|
||||
All the statistics will be from the time the stats driver has been inserted
|
||||
to the time when a read of a particular statistic is done. Obviously, stats
|
||||
driver will not have any information about the frequency transitions before
|
||||
the stats driver insertion.
|
||||
All the statistics will be from the time the stats driver has been inserted
|
||||
(or the time the stats were reset) to the time when a read of a particular
|
||||
statistic is done. Obviously, stats driver will not have any information
|
||||
about the frequency transitions before the stats driver insertion.
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
<mysystem>:/sys/devices/system/cpu/cpu0/cpufreq/stats # ls -l
|
||||
@@ -110,25 +110,13 @@ Config Main Menu
|
||||
CPU Frequency scaling --->
|
||||
[*] CPU Frequency scaling
|
||||
[*] CPU frequency translation statistics
|
||||
[*] CPU frequency translation statistics details
|
||||
|
||||
|
||||
"CPU Frequency scaling" (CONFIG_CPU_FREQ) should be enabled to configure
|
||||
cpufreq-stats.
|
||||
|
||||
"CPU frequency translation statistics" (CONFIG_CPU_FREQ_STAT) provides the
|
||||
basic statistics which includes time_in_state and total_trans.
|
||||
statistics which includes time_in_state, total_trans and trans_table.
|
||||
|
||||
"CPU frequency translation statistics details" (CONFIG_CPU_FREQ_STAT_DETAILS)
|
||||
provides fine grained cpufreq stats by trans_table. The reason for having a
|
||||
separate config option for trans_table is:
|
||||
- trans_table goes against the traditional /sysfs rule of one value per
|
||||
interface. It provides a whole bunch of value in a 2 dimensional matrix
|
||||
form.
|
||||
|
||||
Once these two options are enabled and your CPU supports cpufrequency, you
|
||||
Once this option is enabled and your CPU supports cpufrequency, you
|
||||
will be able to see the CPU frequency statistics in /sysfs.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
|
||||
Dominik Brodowski <linux@brodo.de>
|
||||
some additions and corrections by Nico Golde <nico@ngolde.de>
|
||||
Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
||||
Viresh Kumar <viresh.kumar@linaro.org>
|
||||
|
||||
|
||||
|
||||
@@ -28,32 +30,27 @@ Contents:
|
||||
2.3 Userspace
|
||||
2.4 Ondemand
|
||||
2.5 Conservative
|
||||
2.6 Schedutil
|
||||
|
||||
3. The Governor Interface in the CPUfreq Core
|
||||
|
||||
4. References
|
||||
|
||||
|
||||
1. What Is A CPUFreq Governor?
|
||||
==============================
|
||||
|
||||
Most cpufreq drivers (except the intel_pstate and longrun) or even most
|
||||
cpu frequency scaling algorithms only offer the CPU to be set to one
|
||||
frequency. In order to offer dynamic frequency scaling, the cpufreq
|
||||
core must be able to tell these drivers of a "target frequency". So
|
||||
these specific drivers will be transformed to offer a "->target/target_index"
|
||||
call instead of the existing "->setpolicy" call. For "longrun", all
|
||||
stays the same, though.
|
||||
cpu frequency scaling algorithms only allow the CPU frequency to be set
|
||||
to predefined fixed values. In order to offer dynamic frequency
|
||||
scaling, the cpufreq core must be able to tell these drivers of a
|
||||
"target frequency". So these specific drivers will be transformed to
|
||||
offer a "->target/target_index/fast_switch()" call instead of the
|
||||
"->setpolicy()" call. For set_policy drivers, all stays the same,
|
||||
though.
|
||||
|
||||
How to decide what frequency within the CPUfreq policy should be used?
|
||||
That's done using "cpufreq governors". Two are already in this patch
|
||||
-- they're the already existing "powersave" and "performance" which
|
||||
set the frequency statically to the lowest or highest frequency,
|
||||
respectively. At least two more such governors will be ready for
|
||||
addition in the near future, but likely many more as there are various
|
||||
different theories and models about dynamic frequency scaling
|
||||
around. Using such a generic interface as cpufreq offers to scaling
|
||||
governors, these can be tested extensively, and the best one can be
|
||||
selected for each specific use.
|
||||
That's done using "cpufreq governors".
|
||||
|
||||
Basically, it's the following flow graph:
|
||||
|
||||
@@ -71,7 +68,7 @@ CPU can be set to switch independently | CPU can only be set
|
||||
/ the limits of policy->{min,max}
|
||||
/ \
|
||||
/ \
|
||||
Using the ->setpolicy call, Using the ->target/target_index call,
|
||||
Using the ->setpolicy call, Using the ->target/target_index/fast_switch call,
|
||||
the limits and the the frequency closest
|
||||
"policy" is set. to target_freq is set.
|
||||
It is assured that it
|
||||
@@ -109,114 +106,159 @@ directory.
|
||||
2.4 Ondemand
|
||||
------------
|
||||
|
||||
The CPUfreq governor "ondemand" sets the CPU depending on the
|
||||
current usage. To do this the CPU must have the capability to
|
||||
switch the frequency very quickly. There are a number of sysfs file
|
||||
accessible parameters:
|
||||
The CPUfreq governor "ondemand" sets the CPU frequency depending on the
|
||||
current system load. Load estimation is triggered by the scheduler
|
||||
through the update_util_data->func hook; when triggered, cpufreq checks
|
||||
the CPU-usage statistics over the last period and the governor sets the
|
||||
CPU accordingly. The CPU must have the capability to switch the
|
||||
frequency very quickly.
|
||||
|
||||
sampling_rate: measured in uS (10^-6 seconds), this is how often you
|
||||
want the kernel to look at the CPU usage and to make decisions on
|
||||
what to do about the frequency. Typically this is set to values of
|
||||
around '10000' or more. It's default value is (cmp. with users-guide.txt):
|
||||
transition_latency * 1000
|
||||
Be aware that transition latency is in ns and sampling_rate is in us, so you
|
||||
get the same sysfs value by default.
|
||||
Sampling rate should always get adjusted considering the transition latency
|
||||
To set the sampling rate 750 times as high as the transition latency
|
||||
in the bash (as said, 1000 is default), do:
|
||||
echo `$(($(cat cpuinfo_transition_latency) * 750 / 1000)) \
|
||||
>ondemand/sampling_rate
|
||||
Sysfs files:
|
||||
|
||||
sampling_rate_min:
|
||||
The sampling rate is limited by the HW transition latency:
|
||||
transition_latency * 100
|
||||
Or by kernel restrictions:
|
||||
If CONFIG_NO_HZ_COMMON is set, the limit is 10ms fixed.
|
||||
If CONFIG_NO_HZ_COMMON is not set or nohz=off boot parameter is used, the
|
||||
limits depend on the CONFIG_HZ option:
|
||||
HZ=1000: min=20000us (20ms)
|
||||
HZ=250: min=80000us (80ms)
|
||||
HZ=100: min=200000us (200ms)
|
||||
The highest value of kernel and HW latency restrictions is shown and
|
||||
used as the minimum sampling rate.
|
||||
* sampling_rate:
|
||||
|
||||
up_threshold: defines what the average CPU usage between the samplings
|
||||
of 'sampling_rate' needs to be for the kernel to make a decision on
|
||||
whether it should increase the frequency. For example when it is set
|
||||
to its default value of '95' it means that between the checking
|
||||
intervals the CPU needs to be on average more than 95% in use to then
|
||||
decide that the CPU frequency needs to be increased.
|
||||
Measured in uS (10^-6 seconds), this is how often you want the kernel
|
||||
to look at the CPU usage and to make decisions on what to do about the
|
||||
frequency. Typically this is set to values of around '10000' or more.
|
||||
It's default value is (cmp. with users-guide.txt): transition_latency
|
||||
* 1000. Be aware that transition latency is in ns and sampling_rate
|
||||
is in us, so you get the same sysfs value by default. Sampling rate
|
||||
should always get adjusted considering the transition latency to set
|
||||
the sampling rate 750 times as high as the transition latency in the
|
||||
bash (as said, 1000 is default), do:
|
||||
|
||||
ignore_nice_load: this parameter takes a value of '0' or '1'. When
|
||||
set to '0' (its default), all processes are counted towards the
|
||||
'cpu utilisation' value. When set to '1', the processes that are
|
||||
run with a 'nice' value will not count (and thus be ignored) in the
|
||||
overall usage calculation. This is useful if you are running a CPU
|
||||
intensive calculation on your laptop that you do not care how long it
|
||||
takes to complete as you can 'nice' it and prevent it from taking part
|
||||
in the deciding process of whether to increase your CPU frequency.
|
||||
$ echo `$(($(cat cpuinfo_transition_latency) * 750 / 1000)) > ondemand/sampling_rate
|
||||
|
||||
sampling_down_factor: this parameter controls the rate at which the
|
||||
kernel makes a decision on when to decrease the frequency while running
|
||||
at top speed. When set to 1 (the default) decisions to reevaluate load
|
||||
are made at the same interval regardless of current clock speed. But
|
||||
when set to greater than 1 (e.g. 100) it acts as a multiplier for the
|
||||
scheduling interval for reevaluating load when the CPU is at its top
|
||||
speed due to high load. This improves performance by reducing the overhead
|
||||
of load evaluation and helping the CPU stay at its top speed when truly
|
||||
busy, rather than shifting back and forth in speed. This tunable has no
|
||||
effect on behavior at lower speeds/lower CPU loads.
|
||||
* sampling_rate_min:
|
||||
|
||||
powersave_bias: this parameter takes a value between 0 to 1000. It
|
||||
defines the percentage (times 10) value of the target frequency that
|
||||
will be shaved off of the target. For example, when set to 100 -- 10%,
|
||||
when ondemand governor would have targeted 1000 MHz, it will target
|
||||
1000 MHz - (10% of 1000 MHz) = 900 MHz instead. This is set to 0
|
||||
(disabled) by default.
|
||||
When AMD frequency sensitivity powersave bias driver --
|
||||
drivers/cpufreq/amd_freq_sensitivity.c is loaded, this parameter
|
||||
defines the workload frequency sensitivity threshold in which a lower
|
||||
frequency is chosen instead of ondemand governor's original target.
|
||||
The frequency sensitivity is a hardware reported (on AMD Family 16h
|
||||
Processors and above) value between 0 to 100% that tells software how
|
||||
the performance of the workload running on a CPU will change when
|
||||
frequency changes. A workload with sensitivity of 0% (memory/IO-bound)
|
||||
will not perform any better on higher core frequency, whereas a
|
||||
workload with sensitivity of 100% (CPU-bound) will perform better
|
||||
higher the frequency. When the driver is loaded, this is set to 400
|
||||
by default -- for CPUs running workloads with sensitivity value below
|
||||
40%, a lower frequency is chosen. Unloading the driver or writing 0
|
||||
will disable this feature.
|
||||
The sampling rate is limited by the HW transition latency:
|
||||
transition_latency * 100
|
||||
|
||||
Or by kernel restrictions:
|
||||
- If CONFIG_NO_HZ_COMMON is set, the limit is 10ms fixed.
|
||||
- If CONFIG_NO_HZ_COMMON is not set or nohz=off boot parameter is
|
||||
used, the limits depend on the CONFIG_HZ option:
|
||||
HZ=1000: min=20000us (20ms)
|
||||
HZ=250: min=80000us (80ms)
|
||||
HZ=100: min=200000us (200ms)
|
||||
|
||||
The highest value of kernel and HW latency restrictions is shown and
|
||||
used as the minimum sampling rate.
|
||||
|
||||
* up_threshold:
|
||||
|
||||
This defines what the average CPU usage between the samplings of
|
||||
'sampling_rate' needs to be for the kernel to make a decision on
|
||||
whether it should increase the frequency. For example when it is set
|
||||
to its default value of '95' it means that between the checking
|
||||
intervals the CPU needs to be on average more than 95% in use to then
|
||||
decide that the CPU frequency needs to be increased.
|
||||
|
||||
* ignore_nice_load:
|
||||
|
||||
This parameter takes a value of '0' or '1'. When set to '0' (its
|
||||
default), all processes are counted towards the 'cpu utilisation'
|
||||
value. When set to '1', the processes that are run with a 'nice'
|
||||
value will not count (and thus be ignored) in the overall usage
|
||||
calculation. This is useful if you are running a CPU intensive
|
||||
calculation on your laptop that you do not care how long it takes to
|
||||
complete as you can 'nice' it and prevent it from taking part in the
|
||||
deciding process of whether to increase your CPU frequency.
|
||||
|
||||
* sampling_down_factor:
|
||||
|
||||
This parameter controls the rate at which the kernel makes a decision
|
||||
on when to decrease the frequency while running at top speed. When set
|
||||
to 1 (the default) decisions to reevaluate load are made at the same
|
||||
interval regardless of current clock speed. But when set to greater
|
||||
than 1 (e.g. 100) it acts as a multiplier for the scheduling interval
|
||||
for reevaluating load when the CPU is at its top speed due to high
|
||||
load. This improves performance by reducing the overhead of load
|
||||
evaluation and helping the CPU stay at its top speed when truly busy,
|
||||
rather than shifting back and forth in speed. This tunable has no
|
||||
effect on behavior at lower speeds/lower CPU loads.
|
||||
|
||||
* powersave_bias:
|
||||
|
||||
This parameter takes a value between 0 to 1000. It defines the
|
||||
percentage (times 10) value of the target frequency that will be
|
||||
shaved off of the target. For example, when set to 100 -- 10%, when
|
||||
ondemand governor would have targeted 1000 MHz, it will target
|
||||
1000 MHz - (10% of 1000 MHz) = 900 MHz instead. This is set to 0
|
||||
(disabled) by default.
|
||||
|
||||
When AMD frequency sensitivity powersave bias driver --
|
||||
drivers/cpufreq/amd_freq_sensitivity.c is loaded, this parameter
|
||||
defines the workload frequency sensitivity threshold in which a lower
|
||||
frequency is chosen instead of ondemand governor's original target.
|
||||
The frequency sensitivity is a hardware reported (on AMD Family 16h
|
||||
Processors and above) value between 0 to 100% that tells software how
|
||||
the performance of the workload running on a CPU will change when
|
||||
frequency changes. A workload with sensitivity of 0% (memory/IO-bound)
|
||||
will not perform any better on higher core frequency, whereas a
|
||||
workload with sensitivity of 100% (CPU-bound) will perform better
|
||||
higher the frequency. When the driver is loaded, this is set to 400 by
|
||||
default -- for CPUs running workloads with sensitivity value below
|
||||
40%, a lower frequency is chosen. Unloading the driver or writing 0
|
||||
will disable this feature.
|
||||
|
||||
|
||||
2.5 Conservative
|
||||
----------------
|
||||
|
||||
The CPUfreq governor "conservative", much like the "ondemand"
|
||||
governor, sets the CPU depending on the current usage. It differs in
|
||||
behaviour in that it gracefully increases and decreases the CPU speed
|
||||
rather than jumping to max speed the moment there is any load on the
|
||||
CPU. This behaviour more suitable in a battery powered environment.
|
||||
The governor is tweaked in the same manner as the "ondemand" governor
|
||||
through sysfs with the addition of:
|
||||
governor, sets the CPU frequency depending on the current usage. It
|
||||
differs in behaviour in that it gracefully increases and decreases the
|
||||
CPU speed rather than jumping to max speed the moment there is any load
|
||||
on the CPU. This behaviour is more suitable in a battery powered
|
||||
environment. The governor is tweaked in the same manner as the
|
||||
"ondemand" governor through sysfs with the addition of:
|
||||
|
||||
freq_step: this describes what percentage steps the cpu freq should be
|
||||
increased and decreased smoothly by. By default the cpu frequency will
|
||||
increase in 5% chunks of your maximum cpu frequency. You can change this
|
||||
value to anywhere between 0 and 100 where '0' will effectively lock your
|
||||
CPU at a speed regardless of its load whilst '100' will, in theory, make
|
||||
it behave identically to the "ondemand" governor.
|
||||
* freq_step:
|
||||
|
||||
down_threshold: same as the 'up_threshold' found for the "ondemand"
|
||||
governor but for the opposite direction. For example when set to its
|
||||
default value of '20' it means that if the CPU usage needs to be below
|
||||
20% between samples to have the frequency decreased.
|
||||
This describes what percentage steps the cpu freq should be increased
|
||||
and decreased smoothly by. By default the cpu frequency will increase
|
||||
in 5% chunks of your maximum cpu frequency. You can change this value
|
||||
to anywhere between 0 and 100 where '0' will effectively lock your CPU
|
||||
at a speed regardless of its load whilst '100' will, in theory, make
|
||||
it behave identically to the "ondemand" governor.
|
||||
|
||||
* down_threshold:
|
||||
|
||||
Same as the 'up_threshold' found for the "ondemand" governor but for
|
||||
the opposite direction. For example when set to its default value of
|
||||
'20' it means that if the CPU usage needs to be below 20% between
|
||||
samples to have the frequency decreased.
|
||||
|
||||
* sampling_down_factor:
|
||||
|
||||
Similar functionality as in "ondemand" governor. But in
|
||||
"conservative", it controls the rate at which the kernel makes a
|
||||
decision on when to decrease the frequency while running in any speed.
|
||||
Load for frequency increase is still evaluated every sampling rate.
|
||||
|
||||
|
||||
2.6 Schedutil
|
||||
-------------
|
||||
|
||||
The "schedutil" governor aims at better integration with the Linux
|
||||
kernel scheduler. Load estimation is achieved through the scheduler's
|
||||
Per-Entity Load Tracking (PELT) mechanism, which also provides
|
||||
information about the recent load [1]. This governor currently does
|
||||
load based DVFS only for tasks managed by CFS. RT and DL scheduler tasks
|
||||
are always run at the highest frequency. Unlike all the other
|
||||
governors, the code is located under the kernel/sched/ directory.
|
||||
|
||||
Sysfs files:
|
||||
|
||||
* rate_limit_us:
|
||||
|
||||
This contains a value in microseconds. The governor waits for
|
||||
rate_limit_us time before reevaluating the load again, after it has
|
||||
evaluated the load once.
|
||||
|
||||
For an in-depth comparison with the other governors refer to [2].
|
||||
|
||||
sampling_down_factor: similar functionality as in "ondemand" governor.
|
||||
But in "conservative", it controls the rate at which the kernel makes
|
||||
a decision on when to decrease the frequency while running in any
|
||||
speed. Load for frequency increase is still evaluated every
|
||||
sampling rate.
|
||||
|
||||
3. The Governor Interface in the CPUfreq Core
|
||||
=============================================
|
||||
@@ -225,26 +267,10 @@ A new governor must register itself with the CPUfreq core using
|
||||
"cpufreq_register_governor". The struct cpufreq_governor, which has to
|
||||
be passed to that function, must contain the following values:
|
||||
|
||||
governor->name - A unique name for this governor
|
||||
governor->governor - The governor callback function
|
||||
governor->owner - .THIS_MODULE for the governor module (if
|
||||
appropriate)
|
||||
|
||||
The governor->governor callback is called with the current (or to-be-set)
|
||||
cpufreq_policy struct for that CPU, and an unsigned int event. The
|
||||
following events are currently defined:
|
||||
|
||||
CPUFREQ_GOV_START: This governor shall start its duty for the CPU
|
||||
policy->cpu
|
||||
CPUFREQ_GOV_STOP: This governor shall end its duty for the CPU
|
||||
policy->cpu
|
||||
CPUFREQ_GOV_LIMITS: The limits for CPU policy->cpu have changed to
|
||||
policy->min and policy->max.
|
||||
|
||||
If you need other "events" externally of your driver, _only_ use the
|
||||
cpufreq_governor_l(unsigned int cpu, unsigned int event) call to the
|
||||
CPUfreq core to ensure proper locking.
|
||||
governor->name - A unique name for this governor.
|
||||
governor->owner - .THIS_MODULE for the governor module (if appropriate).
|
||||
|
||||
plus a set of hooks to the functions implementing the governor's logic.
|
||||
|
||||
The CPUfreq governor may call the CPU processor driver using one of
|
||||
these two functions:
|
||||
@@ -258,12 +284,18 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy,
|
||||
unsigned int relation);
|
||||
|
||||
target_freq must be within policy->min and policy->max, of course.
|
||||
What's the difference between these two functions? When your governor
|
||||
still is in a direct code path of a call to governor->governor, the
|
||||
per-CPU cpufreq lock is still held in the cpufreq core, and there's
|
||||
no need to lock it again (in fact, this would cause a deadlock). So
|
||||
use __cpufreq_driver_target only in these cases. In all other cases
|
||||
(for example, when there's a "daemonized" function that wakes up
|
||||
every second), use cpufreq_driver_target to lock the cpufreq per-CPU
|
||||
lock before the command is passed to the cpufreq processor driver.
|
||||
What's the difference between these two functions? When your governor is
|
||||
in a direct code path of a call to governor callbacks, like
|
||||
governor->start(), the policy->rwsem is still held in the cpufreq core,
|
||||
and there's no need to lock it again (in fact, this would cause a
|
||||
deadlock). So use __cpufreq_driver_target only in these cases. In all
|
||||
other cases (for example, when there's a "daemonized" function that
|
||||
wakes up every second), use cpufreq_driver_target to take policy->rwsem
|
||||
before the command is passed to the cpufreq driver.
|
||||
|
||||
4. References
|
||||
=============
|
||||
|
||||
[1] Per-entity load tracking: https://lwn.net/Articles/531853/
|
||||
[2] Improvements in CPU frequency management: https://lwn.net/Articles/682391/
|
||||
|
||||
|
||||
@@ -18,16 +18,29 @@
|
||||
|
||||
Documents in this directory:
|
||||
----------------------------
|
||||
core.txt - General description of the CPUFreq core and
|
||||
of CPUFreq notifiers
|
||||
|
||||
cpu-drivers.txt - How to implement a new cpufreq processor driver
|
||||
amd-powernow.txt - AMD powernow driver specific file.
|
||||
|
||||
boost.txt - Frequency boosting support.
|
||||
|
||||
core.txt - General description of the CPUFreq core and
|
||||
of CPUFreq notifiers.
|
||||
|
||||
cpu-drivers.txt - How to implement a new cpufreq processor driver.
|
||||
|
||||
cpufreq-nforce2.txt - nVidia nForce2 platform specific file.
|
||||
|
||||
cpufreq-stats.txt - General description of sysfs cpufreq stats.
|
||||
|
||||
governors.txt - What are cpufreq governors and how to
|
||||
implement them?
|
||||
|
||||
index.txt - File index, Mailing list and Links (this document)
|
||||
|
||||
intel-pstate.txt - Intel pstate cpufreq driver specific file.
|
||||
|
||||
pcc-cpufreq.txt - PCC cpufreq driver specific file.
|
||||
|
||||
user-guide.txt - User Guide to CPUFreq
|
||||
|
||||
|
||||
@@ -35,9 +48,7 @@ Mailing List
|
||||
------------
|
||||
There is a CPU frequency changing CVS commit and general list where
|
||||
you can report bugs, problems or submit patches. To post a message,
|
||||
send an email to linux-pm@vger.kernel.org, to subscribe go to
|
||||
http://vger.kernel.org/vger-lists.html#linux-pm and follow the
|
||||
instructions there.
|
||||
send an email to linux-pm@vger.kernel.org.
|
||||
|
||||
Links
|
||||
-----
|
||||
@@ -48,7 +59,7 @@ how to access the CVS repository:
|
||||
* http://cvs.arm.linux.org.uk/
|
||||
|
||||
the CPUFreq Mailing list:
|
||||
* http://vger.kernel.org/vger-lists.html#cpufreq
|
||||
* http://vger.kernel.org/vger-lists.html#linux-pm
|
||||
|
||||
Clock and voltage scaling for the SA-1100:
|
||||
* http://www.lartmaker.nl/projects/scaling
|
||||
|
||||
@@ -85,6 +85,21 @@ Sysfs will show :
|
||||
Refer to "Intel® 64 and IA-32 Architectures Software Developer’s Manual
|
||||
Volume 3: System Programming Guide" to understand ratios.
|
||||
|
||||
There is one more sysfs attribute in /sys/devices/system/cpu/intel_pstate/
|
||||
that can be used for controlling the operation mode of the driver:
|
||||
|
||||
status: Three settings are possible:
|
||||
"off" - The driver is not in use at this time.
|
||||
"active" - The driver works as a P-state governor (default).
|
||||
"passive" - The driver works as a regular cpufreq one and collaborates
|
||||
with the generic cpufreq governors (it sets P-states as
|
||||
requested by those governors).
|
||||
The current setting is returned by reads from this attribute. Writing one
|
||||
of the above strings to it changes the operation mode as indicated by that
|
||||
string, if possible. If HW-managed P-states (HWP) are enabled, it is not
|
||||
possible to change the driver's operation mode and attempts to write to
|
||||
this attribute will fail.
|
||||
|
||||
cpufreq sysfs for Intel P-State
|
||||
|
||||
Since this driver registers with cpufreq, cpufreq sysfs is also presented.
|
||||
|
||||
@@ -18,7 +18,7 @@
|
||||
Contents:
|
||||
---------
|
||||
1. Supported Architectures and Processors
|
||||
1.1 ARM
|
||||
1.1 ARM and ARM64
|
||||
1.2 x86
|
||||
1.3 sparc64
|
||||
1.4 ppc
|
||||
@@ -37,16 +37,10 @@ Contents:
|
||||
1. Supported Architectures and Processors
|
||||
=========================================
|
||||
|
||||
1.1 ARM
|
||||
-------
|
||||
|
||||
The following ARM processors are supported by cpufreq:
|
||||
|
||||
ARM Integrator
|
||||
ARM-SA1100
|
||||
ARM-SA1110
|
||||
Intel PXA
|
||||
1.1 ARM and ARM64
|
||||
-----------------
|
||||
|
||||
Almost all ARM and ARM64 platforms support CPU frequency scaling.
|
||||
|
||||
1.2 x86
|
||||
-------
|
||||
@@ -69,6 +63,7 @@ Transmeta Crusoe
|
||||
Transmeta Efficeon
|
||||
VIA Cyrix 3 / C3
|
||||
various processors on some ACPI 2.0-compatible systems [*]
|
||||
And many more
|
||||
|
||||
[*] Only if "ACPI Processor Performance States" are available
|
||||
to the ACPI<->BIOS interface.
|
||||
@@ -147,10 +142,19 @@ mounted it at /sys, the cpufreq interface is located in a subdirectory
|
||||
"cpufreq" within the cpu-device directory
|
||||
(e.g. /sys/devices/system/cpu/cpu0/cpufreq/ for the first CPU).
|
||||
|
||||
affected_cpus : List of Online CPUs that require software
|
||||
coordination of frequency.
|
||||
|
||||
cpuinfo_cur_freq : Current frequency of the CPU as obtained from
|
||||
the hardware, in KHz. This is the frequency
|
||||
the CPU actually runs at.
|
||||
|
||||
cpuinfo_min_freq : this file shows the minimum operating
|
||||
frequency the processor can run at(in kHz)
|
||||
|
||||
cpuinfo_max_freq : this file shows the maximum operating
|
||||
frequency the processor can run at(in kHz)
|
||||
|
||||
cpuinfo_transition_latency The time it takes on this CPU to
|
||||
switch between two frequencies in nano
|
||||
seconds. If unknown or known to be
|
||||
@@ -163,25 +167,30 @@ cpuinfo_transition_latency The time it takes on this CPU to
|
||||
userspace daemon. Make sure to not
|
||||
switch the frequency too often
|
||||
resulting in performance loss.
|
||||
scaling_driver : this file shows what cpufreq driver is
|
||||
used to set the frequency on this CPU
|
||||
|
||||
related_cpus : List of Online + Offline CPUs that need software
|
||||
coordination of frequency.
|
||||
|
||||
scaling_available_frequencies : List of available frequencies, in KHz.
|
||||
|
||||
scaling_available_governors : this file shows the CPUfreq governors
|
||||
available in this kernel. You can see the
|
||||
currently activated governor in
|
||||
|
||||
scaling_cur_freq : Current frequency of the CPU as determined by
|
||||
the governor and cpufreq core, in KHz. This is
|
||||
the frequency the kernel thinks the CPU runs
|
||||
at.
|
||||
|
||||
scaling_driver : this file shows what cpufreq driver is
|
||||
used to set the frequency on this CPU
|
||||
|
||||
scaling_governor, and by "echoing" the name of another
|
||||
governor you can change it. Please note
|
||||
that some governors won't load - they only
|
||||
work on some specific architectures or
|
||||
processors.
|
||||
|
||||
cpuinfo_cur_freq : Current frequency of the CPU as obtained from
|
||||
the hardware, in KHz. This is the frequency
|
||||
the CPU actually runs at.
|
||||
|
||||
scaling_available_frequencies : List of available frequencies, in KHz.
|
||||
|
||||
scaling_min_freq and
|
||||
scaling_max_freq show the current "policy limits" (in
|
||||
kHz). By echoing new values into these
|
||||
@@ -190,16 +199,11 @@ scaling_max_freq show the current "policy limits" (in
|
||||
first set scaling_max_freq, then
|
||||
scaling_min_freq.
|
||||
|
||||
affected_cpus : List of Online CPUs that require software
|
||||
coordination of frequency.
|
||||
|
||||
related_cpus : List of Online + Offline CPUs that need software
|
||||
coordination of frequency.
|
||||
|
||||
scaling_cur_freq : Current frequency of the CPU as determined by
|
||||
the governor and cpufreq core, in KHz. This is
|
||||
the frequency the kernel thinks the CPU runs
|
||||
at.
|
||||
scaling_setspeed This can be read to get the currently programmed
|
||||
value by the governor. This can be written to
|
||||
change the current frequency for a group of
|
||||
CPUs, represented by a policy. This is supported
|
||||
currently only by the userspace governor.
|
||||
|
||||
bios_limit : If the BIOS tells the OS to limit a CPU to
|
||||
lower frequencies, the user can read out the
|
||||
|
||||
@@ -15,6 +15,9 @@ Properties:
|
||||
Second cell specifies the irq distribution mode to cores
|
||||
0=Round Robin; 1=cpu0, 2=cpu1, 4=cpu2, 8=cpu3
|
||||
|
||||
The second cell in interrupts property is deprecated and may be ignored by
|
||||
the kernel.
|
||||
|
||||
intc accessed via the special ARC AUX register interface, hence "reg" property
|
||||
is not specified.
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ have dual GMAC each represented by a child node..
|
||||
* Ethernet controller node
|
||||
|
||||
Required properties:
|
||||
- compatible: Should be "mediatek,mt7623-eth"
|
||||
- compatible: Should be "mediatek,mt2701-eth"
|
||||
- reg: Address and length of the register set for the device
|
||||
- interrupts: Should contain the three frame engines interrupts in numeric
|
||||
order. These are fe_int0, fe_int1 and fe_int2.
|
||||
|
||||
@@ -19,8 +19,9 @@ Optional Properties:
|
||||
specifications. If neither of these are specified, the default is to
|
||||
assume clause 22.
|
||||
|
||||
If the phy's identifier is known then the list may contain an entry
|
||||
of the form: "ethernet-phy-idAAAA.BBBB" where
|
||||
If the PHY reports an incorrect ID (or none at all) then the
|
||||
"compatible" list may contain an entry with the correct PHY ID in the
|
||||
form: "ethernet-phy-idAAAA.BBBB" where
|
||||
AAAA - The value of the 16 bit Phy Identifier 1 register as
|
||||
4 hex digits. This is the chip vendor OUI bits 3:18
|
||||
BBBB - The value of the 16 bit Phy Identifier 2 register as
|
||||
|
||||
@@ -212,10 +212,11 @@ asynchronous manner and the value may not be very precise. To see a precise
|
||||
snapshot of a moment, you can see /proc/<pid>/smaps file and scan page table.
|
||||
It's slow but very precise.
|
||||
|
||||
Table 1-2: Contents of the status files (as of 4.1)
|
||||
Table 1-2: Contents of the status files (as of 4.8)
|
||||
..............................................................................
|
||||
Field Content
|
||||
Name filename of the executable
|
||||
Umask file mode creation mask
|
||||
State state (R is running, S is sleeping, D is sleeping
|
||||
in an uninterruptible wait, Z is zombie,
|
||||
T is traced or stopped)
|
||||
@@ -226,7 +227,6 @@ Table 1-2: Contents of the status files (as of 4.1)
|
||||
TracerPid PID of process tracing this process (0 if not)
|
||||
Uid Real, effective, saved set, and file system UIDs
|
||||
Gid Real, effective, saved set, and file system GIDs
|
||||
Umask file mode creation mask
|
||||
FDSize number of file descriptor slots currently allocated
|
||||
Groups supplementary group list
|
||||
NStgid descendant namespace thread group ID hierarchy
|
||||
@@ -236,6 +236,7 @@ Table 1-2: Contents of the status files (as of 4.1)
|
||||
VmPeak peak virtual memory size
|
||||
VmSize total program size
|
||||
VmLck locked memory size
|
||||
VmPin pinned memory size
|
||||
VmHWM peak resident set size ("high water mark")
|
||||
VmRSS size of memory portions. It contains the three
|
||||
following parts (VmRSS = RssAnon + RssFile + RssShmem)
|
||||
|
||||
@@ -35,9 +35,7 @@ only one way to cause the system to go into the Suspend-To-RAM state (write
|
||||
The default suspend mode (ie. the one to be used without writing anything into
|
||||
/sys/power/mem_sleep) is either "deep" (if Suspend-To-RAM is supported) or
|
||||
"s2idle", but it can be overridden by the value of the "mem_sleep_default"
|
||||
parameter in the kernel command line. On some ACPI-based systems, depending on
|
||||
the information in the FADT, the default may be "s2idle" even if Suspend-To-RAM
|
||||
is supported.
|
||||
parameter in the kernel command line.
|
||||
|
||||
The properties of all of the sleep states are described below.
|
||||
|
||||
|
||||
+18
-5
@@ -3567,7 +3567,7 @@ F: drivers/infiniband/hw/cxgb3/
|
||||
F: include/uapi/rdma/cxgb3-abi.h
|
||||
|
||||
CXGB4 ETHERNET DRIVER (CXGB4)
|
||||
M: Hariprasad S <hariprasad@chelsio.com>
|
||||
M: Ganesh Goudar <ganeshgr@chelsio.com>
|
||||
L: netdev@vger.kernel.org
|
||||
W: http://www.chelsio.com
|
||||
S: Supported
|
||||
@@ -4100,12 +4100,18 @@ F: drivers/gpu/drm/bridge/
|
||||
|
||||
DRM DRIVER FOR BOCHS VIRTUAL GPU
|
||||
M: Gerd Hoffmann <kraxel@redhat.com>
|
||||
S: Odd Fixes
|
||||
L: virtualization@lists.linux-foundation.org
|
||||
T: git git://git.kraxel.org/linux drm-qemu
|
||||
S: Maintained
|
||||
F: drivers/gpu/drm/bochs/
|
||||
|
||||
DRM DRIVER FOR QEMU'S CIRRUS DEVICE
|
||||
M: Dave Airlie <airlied@redhat.com>
|
||||
S: Odd Fixes
|
||||
M: Gerd Hoffmann <kraxel@redhat.com>
|
||||
L: virtualization@lists.linux-foundation.org
|
||||
T: git git://git.kraxel.org/linux drm-qemu
|
||||
S: Obsolete
|
||||
W: https://www.kraxel.org/blog/2014/10/qemu-using-cirrus-considered-harmful/
|
||||
F: drivers/gpu/drm/cirrus/
|
||||
|
||||
RADEON and AMDGPU DRM DRIVERS
|
||||
@@ -4147,7 +4153,7 @@ F: Documentation/gpu/i915.rst
|
||||
INTEL GVT-g DRIVERS (Intel GPU Virtualization)
|
||||
M: Zhenyu Wang <zhenyuw@linux.intel.com>
|
||||
M: Zhi Wang <zhi.a.wang@intel.com>
|
||||
L: igvt-g-dev@lists.01.org
|
||||
L: intel-gvt-dev@lists.freedesktop.org
|
||||
L: intel-gfx@lists.freedesktop.org
|
||||
W: https://01.org/igvt-g
|
||||
T: git https://github.com/01org/gvt-linux.git
|
||||
@@ -4298,7 +4304,10 @@ F: Documentation/devicetree/bindings/display/renesas,du.txt
|
||||
|
||||
DRM DRIVER FOR QXL VIRTUAL GPU
|
||||
M: Dave Airlie <airlied@redhat.com>
|
||||
S: Odd Fixes
|
||||
M: Gerd Hoffmann <kraxel@redhat.com>
|
||||
L: virtualization@lists.linux-foundation.org
|
||||
T: git git://git.kraxel.org/linux drm-qemu
|
||||
S: Maintained
|
||||
F: drivers/gpu/drm/qxl/
|
||||
F: include/uapi/drm/qxl_drm.h
|
||||
|
||||
@@ -13092,6 +13101,7 @@ M: David Airlie <airlied@linux.ie>
|
||||
M: Gerd Hoffmann <kraxel@redhat.com>
|
||||
L: dri-devel@lists.freedesktop.org
|
||||
L: virtualization@lists.linux-foundation.org
|
||||
T: git git://git.kraxel.org/linux drm-qemu
|
||||
S: Maintained
|
||||
F: drivers/gpu/drm/virtio/
|
||||
F: include/uapi/linux/virtio_gpu.h
|
||||
@@ -13443,6 +13453,7 @@ F: arch/x86/
|
||||
|
||||
X86 PLATFORM DRIVERS
|
||||
M: Darren Hart <dvhart@infradead.org>
|
||||
M: Andy Shevchenko <andy@infradead.org>
|
||||
L: platform-driver-x86@vger.kernel.org
|
||||
T: git git://git.infradead.org/users/dvhart/linux-platform-drivers-x86.git
|
||||
S: Maintained
|
||||
@@ -13614,6 +13625,7 @@ F: drivers/net/hamradio/z8530.h
|
||||
|
||||
ZBUD COMPRESSED PAGE ALLOCATOR
|
||||
M: Seth Jennings <sjenning@redhat.com>
|
||||
M: Dan Streetman <ddstreet@ieee.org>
|
||||
L: linux-mm@kvack.org
|
||||
S: Maintained
|
||||
F: mm/zbud.c
|
||||
@@ -13669,6 +13681,7 @@ F: Documentation/vm/zsmalloc.txt
|
||||
|
||||
ZSWAP COMPRESSED SWAP CACHING
|
||||
M: Seth Jennings <sjenning@redhat.com>
|
||||
M: Dan Streetman <ddstreet@ieee.org>
|
||||
L: linux-mm@kvack.org
|
||||
S: Maintained
|
||||
F: mm/zswap.c
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
VERSION = 4
|
||||
PATCHLEVEL = 10
|
||||
SUBLEVEL = 0
|
||||
EXTRAVERSION = -rc5
|
||||
NAME = Anniversary Edition
|
||||
EXTRAVERSION = -rc6
|
||||
NAME = Fearless Coyote
|
||||
|
||||
# *DOCUMENTATION*
|
||||
# To see a list of typical targets execute "make help"
|
||||
|
||||
@@ -26,7 +26,9 @@ static inline void __delay(unsigned long loops)
|
||||
" lp 1f \n"
|
||||
" nop \n"
|
||||
"1: \n"
|
||||
: : "r"(loops));
|
||||
:
|
||||
: "r"(loops)
|
||||
: "lp_count");
|
||||
}
|
||||
|
||||
extern void __bad_udelay(void);
|
||||
|
||||
@@ -71,14 +71,14 @@ ENTRY(stext)
|
||||
GET_CPU_ID r5
|
||||
cmp r5, 0
|
||||
mov.nz r0, r5
|
||||
#ifdef CONFIG_ARC_SMP_HALT_ON_RESET
|
||||
; Non-Master can proceed as system would be booted sufficiently
|
||||
jnz first_lines_of_secondary
|
||||
#else
|
||||
bz .Lmaster_proceed
|
||||
|
||||
; Non-Masters wait for Master to boot enough and bring them up
|
||||
jnz arc_platform_smp_wait_to_boot
|
||||
#endif
|
||||
; Master falls thru
|
||||
; when they resume, tail-call to entry point
|
||||
mov blink, @first_lines_of_secondary
|
||||
j arc_platform_smp_wait_to_boot
|
||||
|
||||
.Lmaster_proceed:
|
||||
#endif
|
||||
|
||||
; Clear BSS before updating any globals
|
||||
|
||||
+23
-32
@@ -93,11 +93,10 @@ static void mcip_probe_n_setup(void)
|
||||
READ_BCR(ARC_REG_MCIP_BCR, mp);
|
||||
|
||||
sprintf(smp_cpuinfo_buf,
|
||||
"Extn [SMP]\t: ARConnect (v%d): %d cores with %s%s%s%s%s\n",
|
||||
"Extn [SMP]\t: ARConnect (v%d): %d cores with %s%s%s%s\n",
|
||||
mp.ver, mp.num_cores,
|
||||
IS_AVAIL1(mp.ipi, "IPI "),
|
||||
IS_AVAIL1(mp.idu, "IDU "),
|
||||
IS_AVAIL1(mp.llm, "LLM "),
|
||||
IS_AVAIL1(mp.dbg, "DEBUG "),
|
||||
IS_AVAIL1(mp.gfrc, "GFRC"));
|
||||
|
||||
@@ -175,7 +174,6 @@ static void idu_irq_unmask(struct irq_data *data)
|
||||
raw_spin_unlock_irqrestore(&mcip_lock, flags);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static int
|
||||
idu_irq_set_affinity(struct irq_data *data, const struct cpumask *cpumask,
|
||||
bool force)
|
||||
@@ -205,12 +203,27 @@ idu_irq_set_affinity(struct irq_data *data, const struct cpumask *cpumask,
|
||||
|
||||
return IRQ_SET_MASK_OK;
|
||||
}
|
||||
#endif
|
||||
|
||||
static void idu_irq_enable(struct irq_data *data)
|
||||
{
|
||||
/*
|
||||
* By default send all common interrupts to all available online CPUs.
|
||||
* The affinity of common interrupts in IDU must be set manually since
|
||||
* in some cases the kernel will not call irq_set_affinity() by itself:
|
||||
* 1. When the kernel is not configured with support of SMP.
|
||||
* 2. When the kernel is configured with support of SMP but upper
|
||||
* interrupt controllers does not support setting of the affinity
|
||||
* and cannot propagate it to IDU.
|
||||
*/
|
||||
idu_irq_set_affinity(data, cpu_online_mask, false);
|
||||
idu_irq_unmask(data);
|
||||
}
|
||||
|
||||
static struct irq_chip idu_irq_chip = {
|
||||
.name = "MCIP IDU Intc",
|
||||
.irq_mask = idu_irq_mask,
|
||||
.irq_unmask = idu_irq_unmask,
|
||||
.irq_enable = idu_irq_enable,
|
||||
#ifdef CONFIG_SMP
|
||||
.irq_set_affinity = idu_irq_set_affinity,
|
||||
#endif
|
||||
@@ -243,36 +256,14 @@ static int idu_irq_xlate(struct irq_domain *d, struct device_node *n,
|
||||
const u32 *intspec, unsigned int intsize,
|
||||
irq_hw_number_t *out_hwirq, unsigned int *out_type)
|
||||
{
|
||||
irq_hw_number_t hwirq = *out_hwirq = intspec[0];
|
||||
int distri = intspec[1];
|
||||
unsigned long flags;
|
||||
|
||||
/*
|
||||
* Ignore value of interrupt distribution mode for common interrupts in
|
||||
* IDU which resides in intspec[1] since setting an affinity using value
|
||||
* from Device Tree is deprecated in ARC.
|
||||
*/
|
||||
*out_hwirq = intspec[0];
|
||||
*out_type = IRQ_TYPE_NONE;
|
||||
|
||||
/* XXX: validate distribution scheme again online cpu mask */
|
||||
if (distri == 0) {
|
||||
/* 0 - Round Robin to all cpus, otherwise 1 bit per core */
|
||||
raw_spin_lock_irqsave(&mcip_lock, flags);
|
||||
idu_set_dest(hwirq, BIT(num_online_cpus()) - 1);
|
||||
idu_set_mode(hwirq, IDU_M_TRIG_LEVEL, IDU_M_DISTRI_RR);
|
||||
raw_spin_unlock_irqrestore(&mcip_lock, flags);
|
||||
} else {
|
||||
/*
|
||||
* DEST based distribution for Level Triggered intr can only
|
||||
* have 1 CPU, so generalize it to always contain 1 cpu
|
||||
*/
|
||||
int cpu = ffs(distri);
|
||||
|
||||
if (cpu != fls(distri))
|
||||
pr_warn("IDU irq %lx distri mode set to cpu %x\n",
|
||||
hwirq, cpu);
|
||||
|
||||
raw_spin_lock_irqsave(&mcip_lock, flags);
|
||||
idu_set_dest(hwirq, cpu);
|
||||
idu_set_mode(hwirq, IDU_M_TRIG_LEVEL, IDU_M_DISTRI_DEST);
|
||||
raw_spin_unlock_irqrestore(&mcip_lock, flags);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
+20
-5
@@ -90,22 +90,37 @@ void __init smp_cpus_done(unsigned int max_cpus)
|
||||
*/
|
||||
static volatile int wake_flag;
|
||||
|
||||
#ifdef CONFIG_ISA_ARCOMPACT
|
||||
|
||||
#define __boot_read(f) f
|
||||
#define __boot_write(f, v) f = v
|
||||
|
||||
#else
|
||||
|
||||
#define __boot_read(f) arc_read_uncached_32(&f)
|
||||
#define __boot_write(f, v) arc_write_uncached_32(&f, v)
|
||||
|
||||
#endif
|
||||
|
||||
static void arc_default_smp_cpu_kick(int cpu, unsigned long pc)
|
||||
{
|
||||
BUG_ON(cpu == 0);
|
||||
wake_flag = cpu;
|
||||
|
||||
__boot_write(wake_flag, cpu);
|
||||
}
|
||||
|
||||
void arc_platform_smp_wait_to_boot(int cpu)
|
||||
{
|
||||
while (wake_flag != cpu)
|
||||
/* for halt-on-reset, we've waited already */
|
||||
if (IS_ENABLED(CONFIG_ARC_SMP_HALT_ON_RESET))
|
||||
return;
|
||||
|
||||
while (__boot_read(wake_flag) != cpu)
|
||||
;
|
||||
|
||||
wake_flag = 0;
|
||||
__asm__ __volatile__("j @first_lines_of_secondary \n");
|
||||
__boot_write(wake_flag, 0);
|
||||
}
|
||||
|
||||
|
||||
const char *arc_platform_smp_cpuinfo(void)
|
||||
{
|
||||
return plat_smp_ops.info ? : "";
|
||||
|
||||
@@ -241,8 +241,9 @@ int misaligned_fixup(unsigned long address, struct pt_regs *regs,
|
||||
if (state.fault)
|
||||
goto fault;
|
||||
|
||||
/* clear any remanants of delay slot */
|
||||
if (delay_mode(regs)) {
|
||||
regs->ret = regs->bta;
|
||||
regs->ret = regs->bta ~1U;
|
||||
regs->status32 &= ~STATUS_DE_MASK;
|
||||
} else {
|
||||
regs->ret += state.instr_len;
|
||||
|
||||
@@ -24,7 +24,7 @@ CONFIG_ARM_APPENDED_DTB=y
|
||||
CONFIG_ARM_ATAG_DTB_COMPAT=y
|
||||
CONFIG_CMDLINE="root=/dev/ram0 rw ramdisk=8192 initrd=0x41000000,8M console=ttySAC1,115200 init=/linuxrc mem=256M"
|
||||
CONFIG_CPU_FREQ=y
|
||||
CONFIG_CPU_FREQ_STAT_DETAILS=y
|
||||
CONFIG_CPU_FREQ_STAT=y
|
||||
CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
|
||||
CONFIG_CPU_FREQ_GOV_POWERSAVE=m
|
||||
CONFIG_CPU_FREQ_GOV_USERSPACE=m
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user