Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6

This commit is contained in:
David Woodhouse
2007-07-11 14:55:48 +01:00
983 changed files with 41273 additions and 98073 deletions
@@ -0,0 +1,16 @@
What: legacy isochronous ABI of raw1394 (1st generation iso ABI)
Date: June 2007 (scheduled), removed in kernel v2.6.23
Contact: linux1394-devel@lists.sourceforge.net
Description:
The two request types RAW1394_REQ_ISO_SEND, RAW1394_REQ_ISO_LISTEN have
been deprecated for quite some time. They are very inefficient as they
come with high interrupt load and several layers of callbacks for each
packet. Because of these deficiencies, the video1394 and dv1394 drivers
and the 3rd-generation isochronous ABI in raw1394 (rawiso) were created.
Users:
libraw1394 users via the long deprecated API raw1394_iso_write,
raw1394_start_iso_write, raw1394_start_iso_rcv, raw1394_stop_iso_rcv
libdc1394, which optionally uses these old libraw1394 calls
alternatively to the more efficient video1394 ABI
+11
View File
@@ -643,4 +643,15 @@ X!Idrivers/video/console/fonts.c
!Edrivers/spi/spi.c
</chapter>
<chapter id="splice">
<title>splice API</title>
<para>)
splice is a method for moving blocks of data around inside the
kernel, without continually transferring it between the kernel
and user space.
</para>
!Iinclude/linux/splice.h
!Ffs/splice.c
</chapter>
</book>
+3 -13
View File
@@ -82,23 +82,12 @@ including draining and flushing.
typedef void (prepare_flush_fn)(request_queue_t *q, struct request *rq);
int blk_queue_ordered(request_queue_t *q, unsigned ordered,
prepare_flush_fn *prepare_flush_fn,
unsigned gfp_mask);
int blk_queue_ordered_locked(request_queue_t *q, unsigned ordered,
prepare_flush_fn *prepare_flush_fn,
unsigned gfp_mask);
The only difference between the two functions is whether or not the
caller is holding q->queue_lock on entry. The latter expects the
caller is holding the lock.
prepare_flush_fn *prepare_flush_fn);
@q : the queue in question
@ordered : the ordered mode the driver/device supports
@prepare_flush_fn : this function should prepare @rq such that it
flushes cache to physical medium when executed
@gfp_mask : gfp_mask used when allocating data structures
for ordered processing
For example, SCSI disk driver's prepare_flush_fn looks like the
following.
@@ -106,9 +95,10 @@ following.
static void sd_prepare_flush(request_queue_t *q, struct request *rq)
{
memset(rq->cmd, 0, sizeof(rq->cmd));
rq->flags |= REQ_BLOCK_PC;
rq->cmd_type = REQ_TYPE_BLOCK_PC;
rq->timeout = SD_TIMEOUT;
rq->cmd[0] = SYNCHRONIZE_CACHE;
rq->cmd_len = 10;
}
The following seven ordered modes are supported. The following table
@@ -49,16 +49,6 @@ Who: Adrian Bunk <bunk@stusta.de>
---------------------------
What: raw1394: requests of type RAW1394_REQ_ISO_SEND, RAW1394_REQ_ISO_LISTEN
When: June 2007
Why: Deprecated in favour of the more efficient and robust rawiso interface.
Affected are applications which use the deprecated part of libraw1394
(raw1394_iso_write, raw1394_start_iso_write, raw1394_start_iso_rcv,
raw1394_stop_iso_rcv) or bypass libraw1394.
Who: Dan Dennedy <dan@dennedy.org>, Stefan Richter <stefanr@s5r6.in-berlin.de>
---------------------------
What: old NCR53C9x driver
When: October 2007
Why: Replaced by the much better esp_scsi driver. Actual low-level
@@ -258,14 +248,6 @@ Who: Len Brown <len.brown@intel.com>
---------------------------
What: sk98lin network driver
When: July 2007
Why: In kernel tree version of driver is unmaintained. Sk98lin driver
replaced by the skge driver.
Who: Stephen Hemminger <shemminger@osdl.org>
---------------------------
What: Compaq touchscreen device emulation
When: Oct 2007
Files: drivers/input/tsdev.c
-43
View File
@@ -1014,49 +1014,6 @@ and is between 256 and 4096 characters. It is defined in the file
mga= [HW,DRM]
migration_cost=
[KNL,SMP] debug: override scheduler migration costs
Format: <level-1-usecs>,<level-2-usecs>,...
This debugging option can be used to override the
default scheduler migration cost matrix. The numbers
are indexed by 'CPU domain distance'.
E.g. migration_cost=1000,2000,3000 on an SMT NUMA
box will set up an intra-core migration cost of
1 msec, an inter-core migration cost of 2 msecs,
and an inter-node migration cost of 3 msecs.
WARNING: using the wrong values here can break
scheduler performance, so it's only for scheduler
development purposes, not production environments.
migration_debug=
[KNL,SMP] migration cost auto-detect verbosity
Format=<0|1|2>
If a system's migration matrix reported at bootup
seems erroneous then this option can be used to
increase verbosity of the detection process.
We default to 0 (no extra messages), 1 will print
some more information, and 2 will be really
verbose (probably only useful if you also have a
serial console attached to the system).
migration_factor=
[KNL,SMP] multiply/divide migration costs by a factor
Format=<percent>
This debug option can be used to proportionally
increase or decrease the auto-detected migration
costs for all entries of the migration matrix.
E.g. migration_factor=150 will increase migration
costs by 50%. (and thus the scheduler will be less
eager migrating cache-hot tasks)
migration_factor=80 will decrease migration costs
by 20%. (thus the scheduler will be more eager to
migrate tasks)
WARNING: using the wrong values here can break
scheduler performance, so it's only for scheduler
development purposes, not production environments.
mousedev.tap_time=
[MOUSE] Maximum time between finger touching and
leaving touchpad surface for touch to be considered
-3
View File
@@ -96,9 +96,6 @@ routing.txt
- the new routing mechanism
shaper.txt
- info on the module that can shape/limit transmitted traffic.
sk98lin.txt
- Marvell Yukon Chipset / SysKonnect SK-98xx compliant Gigabit
Ethernet Adapter family driver info
skfp.txt
- SysKonnect FDDI (SK-5xxx, Compaq Netelligent) driver info.
smc9.txt
File diff suppressed because it is too large Load Diff
+204
View File
@@ -0,0 +1,204 @@
The Spidernet Device Driver
===========================
Written by Linas Vepstas <linas@austin.ibm.com>
Version of 7 June 2007
Abstract
========
This document sketches the structure of portions of the spidernet
device driver in the Linux kernel tree. The spidernet is a gigabit
ethernet device built into the Toshiba southbridge commonly used
in the SONY Playstation 3 and the IBM QS20 Cell blade.
The Structure of the RX Ring.
=============================
The receive (RX) ring is a circular linked list of RX descriptors,
together with three pointers into the ring that are used to manage its
contents.
The elements of the ring are called "descriptors" or "descrs"; they
describe the received data. This includes a pointer to a buffer
containing the received data, the buffer size, and various status bits.
There are three primary states that a descriptor can be in: "empty",
"full" and "not-in-use". An "empty" or "ready" descriptor is ready
to receive data from the hardware. A "full" descriptor has data in it,
and is waiting to be emptied and processed by the OS. A "not-in-use"
descriptor is neither empty or full; it is simply not ready. It may
not even have a data buffer in it, or is otherwise unusable.
During normal operation, on device startup, the OS (specifically, the
spidernet device driver) allocates a set of RX descriptors and RX
buffers. These are all marked "empty", ready to receive data. This
ring is handed off to the hardware, which sequentially fills in the
buffers, and marks them "full". The OS follows up, taking the full
buffers, processing them, and re-marking them empty.
This filling and emptying is managed by three pointers, the "head"
and "tail" pointers, managed by the OS, and a hardware current
descriptor pointer (GDACTDPA). The GDACTDPA points at the descr
currently being filled. When this descr is filled, the hardware
marks it full, and advances the GDACTDPA by one. Thus, when there is
flowing RX traffic, every descr behind it should be marked "full",
and everything in front of it should be "empty". If the hardware
discovers that the current descr is not empty, it will signal an
interrupt, and halt processing.
The tail pointer tails or trails the hardware pointer. When the
hardware is ahead, the tail pointer will be pointing at a "full"
descr. The OS will process this descr, and then mark it "not-in-use",
and advance the tail pointer. Thus, when there is flowing RX traffic,
all of the descrs in front of the tail pointer should be "full", and
all of those behind it should be "not-in-use". When RX traffic is not
flowing, then the tail pointer can catch up to the hardware pointer.
The OS will then note that the current tail is "empty", and halt
processing.
The head pointer (somewhat mis-named) follows after the tail pointer.
When traffic is flowing, then the head pointer will be pointing at
a "not-in-use" descr. The OS will perform various housekeeping duties
on this descr. This includes allocating a new data buffer and
dma-mapping it so as to make it visible to the hardware. The OS will
then mark the descr as "empty", ready to receive data. Thus, when there
is flowing RX traffic, everything in front of the head pointer should
be "not-in-use", and everything behind it should be "empty". If no
RX traffic is flowing, then the head pointer can catch up to the tail
pointer, at which point the OS will notice that the head descr is
"empty", and it will halt processing.
Thus, in an idle system, the GDACTDPA, tail and head pointers will
all be pointing at the same descr, which should be "empty". All of the
other descrs in the ring should be "empty" as well.
The show_rx_chain() routine will print out the the locations of the
GDACTDPA, tail and head pointers. It will also summarize the contents
of the ring, starting at the tail pointer, and listing the status
of the descrs that follow.
A typical example of the output, for a nearly idle system, might be
net eth1: Total number of descrs=256
net eth1: Chain tail located at descr=20
net eth1: Chain head is at 20
net eth1: HW curr desc (GDACTDPA) is at 21
net eth1: Have 1 descrs with stat=x40800101
net eth1: HW next desc (GDACNEXTDA) is at 22
net eth1: Last 255 descrs with stat=xa0800000
In the above, the hardware has filled in one descr, number 20. Both
head and tail are pointing at 20, because it has not yet been emptied.
Meanwhile, hw is pointing at 21, which is free.
The "Have nnn decrs" refers to the descr starting at the tail: in this
case, nnn=1 descr, starting at descr 20. The "Last nnn descrs" refers
to all of the rest of the descrs, from the last status change. The "nnn"
is a count of how many descrs have exactly the same status.
The status x4... corresponds to "full" and status xa... corresponds
to "empty". The actual value printed is RXCOMST_A.
In the device driver source code, a different set of names are
used for these same concepts, so that
"empty" == SPIDER_NET_DESCR_CARDOWNED == 0xa
"full" == SPIDER_NET_DESCR_FRAME_END == 0x4
"not in use" == SPIDER_NET_DESCR_NOT_IN_USE == 0xf
The RX RAM full bug/feature
===========================
As long as the OS can empty out the RX buffers at a rate faster than
the hardware can fill them, there is no problem. If, for some reason,
the OS fails to empty the RX ring fast enough, the hardware GDACTDPA
pointer will catch up to the head, notice the not-empty condition,
ad stop. However, RX packets may still continue arriving on the wire.
The spidernet chip can save some limited number of these in local RAM.
When this local ram fills up, the spider chip will issue an interrupt
indicating this (GHIINT0STS will show ERRINT, and the GRMFLLINT bit
will be set in GHIINT1STS). When the RX ram full condition occurs,
a certain bug/feature is triggered that has to be specially handled.
This section describes the special handling for this condition.
When the OS finally has a chance to run, it will empty out the RX ring.
In particular, it will clear the descriptor on which the hardware had
stopped. However, once the hardware has decided that a certain
descriptor is invalid, it will not restart at that descriptor; instead
it will restart at the next descr. This potentially will lead to a
deadlock condition, as the tail pointer will be pointing at this descr,
which, from the OS point of view, is empty; the OS will be waiting for
this descr to be filled. However, the hardware has skipped this descr,
and is filling the next descrs. Since the OS doesn't see this, there
is a potential deadlock, with the OS waiting for one descr to fill,
while the hardware is waiting for a different set of descrs to become
empty.
A call to show_rx_chain() at this point indicates the nature of the
problem. A typical print when the network is hung shows the following:
net eth1: Spider RX RAM full, incoming packets might be discarded!
net eth1: Total number of descrs=256
net eth1: Chain tail located at descr=255
net eth1: Chain head is at 255
net eth1: HW curr desc (GDACTDPA) is at 0
net eth1: Have 1 descrs with stat=xa0800000
net eth1: HW next desc (GDACNEXTDA) is at 1
net eth1: Have 127 descrs with stat=x40800101
net eth1: Have 1 descrs with stat=x40800001
net eth1: Have 126 descrs with stat=x40800101
net eth1: Last 1 descrs with stat=xa0800000
Both the tail and head pointers are pointing at descr 255, which is
marked xa... which is "empty". Thus, from the OS point of view, there
is nothing to be done. In particular, there is the implicit assumption
that everything in front of the "empty" descr must surely also be empty,
as explained in the last section. The OS is waiting for descr 255 to
become non-empty, which, in this case, will never happen.
The HW pointer is at descr 0. This descr is marked 0x4.. or "full".
Since its already full, the hardware can do nothing more, and thus has
halted processing. Notice that descrs 0 through 254 are all marked
"full", while descr 254 and 255 are empty. (The "Last 1 descrs" is
descr 254, since tail was at 255.) Thus, the system is deadlocked,
and there can be no forward progress; the OS thinks there's nothing
to do, and the hardware has nowhere to put incoming data.
This bug/feature is worked around with the spider_net_resync_head_ptr()
routine. When the driver receives RX interrupts, but an examination
of the RX chain seems to show it is empty, then it is probable that
the hardware has skipped a descr or two (sometimes dozens under heavy
network conditions). The spider_net_resync_head_ptr() subroutine will
search the ring for the next full descr, and the driver will resume
operations there. Since this will leave "holes" in the ring, there
is also a spider_net_resync_tail_ptr() that will skip over such holes.
As of this writing, the spider_net_resync() strategy seems to work very
well, even under heavy network loads.
The TX ring
===========
The TX ring uses a low-watermark interrupt scheme to make sure that
the TX queue is appropriately serviced for large packet sizes.
For packet sizes greater than about 1KBytes, the kernel can fill
the TX ring quicker than the device can drain it. Once the ring
is full, the netdev is stopped. When there is room in the ring,
the netdev needs to be reawakened, so that more TX packets are placed
in the ring. The hardware can empty the ring about four times per jiffy,
so its not appropriate to wait for the poll routine to refill, since
the poll routine runs only once per jiffy. The low-watermark mechanism
marks a descr about 1/4th of the way from the bottom of the queue, so
that an interrupt is generated when the descr is processed. This
interrupt wakes up the netdev, which can then refill the queue.
For large packets, this mechanism generates a relatively small number
of interrupts, about 1K/sec. For smaller packets, this will drop to zero
interrupts, as the hardware can empty the queue faster than the kernel
can fill it.
======= END OF DOCUMENT ========
+167
View File
@@ -0,0 +1,167 @@
Linux power supply class
========================
Synopsis
~~~~~~~~
Power supply class used to represent battery, UPS, AC or DC power supply
properties to user-space.
It defines core set of attributes, which should be applicable to (almost)
every power supply out there. Attributes are available via sysfs and uevent
interfaces.
Each attribute has well defined meaning, up to unit of measure used. While
the attributes provided are believed to be universally applicable to any
power supply, specific monitoring hardware may not be able to provide them
all, so any of them may be skipped.
Power supply class is extensible, and allows to define drivers own attributes.
The core attribute set is subject to the standard Linux evolution (i.e.
if it will be found that some attribute is applicable to many power supply
types or their drivers, it can be added to the core set).
It also integrates with LED framework, for the purpose of providing
typically expected feedback of battery charging/fully charged status and
AC/USB power supply online status. (Note that specific details of the
indication (including whether to use it at all) are fully controllable by
user and/or specific machine defaults, per design principles of LED
framework).
Attributes/properties
~~~~~~~~~~~~~~~~~~~~~
Power supply class has predefined set of attributes, this eliminates code
duplication across drivers. Power supply class insist on reusing its
predefined attributes *and* their units.
So, userspace gets predictable set of attributes and their units for any
kind of power supply, and can process/present them to a user in consistent
manner. Results for different power supplies and machines are also directly
comparable.
See drivers/power/ds2760_battery.c and drivers/power/pda_power.c for the
example how to declare and handle attributes.
Units
~~~~~
Quoting include/linux/power_supply.h:
All voltages, currents, charges, energies, time and temperatures in µV,
µA, µAh, µWh, seconds and tenths of degree Celsius unless otherwise
stated. It's driver's job to convert its raw values to units in which
this class operates.
Attributes/properties detailed
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
~ ~ ~ ~ ~ ~ ~ Charge/Energy/Capacity - how to not confuse ~ ~ ~ ~ ~ ~ ~
~ ~
~ Because both "charge" (µAh) and "energy" (µWh) represents "capacity" ~
~ of battery, this class distinguish these terms. Don't mix them! ~
~ ~
~ CHARGE_* attributes represents capacity in µAh only. ~
~ ENERGY_* attributes represents capacity in µWh only. ~
~ CAPACITY attribute represents capacity in *percents*, from 0 to 100. ~
~ ~
~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
Postfixes:
_AVG - *hardware* averaged value, use it if your hardware is really able to
report averaged values.
_NOW - momentary/instantaneous values.
STATUS - this attribute represents operating status (charging, full,
discharging (i.e. powering a load), etc.). This corresponds to
BATTERY_STATUS_* values, as defined in battery.h.
HEALTH - represents health of the battery, values corresponds to
POWER_SUPPLY_HEALTH_*, defined in battery.h.
VOLTAGE_MAX_DESIGN, VOLTAGE_MIN_DESIGN - design values for maximal and
minimal power supply voltages. Maximal/minimal means values of voltages
when battery considered "full"/"empty" at normal conditions. Yes, there is
no direct relation between voltage and battery capacity, but some dumb
batteries use voltage for very approximated calculation of capacity.
Battery driver also can use this attribute just to inform userspace
about maximal and minimal voltage thresholds of a given battery.
CHARGE_FULL_DESIGN, CHARGE_EMPTY_DESIGN - design charge values, when
battery considered full/empty.
ENERGY_FULL_DESIGN, ENERGY_EMPTY_DESIGN - same as above but for energy.
CHARGE_FULL, CHARGE_EMPTY - These attributes means "last remembered value
of charge when battery became full/empty". It also could mean "value of
charge when battery considered full/empty at given conditions (temperature,
age)". I.e. these attributes represents real thresholds, not design values.
ENERGY_FULL, ENERGY_EMPTY - same as above but for energy.
CAPACITY - capacity in percents.
CAPACITY_LEVEL - capacity level. This corresponds to
POWER_SUPPLY_CAPACITY_LEVEL_*.
TEMP - temperature of the power supply.
TEMP_AMBIENT - ambient temperature.
TIME_TO_EMPTY - seconds left for battery to be considered empty (i.e.
while battery powers a load)
TIME_TO_FULL - seconds left for battery to be considered full (i.e.
while battery is charging)
Battery <-> external power supply interaction
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Often power supplies are acting as supplies and supplicants at the same
time. Batteries are good example. So, batteries usually care if they're
externally powered or not.
For that case, power supply class implements notification mechanism for
batteries.
External power supply (AC) lists supplicants (batteries) names in
"supplied_to" struct member, and each power_supply_changed() call
issued by external power supply will notify supplicants via
external_power_changed callback.
QA
~~
Q: Where is POWER_SUPPLY_PROP_XYZ attribute?
A: If you cannot find attribute suitable for your driver needs, feel free
to add it and send patch along with your driver.
The attributes available currently are the ones currently provided by the
drivers written.
Good candidates to add in future: model/part#, cycle_time, manufacturer,
etc.
Q: I have some very specific attribute (e.g. battery color), should I add
this attribute to standard ones?
A: Most likely, no. Such attribute can be placed in the driver itself, if
it is useful. Of course, if the attribute in question applicable to
large set of batteries, provided by many drivers, and/or comes from
some general battery specification/standard, it may be a candidate to
be added to the core attribute set.
Q: Suppose, my battery monitoring chip/firmware does not provides capacity
in percents, but provides charge_{now,full,empty}. Should I calculate
percentage capacity manually, inside the driver, and register CAPACITY
attribute? The same question about time_to_empty/time_to_full.
A: Most likely, no. This class is designed to export properties which are
directly measurable by the specific hardware available.
Inferring not available properties using some heuristics or mathematical
model is not subject of work for a battery driver. Such functionality
should be factored out, and in fact, apm_power, the driver to serve
legacy APM API on top of power supply class, uses a simple heuristic of
approximating remaining battery capacity based on its charge, current,
voltage and so on. But full-fledged battery model is likely not subject
for kernel at all, as it would require floating point calculation to deal
with things like differential equations and Kalman filters. This is
better be handled by batteryd/libbattery, yet to be written.
+119
View File
@@ -0,0 +1,119 @@
This is the CFS scheduler.
80% of CFS's design can be summed up in a single sentence: CFS basically
models an "ideal, precise multi-tasking CPU" on real hardware.
"Ideal multi-tasking CPU" is a (non-existent :-)) CPU that has 100%
physical power and which can run each task at precise equal speed, in
parallel, each at 1/nr_running speed. For example: if there are 2 tasks
running then it runs each at 50% physical power - totally in parallel.
On real hardware, we can run only a single task at once, so while that
one task runs, the other tasks that are waiting for the CPU are at a
disadvantage - the current task gets an unfair amount of CPU time. In
CFS this fairness imbalance is expressed and tracked via the per-task
p->wait_runtime (nanosec-unit) value. "wait_runtime" is the amount of
time the task should now run on the CPU for it to become completely fair
and balanced.
( small detail: on 'ideal' hardware, the p->wait_runtime value would
always be zero - no task would ever get 'out of balance' from the
'ideal' share of CPU time. )
CFS's task picking logic is based on this p->wait_runtime value and it
is thus very simple: it always tries to run the task with the largest
p->wait_runtime value. In other words, CFS tries to run the task with
the 'gravest need' for more CPU time. So CFS always tries to split up
CPU time between runnable tasks as close to 'ideal multitasking
hardware' as possible.
Most of the rest of CFS's design just falls out of this really simple
concept, with a few add-on embellishments like nice levels,
multiprocessing and various algorithm variants to recognize sleepers.
In practice it works like this: the system runs a task a bit, and when
the task schedules (or a scheduler tick happens) the task's CPU usage is
'accounted for': the (small) time it just spent using the physical CPU
is deducted from p->wait_runtime. [minus the 'fair share' it would have
gotten anyway]. Once p->wait_runtime gets low enough so that another
task becomes the 'leftmost task' of the time-ordered rbtree it maintains
(plus a small amount of 'granularity' distance relative to the leftmost
task so that we do not over-schedule tasks and trash the cache) then the
new leftmost task is picked and the current task is preempted.
The rq->fair_clock value tracks the 'CPU time a runnable task would have
fairly gotten, had it been runnable during that time'. So by using
rq->fair_clock values we can accurately timestamp and measure the
'expected CPU time' a task should have gotten. All runnable tasks are
sorted in the rbtree by the "rq->fair_clock - p->wait_runtime" key, and
CFS picks the 'leftmost' task and sticks to it. As the system progresses
forwards, newly woken tasks are put into the tree more and more to the
right - slowly but surely giving a chance for every task to become the
'leftmost task' and thus get on the CPU within a deterministic amount of
time.
Some implementation details:
- the introduction of Scheduling Classes: an extensible hierarchy of
scheduler modules. These modules encapsulate scheduling policy
details and are handled by the scheduler core without the core
code assuming about them too much.
- sched_fair.c implements the 'CFS desktop scheduler': it is a
replacement for the vanilla scheduler's SCHED_OTHER interactivity
code.
I'd like to give credit to Con Kolivas for the general approach here:
he has proven via RSDL/SD that 'fair scheduling' is possible and that
it results in better desktop scheduling. Kudos Con!
The CFS patch uses a completely different approach and implementation
from RSDL/SD. My goal was to make CFS's interactivity quality exceed
that of RSDL/SD, which is a high standard to meet :-) Testing
feedback is welcome to decide this one way or another. [ and, in any
case, all of SD's logic could be added via a kernel/sched_sd.c module
as well, if Con is interested in such an approach. ]
CFS's design is quite radical: it does not use runqueues, it uses a
time-ordered rbtree to build a 'timeline' of future task execution,
and thus has no 'array switch' artifacts (by which both the vanilla
scheduler and RSDL/SD are affected).
CFS uses nanosecond granularity accounting and does not rely on any
jiffies or other HZ detail. Thus the CFS scheduler has no notion of
'timeslices' and has no heuristics whatsoever. There is only one
central tunable:
/proc/sys/kernel/sched_granularity_ns
which can be used to tune the scheduler from 'desktop' (low
latencies) to 'server' (good batching) workloads. It defaults to a
setting suitable for desktop workloads. SCHED_BATCH is handled by the
CFS scheduler module too.
Due to its design, the CFS scheduler is not prone to any of the
'attacks' that exist today against the heuristics of the stock
scheduler: fiftyp.c, thud.c, chew.c, ring-test.c, massive_intr.c all
work fine and do not impact interactivity and produce the expected
behavior.
the CFS scheduler has a much stronger handling of nice levels and
SCHED_BATCH: both types of workloads should be isolated much more
agressively than under the vanilla scheduler.
( another detail: due to nanosec accounting and timeline sorting,
sched_yield() support is very simple under CFS, and in fact under
CFS sched_yield() behaves much better than under any other
scheduler i have tested so far. )
- sched_rt.c implements SCHED_FIFO and SCHED_RR semantics, in a simpler
way than the vanilla scheduler does. It uses 100 runqueues (for all
100 RT priority levels, instead of 140 in the vanilla scheduler)
and it needs no expired array.
- reworked/sanitized SMP load-balancing: the runqueue-walking
assumptions are gone from the load-balancing code now, and
iterators of the scheduling modules are used. The balancing code got
quite a bit simpler as a result.
+19 -2
View File
@@ -1856,7 +1856,7 @@ W: http://www.openib.org/
T: git kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git
S: Supported
INPUT (KEYBOARD, MOUSE, JOYSTICK) DRIVERS
INPUT (KEYBOARD, MOUSE, JOYSTICK, TOUCHSCREEN) DRIVERS
P: Dmitry Torokhov
M: dmitry.torokhov@gmail.com
M: dtor@mail.ru
@@ -2101,7 +2101,7 @@ S: Maintained
KERNEL JANITORS
P: Several
L: kernel-janitors@lists.linux-foundation.org
L: kernel-janitors@vger.kernel.org
W: http://www.kerneljanitors.org/
S: Maintained
@@ -2930,6 +2930,13 @@ M: mikpe@it.uu.se
L: linux-ide@vger.kernel.org
S: Maintained
PS3 NETWORK SUPPORT
P: Masakazu Mokuno
M: mokuno@sm.sony.co.jp
L: netdev@vger.kernel.org
L: cbe-oss-dev@ozlabs.org
S: Supported
PS3 PLATFORM SUPPORT
P: Geoff Levand
M: geoffrey.levand@am.sony.com
@@ -3049,6 +3056,16 @@ S: Maintained
RISCOM8 DRIVER
S: Orphan
RTL818X WIRELESS DRIVER
P: Michael Wu
M: flamingice@sourmilk.net
P: Andrea Merello
M: andreamrl@tiscali.it
L: linux-wireless@vger.kernel.org
W: http://linuxwireless.org/
T: git kernel.org:/pub/scm/linux/kernel/git/mwu/mac80211-drivers.git
S: Maintained
S3 SAVAGE FRAMEBUFFER DRIVER
P: Antonino Daplas
M: adaplas@gmail.com
+1 -1
View File
@@ -1,7 +1,7 @@
VERSION = 2
PATCHLEVEL = 6
SUBLEVEL = 22
EXTRAVERSION = -rc7
EXTRAVERSION =
NAME = Holy Dancing Manatees, Batman!
# *DOCUMENTATION*
-2
View File
@@ -181,9 +181,7 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
void dump_stack(void)
{
#ifdef CONFIG_DEBUG_ERRORS
__backtrace();
#endif
}
EXPORT_SYMBOL(dump_stack);
+2 -2
View File
@@ -140,9 +140,9 @@ void pxa_cpu_pm_enter(suspend_state_t state)
extern void pxa_cpu_resume(void);
if (state == PM_SUSPEND_STANDBY)
CKEN = CKEN_MEMC | CKEN_OSTIMER | CKEN_LCD | CKEN_PWM0;
CKEN = (1 << CKEN_MEMC) | (1 << CKEN_OSTIMER) | (1 << CKEN_LCD) | (1 << CKEN_PWM0);
else
CKEN = CKEN_MEMC | CKEN_OSTIMER;
CKEN = (1 << CKEN_MEMC) | (1 << CKEN_OSTIMER);
/* ensure voltage-change sequencer not initiated, which hangs */
PCFR &= ~PCFR_FVC;
+4 -1
View File
@@ -117,7 +117,10 @@ static int versatile_read_config(struct pci_bus *bus, unsigned int devfn, int wh
} else {
switch (size) {
case 1:
v = __raw_readb(addr);
v = __raw_readl(addr);
if (where & 2) v >>= 16;
if (where & 1) v >>= 8;
v &= 0xff;
break;
case 2:
+2 -2
View File
@@ -527,9 +527,9 @@ void __init create_mapping(struct map_desc *md)
return;
}
addr = md->virtual;
addr = md->virtual & PAGE_MASK;
phys = (unsigned long)__pfn_to_phys(md->pfn);
length = PAGE_ALIGN(md->length);
length = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK));
if (type->prot_l1 == 0 && ((addr | phys | length) & ~SECTION_MASK)) {
printk(KERN_WARNING "BUG: map for 0x%08lx at 0x%08lx can not "
+2 -2
View File
@@ -441,8 +441,8 @@ config X86_REBOOTFIXUPS
this config is intended, is when reboot ends with a stalled/hung
system.
Currently, the only fixup is for the Geode GX1/CS5530A/TROM2.1.
combination.
Currently, the only fixup is for the Geode machines using
CS5530A and CS5536 chipsets.
Say Y if you want to enable the fixup. Currently, it's safe to
enable this option even if you don't need it.
+4
View File
@@ -229,6 +229,8 @@ static void set_mtrr(unsigned int reg, unsigned long base,
data.smp_size = size;
data.smp_type = type;
atomic_set(&data.count, num_booting_cpus() - 1);
/* make sure data.count is visible before unleashing other CPUs */
smp_wmb();
atomic_set(&data.gate,0);
/* Start the ball rolling on other CPUs */
@@ -242,6 +244,7 @@ static void set_mtrr(unsigned int reg, unsigned long base,
/* ok, reset count and toggle gate */
atomic_set(&data.count, num_booting_cpus() - 1);
smp_wmb();
atomic_set(&data.gate,1);
/* do our MTRR business */
@@ -260,6 +263,7 @@ static void set_mtrr(unsigned int reg, unsigned long base,
cpu_relax();
atomic_set(&data.count, num_booting_cpus() - 1);
smp_wmb();
atomic_set(&data.gate,0);
/*
+4 -4
View File
@@ -367,10 +367,6 @@ ENTRY(system_call)
CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
GET_THREAD_INFO(%ebp)
testl $TF_MASK,PT_EFLAGS(%esp)
jz no_singlestep
orl $_TIF_SINGLESTEP,TI_flags(%ebp)
no_singlestep:
# system call tracing in operation / emulation
/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
@@ -385,6 +381,10 @@ syscall_exit:
# setting need_resched or sigpending
# between sampling and the iret
TRACE_IRQS_OFF
testl $TF_MASK,PT_EFLAGS(%esp) # If tracing set singlestep flag on exit
jz no_singlestep
orl $_TIF_SINGLESTEP,TI_flags(%ebp)
no_singlestep:
movl TI_flags(%ebp), %ecx
testw $_TIF_ALLWORK_MASK, %cx # current->work
jne syscall_exit_work
+2 -3
View File
@@ -20,8 +20,6 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
if (rev > 0x9)
return;
printk(KERN_INFO "Intel E7520/7320/7525 detected.");
/* enable access to config space*/
pci_read_config_byte(dev, 0xf4, &config);
pci_write_config_byte(dev, 0xf4, config|0x2);
@@ -30,7 +28,8 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word);
if (!(word & (1 << 13))) {
printk(KERN_INFO "Disabling irq balancing and affinity\n");
printk(KERN_INFO "Intel E7520/7320/7525 detected. "
"Disabling irq balancing and affinity\n");
#ifdef CONFIG_IRQBALANCE
irqbalance_disable("");
#endif

Some files were not shown because too many files have changed in this diff Show More