You've already forked linux-apfs
mirror of
https://github.com/linux-apfs/linux-apfs.git
synced 2026-05-01 15:00:59 -07:00
Merge tag 'docs-4.11' of git://git.lwn.net/linux
Pull documentation updates from Jonathan Corbet: "A slightly quieter cycle for documentation this time around. Three more DocBook template files have been converted to RST; only 21 to go. There are various build improvements and the usual array of documentation improvements and fixes" * tag 'docs-4.11' of git://git.lwn.net/linux: (44 commits) docs / driver-api: Fix structure references in device_link.rst PM / docs: Fix structure references in device.rst Add a target to check broken external links in the Documentation Documentation: Fix linux-api list typo Documentation: DocBook/Makefile comment typo Improve sparse documentation Documentation: make Makefile.sphinx no-ops quieter Documentation: DMA-ISA-LPC.txt Documentation: input: fix path to input code definitions docs: Remove the copyright year from conf.py docs: Fix a warning in the Korean HOWTO.rst translation PM / sleep / docs: Convert PM notifiers document to reST PM / core / docs: Convert sleep states API document to reST PM / core: Update kerneldoc comments in pm.h doc-rst: Fix recursive make invocation from macros doc-rst: Delete output of failed dot-SVG conversion doc-rst: Break shell command sequences on failure Documentation/sphinx: make targets independent of Sphinx work for HAVE_SPHINX=0 doc-rst: fixed cleandoc target when used with O=dir Documentation/sphinx: prevent generation of .pyc files in the source tree ...
This commit is contained in:
@@ -42,7 +42,7 @@ requirements you pass the flag GFP_DMA to kmalloc.
|
|||||||
|
|
||||||
Unfortunately the memory available for ISA DMA is scarce so unless you
|
Unfortunately the memory available for ISA DMA is scarce so unless you
|
||||||
allocate the memory during boot-up it's a good idea to also pass
|
allocate the memory during boot-up it's a good idea to also pass
|
||||||
__GFP_REPEAT and __GFP_NOWARN to make the allocater try a bit harder.
|
__GFP_REPEAT and __GFP_NOWARN to make the allocator try a bit harder.
|
||||||
|
|
||||||
(This scarcity also means that you should allocate the buffer as
|
(This scarcity also means that you should allocate the buffer as
|
||||||
early as possible and not release it until the driver is unloaded.)
|
early as possible and not release it until the driver is unloaded.)
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ DOCBOOKS := z8530book.xml \
|
|||||||
gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \
|
gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \
|
||||||
genericirq.xml s390-drivers.xml scsi.xml \
|
genericirq.xml s390-drivers.xml scsi.xml \
|
||||||
sh.xml regulator.xml w1.xml \
|
sh.xml regulator.xml w1.xml \
|
||||||
writing_musb_glue_layer.xml iio.xml
|
writing_musb_glue_layer.xml
|
||||||
|
|
||||||
ifeq ($(DOCBOOKS),)
|
ifeq ($(DOCBOOKS),)
|
||||||
|
|
||||||
@@ -71,6 +71,7 @@ installmandocs: mandocs
|
|||||||
# no-op for the DocBook toolchain
|
# no-op for the DocBook toolchain
|
||||||
epubdocs:
|
epubdocs:
|
||||||
latexdocs:
|
latexdocs:
|
||||||
|
linkcheckdocs:
|
||||||
|
|
||||||
###
|
###
|
||||||
#External programs used
|
#External programs used
|
||||||
@@ -272,6 +273,6 @@ cleandocs:
|
|||||||
$(Q)rm -rf $(call objectify, $(clean-dirs))
|
$(Q)rm -rf $(call objectify, $(clean-dirs))
|
||||||
|
|
||||||
# Declare the contents of the .PHONY variable as phony. We keep that
|
# Declare the contents of the .PHONY variable as phony. We keep that
|
||||||
# information in a variable se we can use it in if_changed and friends.
|
# information in a variable so we can use it in if_changed and friends.
|
||||||
|
|
||||||
.PHONY: $(PHONY)
|
.PHONY: $(PHONY)
|
||||||
|
|||||||
@@ -1,323 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
|
|
||||||
"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
|
|
||||||
|
|
||||||
<book id="DoingIO">
|
|
||||||
<bookinfo>
|
|
||||||
<title>Bus-Independent Device Accesses</title>
|
|
||||||
|
|
||||||
<authorgroup>
|
|
||||||
<author>
|
|
||||||
<firstname>Matthew</firstname>
|
|
||||||
<surname>Wilcox</surname>
|
|
||||||
<affiliation>
|
|
||||||
<address>
|
|
||||||
<email>matthew@wil.cx</email>
|
|
||||||
</address>
|
|
||||||
</affiliation>
|
|
||||||
</author>
|
|
||||||
</authorgroup>
|
|
||||||
|
|
||||||
<authorgroup>
|
|
||||||
<author>
|
|
||||||
<firstname>Alan</firstname>
|
|
||||||
<surname>Cox</surname>
|
|
||||||
<affiliation>
|
|
||||||
<address>
|
|
||||||
<email>alan@lxorguk.ukuu.org.uk</email>
|
|
||||||
</address>
|
|
||||||
</affiliation>
|
|
||||||
</author>
|
|
||||||
</authorgroup>
|
|
||||||
|
|
||||||
<copyright>
|
|
||||||
<year>2001</year>
|
|
||||||
<holder>Matthew Wilcox</holder>
|
|
||||||
</copyright>
|
|
||||||
|
|
||||||
<legalnotice>
|
|
||||||
<para>
|
|
||||||
This documentation is free software; you can redistribute
|
|
||||||
it and/or modify it under the terms of the GNU General Public
|
|
||||||
License as published by the Free Software Foundation; either
|
|
||||||
version 2 of the License, or (at your option) any later
|
|
||||||
version.
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<para>
|
|
||||||
This program is distributed in the hope that it will be
|
|
||||||
useful, but WITHOUT ANY WARRANTY; without even the implied
|
|
||||||
warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
||||||
See the GNU General Public License for more details.
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<para>
|
|
||||||
You should have received a copy of the GNU General Public
|
|
||||||
License along with this program; if not, write to the Free
|
|
||||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
|
|
||||||
MA 02111-1307 USA
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<para>
|
|
||||||
For more details see the file COPYING in the source
|
|
||||||
distribution of Linux.
|
|
||||||
</para>
|
|
||||||
</legalnotice>
|
|
||||||
</bookinfo>
|
|
||||||
|
|
||||||
<toc></toc>
|
|
||||||
|
|
||||||
<chapter id="intro">
|
|
||||||
<title>Introduction</title>
|
|
||||||
<para>
|
|
||||||
Linux provides an API which abstracts performing IO across all busses
|
|
||||||
and devices, allowing device drivers to be written independently of
|
|
||||||
bus type.
|
|
||||||
</para>
|
|
||||||
</chapter>
|
|
||||||
|
|
||||||
<chapter id="bugs">
|
|
||||||
<title>Known Bugs And Assumptions</title>
|
|
||||||
<para>
|
|
||||||
None.
|
|
||||||
</para>
|
|
||||||
</chapter>
|
|
||||||
|
|
||||||
<chapter id="mmio">
|
|
||||||
<title>Memory Mapped IO</title>
|
|
||||||
<sect1 id="getting_access_to_the_device">
|
|
||||||
<title>Getting Access to the Device</title>
|
|
||||||
<para>
|
|
||||||
The most widely supported form of IO is memory mapped IO.
|
|
||||||
That is, a part of the CPU's address space is interpreted
|
|
||||||
not as accesses to memory, but as accesses to a device. Some
|
|
||||||
architectures define devices to be at a fixed address, but most
|
|
||||||
have some method of discovering devices. The PCI bus walk is a
|
|
||||||
good example of such a scheme. This document does not cover how
|
|
||||||
to receive such an address, but assumes you are starting with one.
|
|
||||||
Physical addresses are of type unsigned long.
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<para>
|
|
||||||
This address should not be used directly. Instead, to get an
|
|
||||||
address suitable for passing to the accessor functions described
|
|
||||||
below, you should call <function>ioremap</function>.
|
|
||||||
An address suitable for accessing the device will be returned to you.
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<para>
|
|
||||||
After you've finished using the device (say, in your module's
|
|
||||||
exit routine), call <function>iounmap</function> in order to return
|
|
||||||
the address space to the kernel. Most architectures allocate new
|
|
||||||
address space each time you call <function>ioremap</function>, and
|
|
||||||
they can run out unless you call <function>iounmap</function>.
|
|
||||||
</para>
|
|
||||||
</sect1>
|
|
||||||
|
|
||||||
<sect1 id="accessing_the_device">
|
|
||||||
<title>Accessing the device</title>
|
|
||||||
<para>
|
|
||||||
The part of the interface most used by drivers is reading and
|
|
||||||
writing memory-mapped registers on the device. Linux provides
|
|
||||||
interfaces to read and write 8-bit, 16-bit, 32-bit and 64-bit
|
|
||||||
quantities. Due to a historical accident, these are named byte,
|
|
||||||
word, long and quad accesses. Both read and write accesses are
|
|
||||||
supported; there is no prefetch support at this time.
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<para>
|
|
||||||
The functions are named <function>readb</function>,
|
|
||||||
<function>readw</function>, <function>readl</function>,
|
|
||||||
<function>readq</function>, <function>readb_relaxed</function>,
|
|
||||||
<function>readw_relaxed</function>, <function>readl_relaxed</function>,
|
|
||||||
<function>readq_relaxed</function>, <function>writeb</function>,
|
|
||||||
<function>writew</function>, <function>writel</function> and
|
|
||||||
<function>writeq</function>.
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<para>
|
|
||||||
Some devices (such as framebuffers) would like to use larger
|
|
||||||
transfers than 8 bytes at a time. For these devices, the
|
|
||||||
<function>memcpy_toio</function>, <function>memcpy_fromio</function>
|
|
||||||
and <function>memset_io</function> functions are provided.
|
|
||||||
Do not use memset or memcpy on IO addresses; they
|
|
||||||
are not guaranteed to copy data in order.
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<para>
|
|
||||||
The read and write functions are defined to be ordered. That is, the
|
|
||||||
compiler is not permitted to reorder the I/O sequence. When the
|
|
||||||
ordering can be compiler optimised, you can use <function>
|
|
||||||
__readb</function> and friends to indicate the relaxed ordering. Use
|
|
||||||
this with care.
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<para>
|
|
||||||
While the basic functions are defined to be synchronous with respect
|
|
||||||
to each other and ordered with respect to each other, the busses the
|
|
||||||
devices sit on may themselves have asynchronicity. In particular many
|
|
||||||
authors are burned by the fact that PCI bus writes are posted
|
|
||||||
asynchronously. A driver author must issue a read from the same
|
|
||||||
device to ensure that writes have occurred in the specific cases the
|
|
||||||
author cares about. This kind of property cannot be hidden from driver
|
|
||||||
writers in the API. In some cases, the read used to flush the device
|
|
||||||
may be expected to fail (if the card is resetting, for example). In
|
|
||||||
that case, the read should be done from config space, which is
|
|
||||||
guaranteed to soft-fail if the card doesn't respond.
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<para>
|
|
||||||
The following is an example of flushing a write to a device when
|
|
||||||
the driver would like to ensure the write's effects are visible prior
|
|
||||||
to continuing execution.
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<programlisting>
|
|
||||||
static inline void
|
|
||||||
qla1280_disable_intrs(struct scsi_qla_host *ha)
|
|
||||||
{
|
|
||||||
struct device_reg *reg;
|
|
||||||
|
|
||||||
reg = ha->iobase;
|
|
||||||
/* disable risc and host interrupts */
|
|
||||||
WRT_REG_WORD(&reg->ictrl, 0);
|
|
||||||
/*
|
|
||||||
* The following read will ensure that the above write
|
|
||||||
* has been received by the device before we return from this
|
|
||||||
* function.
|
|
||||||
*/
|
|
||||||
RD_REG_WORD(&reg->ictrl);
|
|
||||||
ha->flags.ints_enabled = 0;
|
|
||||||
}
|
|
||||||
</programlisting>
|
|
||||||
|
|
||||||
<para>
|
|
||||||
In addition to write posting, on some large multiprocessing systems
|
|
||||||
(e.g. SGI Challenge, Origin and Altix machines) posted writes won't
|
|
||||||
be strongly ordered coming from different CPUs. Thus it's important
|
|
||||||
to properly protect parts of your driver that do memory-mapped writes
|
|
||||||
with locks and use the <function>mmiowb</function> to make sure they
|
|
||||||
arrive in the order intended. Issuing a regular <function>readX
|
|
||||||
</function> will also ensure write ordering, but should only be used
|
|
||||||
when the driver has to be sure that the write has actually arrived
|
|
||||||
at the device (not that it's simply ordered with respect to other
|
|
||||||
writes), since a full <function>readX</function> is a relatively
|
|
||||||
expensive operation.
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<para>
|
|
||||||
Generally, one should use <function>mmiowb</function> prior to
|
|
||||||
releasing a spinlock that protects regions using <function>writeb
|
|
||||||
</function> or similar functions that aren't surrounded by <function>
|
|
||||||
readb</function> calls, which will ensure ordering and flushing. The
|
|
||||||
following pseudocode illustrates what might occur if write ordering
|
|
||||||
isn't guaranteed via <function>mmiowb</function> or one of the
|
|
||||||
<function>readX</function> functions.
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<programlisting>
|
|
||||||
CPU A: spin_lock_irqsave(&dev_lock, flags)
|
|
||||||
CPU A: ...
|
|
||||||
CPU A: writel(newval, ring_ptr);
|
|
||||||
CPU A: spin_unlock_irqrestore(&dev_lock, flags)
|
|
||||||
...
|
|
||||||
CPU B: spin_lock_irqsave(&dev_lock, flags)
|
|
||||||
CPU B: writel(newval2, ring_ptr);
|
|
||||||
CPU B: ...
|
|
||||||
CPU B: spin_unlock_irqrestore(&dev_lock, flags)
|
|
||||||
</programlisting>
|
|
||||||
|
|
||||||
<para>
|
|
||||||
In the case above, newval2 could be written to ring_ptr before
|
|
||||||
newval. Fixing it is easy though:
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<programlisting>
|
|
||||||
CPU A: spin_lock_irqsave(&dev_lock, flags)
|
|
||||||
CPU A: ...
|
|
||||||
CPU A: writel(newval, ring_ptr);
|
|
||||||
CPU A: mmiowb(); /* ensure no other writes beat us to the device */
|
|
||||||
CPU A: spin_unlock_irqrestore(&dev_lock, flags)
|
|
||||||
...
|
|
||||||
CPU B: spin_lock_irqsave(&dev_lock, flags)
|
|
||||||
CPU B: writel(newval2, ring_ptr);
|
|
||||||
CPU B: ...
|
|
||||||
CPU B: mmiowb();
|
|
||||||
CPU B: spin_unlock_irqrestore(&dev_lock, flags)
|
|
||||||
</programlisting>
|
|
||||||
|
|
||||||
<para>
|
|
||||||
See tg3.c for a real world example of how to use <function>mmiowb
|
|
||||||
</function>.
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<para>
|
|
||||||
PCI ordering rules also guarantee that PIO read responses arrive
|
|
||||||
after any outstanding DMA writes from that bus, since for some devices
|
|
||||||
the result of a <function>readb</function> call may signal to the
|
|
||||||
driver that a DMA transaction is complete. In many cases, however,
|
|
||||||
the driver may want to indicate that the next
|
|
||||||
<function>readb</function> call has no relation to any previous DMA
|
|
||||||
writes performed by the device. The driver can use
|
|
||||||
<function>readb_relaxed</function> for these cases, although only
|
|
||||||
some platforms will honor the relaxed semantics. Using the relaxed
|
|
||||||
read functions will provide significant performance benefits on
|
|
||||||
platforms that support it. The qla2xxx driver provides examples
|
|
||||||
of how to use <function>readX_relaxed</function>. In many cases,
|
|
||||||
a majority of the driver's <function>readX</function> calls can
|
|
||||||
safely be converted to <function>readX_relaxed</function> calls, since
|
|
||||||
only a few will indicate or depend on DMA completion.
|
|
||||||
</para>
|
|
||||||
</sect1>
|
|
||||||
|
|
||||||
</chapter>
|
|
||||||
|
|
||||||
<chapter id="port_space_accesses">
|
|
||||||
<title>Port Space Accesses</title>
|
|
||||||
<sect1 id="port_space_explained">
|
|
||||||
<title>Port Space Explained</title>
|
|
||||||
|
|
||||||
<para>
|
|
||||||
Another form of IO commonly supported is Port Space. This is a
|
|
||||||
range of addresses separate to the normal memory address space.
|
|
||||||
Access to these addresses is generally not as fast as accesses
|
|
||||||
to the memory mapped addresses, and it also has a potentially
|
|
||||||
smaller address space.
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<para>
|
|
||||||
Unlike memory mapped IO, no preparation is required
|
|
||||||
to access port space.
|
|
||||||
</para>
|
|
||||||
|
|
||||||
</sect1>
|
|
||||||
<sect1 id="accessing_port_space">
|
|
||||||
<title>Accessing Port Space</title>
|
|
||||||
<para>
|
|
||||||
Accesses to this space are provided through a set of functions
|
|
||||||
which allow 8-bit, 16-bit and 32-bit accesses; also
|
|
||||||
known as byte, word and long. These functions are
|
|
||||||
<function>inb</function>, <function>inw</function>,
|
|
||||||
<function>inl</function>, <function>outb</function>,
|
|
||||||
<function>outw</function> and <function>outl</function>.
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<para>
|
|
||||||
Some variants are provided for these functions. Some devices
|
|
||||||
require that accesses to their ports are slowed down. This
|
|
||||||
functionality is provided by appending a <function>_p</function>
|
|
||||||
to the end of the function name. There are also equivalents to memcpy.
|
|
||||||
The <function>ins</function> and <function>outs</function>
|
|
||||||
functions copy bytes, words or longs to the given port.
|
|
||||||
</para>
|
|
||||||
</sect1>
|
|
||||||
|
|
||||||
</chapter>
|
|
||||||
|
|
||||||
<chapter id="pubfunctions">
|
|
||||||
<title>Public Functions Provided</title>
|
|
||||||
!Iarch/x86/include/asm/io.h
|
|
||||||
!Elib/pci_iomap.c
|
|
||||||
</chapter>
|
|
||||||
|
|
||||||
</book>
|
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -1,304 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
|
|
||||||
"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
|
|
||||||
|
|
||||||
<book id="regulator-api">
|
|
||||||
<bookinfo>
|
|
||||||
<title>Voltage and current regulator API</title>
|
|
||||||
|
|
||||||
<authorgroup>
|
|
||||||
<author>
|
|
||||||
<firstname>Liam</firstname>
|
|
||||||
<surname>Girdwood</surname>
|
|
||||||
<affiliation>
|
|
||||||
<address>
|
|
||||||
<email>lrg@slimlogic.co.uk</email>
|
|
||||||
</address>
|
|
||||||
</affiliation>
|
|
||||||
</author>
|
|
||||||
<author>
|
|
||||||
<firstname>Mark</firstname>
|
|
||||||
<surname>Brown</surname>
|
|
||||||
<affiliation>
|
|
||||||
<orgname>Wolfson Microelectronics</orgname>
|
|
||||||
<address>
|
|
||||||
<email>broonie@opensource.wolfsonmicro.com</email>
|
|
||||||
</address>
|
|
||||||
</affiliation>
|
|
||||||
</author>
|
|
||||||
</authorgroup>
|
|
||||||
|
|
||||||
<copyright>
|
|
||||||
<year>2007-2008</year>
|
|
||||||
<holder>Wolfson Microelectronics</holder>
|
|
||||||
</copyright>
|
|
||||||
<copyright>
|
|
||||||
<year>2008</year>
|
|
||||||
<holder>Liam Girdwood</holder>
|
|
||||||
</copyright>
|
|
||||||
|
|
||||||
<legalnotice>
|
|
||||||
<para>
|
|
||||||
This documentation is free software; you can redistribute
|
|
||||||
it and/or modify it under the terms of the GNU General Public
|
|
||||||
License version 2 as published by the Free Software Foundation.
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<para>
|
|
||||||
This program is distributed in the hope that it will be
|
|
||||||
useful, but WITHOUT ANY WARRANTY; without even the implied
|
|
||||||
warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
||||||
See the GNU General Public License for more details.
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<para>
|
|
||||||
You should have received a copy of the GNU General Public
|
|
||||||
License along with this program; if not, write to the Free
|
|
||||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
|
|
||||||
MA 02111-1307 USA
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<para>
|
|
||||||
For more details see the file COPYING in the source
|
|
||||||
distribution of Linux.
|
|
||||||
</para>
|
|
||||||
</legalnotice>
|
|
||||||
</bookinfo>
|
|
||||||
|
|
||||||
<toc></toc>
|
|
||||||
|
|
||||||
<chapter id="intro">
|
|
||||||
<title>Introduction</title>
|
|
||||||
<para>
|
|
||||||
This framework is designed to provide a standard kernel
|
|
||||||
interface to control voltage and current regulators.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
The intention is to allow systems to dynamically control
|
|
||||||
regulator power output in order to save power and prolong
|
|
||||||
battery life. This applies to both voltage regulators (where
|
|
||||||
voltage output is controllable) and current sinks (where current
|
|
||||||
limit is controllable).
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
Note that additional (and currently more complete) documentation
|
|
||||||
is available in the Linux kernel source under
|
|
||||||
<filename>Documentation/power/regulator</filename>.
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<sect1 id="glossary">
|
|
||||||
<title>Glossary</title>
|
|
||||||
<para>
|
|
||||||
The regulator API uses a number of terms which may not be
|
|
||||||
familiar:
|
|
||||||
</para>
|
|
||||||
<glossary>
|
|
||||||
|
|
||||||
<glossentry>
|
|
||||||
<glossterm>Regulator</glossterm>
|
|
||||||
<glossdef>
|
|
||||||
<para>
|
|
||||||
Electronic device that supplies power to other devices. Most
|
|
||||||
regulators can enable and disable their output and some can also
|
|
||||||
control their output voltage or current.
|
|
||||||
</para>
|
|
||||||
</glossdef>
|
|
||||||
</glossentry>
|
|
||||||
|
|
||||||
<glossentry>
|
|
||||||
<glossterm>Consumer</glossterm>
|
|
||||||
<glossdef>
|
|
||||||
<para>
|
|
||||||
Electronic device which consumes power provided by a regulator.
|
|
||||||
These may either be static, requiring only a fixed supply, or
|
|
||||||
dynamic, requiring active management of the regulator at
|
|
||||||
runtime.
|
|
||||||
</para>
|
|
||||||
</glossdef>
|
|
||||||
</glossentry>
|
|
||||||
|
|
||||||
<glossentry>
|
|
||||||
<glossterm>Power Domain</glossterm>
|
|
||||||
<glossdef>
|
|
||||||
<para>
|
|
||||||
The electronic circuit supplied by a given regulator, including
|
|
||||||
the regulator and all consumer devices. The configuration of
|
|
||||||
the regulator is shared between all the components in the
|
|
||||||
circuit.
|
|
||||||
</para>
|
|
||||||
</glossdef>
|
|
||||||
</glossentry>
|
|
||||||
|
|
||||||
<glossentry>
|
|
||||||
<glossterm>Power Management Integrated Circuit</glossterm>
|
|
||||||
<acronym>PMIC</acronym>
|
|
||||||
<glossdef>
|
|
||||||
<para>
|
|
||||||
An IC which contains numerous regulators and often also other
|
|
||||||
subsystems. In an embedded system the primary PMIC is often
|
|
||||||
equivalent to a combination of the PSU and southbridge in a
|
|
||||||
desktop system.
|
|
||||||
</para>
|
|
||||||
</glossdef>
|
|
||||||
</glossentry>
|
|
||||||
</glossary>
|
|
||||||
</sect1>
|
|
||||||
</chapter>
|
|
||||||
|
|
||||||
<chapter id="consumer">
|
|
||||||
<title>Consumer driver interface</title>
|
|
||||||
<para>
|
|
||||||
This offers a similar API to the kernel clock framework.
|
|
||||||
Consumer drivers use <link
|
|
||||||
linkend='API-regulator-get'>get</link> and <link
|
|
||||||
linkend='API-regulator-put'>put</link> operations to acquire and
|
|
||||||
release regulators. Functions are
|
|
||||||
provided to <link linkend='API-regulator-enable'>enable</link>
|
|
||||||
and <link linkend='API-regulator-disable'>disable</link> the
|
|
||||||
regulator and to get and set the runtime parameters of the
|
|
||||||
regulator.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
When requesting regulators, consumers use symbolic names for their
|
|
||||||
supplies, such as "Vcc", which are mapped into actual regulator
|
|
||||||
devices by the machine interface.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
A stub version of this API is provided when the regulator
|
|
||||||
framework is not in use in order to minimise the need to use
|
|
||||||
ifdefs.
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<sect1 id="consumer-enable">
|
|
||||||
<title>Enabling and disabling</title>
|
|
||||||
<para>
|
|
||||||
The regulator API provides reference counted enabling and
|
|
||||||
disabling of regulators. Consumer devices use the <function><link
|
|
||||||
linkend='API-regulator-enable'>regulator_enable</link></function>
|
|
||||||
and <function><link
|
|
||||||
linkend='API-regulator-disable'>regulator_disable</link>
|
|
||||||
</function> functions to enable and disable regulators. Calls
|
|
||||||
to the two functions must be balanced.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
Note that since multiple consumers may be using a regulator and
|
|
||||||
machine constraints may not allow the regulator to be disabled,
|
|
||||||
there is no guarantee that calling
|
|
||||||
<function>regulator_disable</function> will actually cause the
|
|
||||||
supply provided by the regulator to be disabled. Consumer
|
|
||||||
drivers should assume that the regulator may be enabled at all
|
|
||||||
times.
|
|
||||||
</para>
|
|
||||||
</sect1>
|
|
||||||
|
|
||||||
<sect1 id="consumer-config">
|
|
||||||
<title>Configuration</title>
|
|
||||||
<para>
|
|
||||||
Some consumer devices may need to be able to dynamically
|
|
||||||
configure their supplies. For example, MMC drivers may need to
|
|
||||||
select the correct operating voltage for their cards. This may
|
|
||||||
be done while the regulator is enabled or disabled.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
The <function><link
|
|
||||||
linkend='API-regulator-set-voltage'>regulator_set_voltage</link>
|
|
||||||
</function> and <function><link
|
|
||||||
linkend='API-regulator-set-current-limit'
|
|
||||||
>regulator_set_current_limit</link>
|
|
||||||
</function> functions provide the primary interface for this.
|
|
||||||
Both take ranges of voltages and currents, supporting drivers
|
|
||||||
that do not require a specific value (e.g., CPU frequency scaling
|
|
||||||
normally permits the CPU to use a wider range of supply
|
|
||||||
voltages at lower frequencies but does not require that the
|
|
||||||
supply voltage be lowered). Where an exact value is required
|
|
||||||
both minimum and maximum values should be identical.
|
|
||||||
</para>
|
|
||||||
</sect1>
|
|
||||||
|
|
||||||
<sect1 id="consumer-callback">
|
|
||||||
<title>Callbacks</title>
|
|
||||||
<para>
|
|
||||||
Callbacks may also be <link
|
|
||||||
linkend='API-regulator-register-notifier'>registered</link>
|
|
||||||
for events such as regulation failures.
|
|
||||||
</para>
|
|
||||||
</sect1>
|
|
||||||
</chapter>
|
|
||||||
|
|
||||||
<chapter id="driver">
|
|
||||||
<title>Regulator driver interface</title>
|
|
||||||
<para>
|
|
||||||
Drivers for regulator chips <link
|
|
||||||
linkend='API-regulator-register'>register</link> the regulators
|
|
||||||
with the regulator core, providing operations structures to the
|
|
||||||
core. A <link
|
|
||||||
linkend='API-regulator-notifier-call-chain'>notifier</link> interface
|
|
||||||
allows error conditions to be reported to the core.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
Registration should be triggered by explicit setup done by the
|
|
||||||
platform, supplying a <link
|
|
||||||
linkend='API-struct-regulator-init-data'>struct
|
|
||||||
regulator_init_data</link> for the regulator containing
|
|
||||||
<link linkend='machine-constraint'>constraint</link> and
|
|
||||||
<link linkend='machine-supply'>supply</link> information.
|
|
||||||
</para>
|
|
||||||
</chapter>
|
|
||||||
|
|
||||||
<chapter id="machine">
|
|
||||||
<title>Machine interface</title>
|
|
||||||
<para>
|
|
||||||
This interface provides a way to define how regulators are
|
|
||||||
connected to consumers on a given system and what the valid
|
|
||||||
operating parameters are for the system.
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<sect1 id="machine-supply">
|
|
||||||
<title>Supplies</title>
|
|
||||||
<para>
|
|
||||||
Regulator supplies are specified using <link
|
|
||||||
linkend='API-struct-regulator-consumer-supply'>struct
|
|
||||||
regulator_consumer_supply</link>. This is done at
|
|
||||||
<link linkend='driver'>driver registration
|
|
||||||
time</link> as part of the machine constraints.
|
|
||||||
</para>
|
|
||||||
</sect1>
|
|
||||||
|
|
||||||
<sect1 id="machine-constraint">
|
|
||||||
<title>Constraints</title>
|
|
||||||
<para>
|
|
||||||
As well as defining the connections the machine interface
|
|
||||||
also provides constraints defining the operations that
|
|
||||||
clients are allowed to perform and the parameters that may be
|
|
||||||
set. This is required since generally regulator devices will
|
|
||||||
offer more flexibility than it is safe to use on a given
|
|
||||||
system, for example supporting higher supply voltages than the
|
|
||||||
consumers are rated for.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
This is done at <link linkend='driver'>driver
|
|
||||||
registration time</link> by providing a <link
|
|
||||||
linkend='API-struct-regulation-constraints'>struct
|
|
||||||
regulation_constraints</link>.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
The constraints may also specify an initial configuration for the
|
|
||||||
regulator, which is particularly useful for
|
|
||||||
use with static consumers.
|
|
||||||
</para>
|
|
||||||
</sect1>
|
|
||||||
</chapter>
|
|
||||||
|
|
||||||
<chapter id="api">
|
|
||||||
<title>API reference</title>
|
|
||||||
<para>
|
|
||||||
Due to limitations of the kernel documentation framework and the
|
|
||||||
existing layout of the source code the entire regulator API is
|
|
||||||
documented here.
|
|
||||||
</para>
|
|
||||||
!Iinclude/linux/regulator/consumer.h
|
|
||||||
!Iinclude/linux/regulator/machine.h
|
|
||||||
!Iinclude/linux/regulator/driver.h
|
|
||||||
!Edrivers/regulator/core.c
|
|
||||||
</chapter>
|
|
||||||
</book>
|
|
||||||
@@ -43,7 +43,7 @@ ALLSPHINXOPTS = $(KERNELDOC_CONF) $(PAPEROPT_$(PAPER)) $(SPHINXOPTS)
|
|||||||
I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
|
I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
|
||||||
|
|
||||||
# commands; the 'cmd' from scripts/Kbuild.include is not *loopable*
|
# commands; the 'cmd' from scripts/Kbuild.include is not *loopable*
|
||||||
loop_cmd = $(echo-cmd) $(cmd_$(1))
|
loop_cmd = $(echo-cmd) $(cmd_$(1)) || exit;
|
||||||
|
|
||||||
# $2 sphinx builder e.g. "html"
|
# $2 sphinx builder e.g. "html"
|
||||||
# $3 name of the build subfolder / e.g. "media", used as:
|
# $3 name of the build subfolder / e.g. "media", used as:
|
||||||
@@ -54,7 +54,8 @@ loop_cmd = $(echo-cmd) $(cmd_$(1))
|
|||||||
# e.g. "media" for the linux-tv book-set at ./Documentation/media
|
# e.g. "media" for the linux-tv book-set at ./Documentation/media
|
||||||
|
|
||||||
quiet_cmd_sphinx = SPHINX $@ --> file://$(abspath $(BUILDDIR)/$3/$4)
|
quiet_cmd_sphinx = SPHINX $@ --> file://$(abspath $(BUILDDIR)/$3/$4)
|
||||||
cmd_sphinx = $(MAKE) BUILDDIR=$(abspath $(BUILDDIR)) $(build)=Documentation/media $2;\
|
cmd_sphinx = $(MAKE) BUILDDIR=$(abspath $(BUILDDIR)) $(build)=Documentation/media $2 && \
|
||||||
|
PYTHONDONTWRITEBYTECODE=1 \
|
||||||
BUILDDIR=$(abspath $(BUILDDIR)) SPHINX_CONF=$(abspath $(srctree)/$(src)/$5/$(SPHINX_CONF)) \
|
BUILDDIR=$(abspath $(BUILDDIR)) SPHINX_CONF=$(abspath $(srctree)/$(src)/$5/$(SPHINX_CONF)) \
|
||||||
$(SPHINXBUILD) \
|
$(SPHINXBUILD) \
|
||||||
-b $2 \
|
-b $2 \
|
||||||
@@ -63,13 +64,16 @@ quiet_cmd_sphinx = SPHINX $@ --> file://$(abspath $(BUILDDIR)/$3/$4)
|
|||||||
-D version=$(KERNELVERSION) -D release=$(KERNELRELEASE) \
|
-D version=$(KERNELVERSION) -D release=$(KERNELRELEASE) \
|
||||||
$(ALLSPHINXOPTS) \
|
$(ALLSPHINXOPTS) \
|
||||||
$(abspath $(srctree)/$(src)/$5) \
|
$(abspath $(srctree)/$(src)/$5) \
|
||||||
$(abspath $(BUILDDIR)/$3/$4);
|
$(abspath $(BUILDDIR)/$3/$4)
|
||||||
|
|
||||||
htmldocs:
|
htmldocs:
|
||||||
@$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,html,$(var),,$(var)))
|
@+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,html,$(var),,$(var)))
|
||||||
|
|
||||||
|
linkcheckdocs:
|
||||||
|
@$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,linkcheck,$(var),,$(var)))
|
||||||
|
|
||||||
latexdocs:
|
latexdocs:
|
||||||
@$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,latex,$(var),latex,$(var)))
|
@+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,latex,$(var),latex,$(var)))
|
||||||
|
|
||||||
ifeq ($(HAVE_PDFLATEX),0)
|
ifeq ($(HAVE_PDFLATEX),0)
|
||||||
|
|
||||||
@@ -80,27 +84,34 @@ pdfdocs:
|
|||||||
else # HAVE_PDFLATEX
|
else # HAVE_PDFLATEX
|
||||||
|
|
||||||
pdfdocs: latexdocs
|
pdfdocs: latexdocs
|
||||||
$(foreach var,$(SPHINXDIRS), $(MAKE) PDFLATEX=$(PDFLATEX) LATEXOPTS="$(LATEXOPTS)" -C $(BUILDDIR)/$(var)/latex;)
|
$(foreach var,$(SPHINXDIRS), $(MAKE) PDFLATEX=$(PDFLATEX) LATEXOPTS="$(LATEXOPTS)" -C $(BUILDDIR)/$(var)/latex || exit;)
|
||||||
|
|
||||||
endif # HAVE_PDFLATEX
|
endif # HAVE_PDFLATEX
|
||||||
|
|
||||||
epubdocs:
|
epubdocs:
|
||||||
@$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,epub,$(var),epub,$(var)))
|
@+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,epub,$(var),epub,$(var)))
|
||||||
|
|
||||||
xmldocs:
|
xmldocs:
|
||||||
@$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,xml,$(var),xml,$(var)))
|
@+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,xml,$(var),xml,$(var)))
|
||||||
|
|
||||||
|
endif # HAVE_SPHINX
|
||||||
|
|
||||||
|
# The following targets are independent of HAVE_SPHINX, and the rules should
|
||||||
|
# work or silently pass without Sphinx.
|
||||||
|
|
||||||
# no-ops for the Sphinx toolchain
|
# no-ops for the Sphinx toolchain
|
||||||
sgmldocs:
|
sgmldocs:
|
||||||
|
@:
|
||||||
psdocs:
|
psdocs:
|
||||||
|
@:
|
||||||
mandocs:
|
mandocs:
|
||||||
|
@:
|
||||||
installmandocs:
|
installmandocs:
|
||||||
|
@:
|
||||||
|
|
||||||
cleandocs:
|
cleandocs:
|
||||||
$(Q)rm -rf $(BUILDDIR)
|
$(Q)rm -rf $(BUILDDIR)
|
||||||
$(Q)$(MAKE) BUILDDIR=$(abspath $(BUILDDIR)) -C Documentation/media clean
|
$(Q)$(MAKE) BUILDDIR=$(abspath $(BUILDDIR)) $(build)=Documentation/media clean
|
||||||
|
|
||||||
endif # HAVE_SPHINX
|
|
||||||
|
|
||||||
dochelp:
|
dochelp:
|
||||||
@echo ' Linux kernel internal documentation in different formats (Sphinx):'
|
@echo ' Linux kernel internal documentation in different formats (Sphinx):'
|
||||||
@@ -109,6 +120,7 @@ dochelp:
|
|||||||
@echo ' pdfdocs - PDF'
|
@echo ' pdfdocs - PDF'
|
||||||
@echo ' epubdocs - EPUB'
|
@echo ' epubdocs - EPUB'
|
||||||
@echo ' xmldocs - XML'
|
@echo ' xmldocs - XML'
|
||||||
|
@echo ' linkcheckdocs - check for broken external links (will connect to external hosts)'
|
||||||
@echo ' cleandocs - clean all generated files'
|
@echo ' cleandocs - clean all generated files'
|
||||||
@echo
|
@echo
|
||||||
@echo ' make SPHINXDIRS="s1 s2" [target] Generate only docs of folder s1, s2'
|
@echo ' make SPHINXDIRS="s1 s2" [target] Generate only docs of folder s1, s2'
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ What is Linux?
|
|||||||
loading, shared copy-on-write executables, proper memory management,
|
loading, shared copy-on-write executables, proper memory management,
|
||||||
and multistack networking including IPv4 and IPv6.
|
and multistack networking including IPv4 and IPv6.
|
||||||
|
|
||||||
It is distributed under the GNU General Public License - see the
|
It is distributed under the GNU General Public License v2 - see the
|
||||||
accompanying COPYING file for more details.
|
accompanying COPYING file for more details.
|
||||||
|
|
||||||
On what hardware does it run?
|
On what hardware does it run?
|
||||||
@@ -236,7 +236,7 @@ Configuring the kernel
|
|||||||
|
|
||||||
- Having unnecessary drivers will make the kernel bigger, and can
|
- Having unnecessary drivers will make the kernel bigger, and can
|
||||||
under some circumstances lead to problems: probing for a
|
under some circumstances lead to problems: probing for a
|
||||||
nonexistent controller card may confuse your other controllers
|
nonexistent controller card may confuse your other controllers.
|
||||||
|
|
||||||
- A kernel with math-emulation compiled in will still use the
|
- A kernel with math-emulation compiled in will still use the
|
||||||
coprocessor if one is present: the math emulation will just
|
coprocessor if one is present: the math emulation will just
|
||||||
|
|||||||
@@ -93,9 +93,9 @@ Command Language Reference
|
|||||||
At the lexical level, a command comprises a sequence of words separated
|
At the lexical level, a command comprises a sequence of words separated
|
||||||
by spaces or tabs. So these are all equivalent::
|
by spaces or tabs. So these are all equivalent::
|
||||||
|
|
||||||
nullarbor:~ # echo -c 'file svcsock.c line 1603 +p' >
|
nullarbor:~ # echo -n 'file svcsock.c line 1603 +p' >
|
||||||
<debugfs>/dynamic_debug/control
|
<debugfs>/dynamic_debug/control
|
||||||
nullarbor:~ # echo -c ' file svcsock.c line 1603 +p ' >
|
nullarbor:~ # echo -n ' file svcsock.c line 1603 +p ' >
|
||||||
<debugfs>/dynamic_debug/control
|
<debugfs>/dynamic_debug/control
|
||||||
nullarbor:~ # echo -n 'file svcsock.c line 1603 +p' >
|
nullarbor:~ # echo -n 'file svcsock.c line 1603 +p' >
|
||||||
<debugfs>/dynamic_debug/control
|
<debugfs>/dynamic_debug/control
|
||||||
|
|||||||
@@ -90,7 +90,7 @@ and thus removes any access restriction implied by it.
|
|||||||
4. IOC_PR_PREEMPT
|
4. IOC_PR_PREEMPT
|
||||||
|
|
||||||
This ioctl command releases the existing reservation referred to by
|
This ioctl command releases the existing reservation referred to by
|
||||||
old_key and replaces it with a a new reservation of type for the
|
old_key and replaces it with a new reservation of type for the
|
||||||
reservation key new_key.
|
reservation key new_key.
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -615,7 +615,7 @@ to allocate a page of memory for that task.
|
|||||||
|
|
||||||
If a cpuset has its 'cpuset.cpus' modified, then each task in that cpuset
|
If a cpuset has its 'cpuset.cpus' modified, then each task in that cpuset
|
||||||
will have its allowed CPU placement changed immediately. Similarly,
|
will have its allowed CPU placement changed immediately. Similarly,
|
||||||
if a task's pid is written to another cpusets 'cpuset.tasks' file, then its
|
if a task's pid is written to another cpuset's 'tasks' file, then its
|
||||||
allowed CPU placement is changed immediately. If such a task had been
|
allowed CPU placement is changed immediately. If such a task had been
|
||||||
bound to some subset of its cpuset using the sched_setaffinity() call,
|
bound to some subset of its cpuset using the sched_setaffinity() call,
|
||||||
the task will be allowed to run on any CPU allowed in its new cpuset,
|
the task will be allowed to run on any CPU allowed in its new cpuset,
|
||||||
|
|||||||
@@ -58,7 +58,7 @@ master_doc = 'index'
|
|||||||
|
|
||||||
# General information about the project.
|
# General information about the project.
|
||||||
project = 'The Linux Kernel'
|
project = 'The Linux Kernel'
|
||||||
copyright = '2016, The kernel development community'
|
copyright = 'The kernel development community'
|
||||||
author = 'The kernel development community'
|
author = 'The kernel development community'
|
||||||
|
|
||||||
# The version info for the project you're documenting, acts as replacement for
|
# The version info for the project you're documenting, acts as replacement for
|
||||||
|
|||||||
@@ -0,0 +1,372 @@
|
|||||||
|
=========================
|
||||||
|
CPU hotplug in the Kernel
|
||||||
|
=========================
|
||||||
|
|
||||||
|
:Date: December, 2016
|
||||||
|
:Author: Sebastian Andrzej Siewior <bigeasy@linutronix.de>,
|
||||||
|
Rusty Russell <rusty@rustcorp.com.au>,
|
||||||
|
Srivatsa Vaddagiri <vatsa@in.ibm.com>,
|
||||||
|
Ashok Raj <ashok.raj@intel.com>,
|
||||||
|
Joel Schopp <jschopp@austin.ibm.com>
|
||||||
|
|
||||||
|
Introduction
|
||||||
|
============
|
||||||
|
|
||||||
|
Modern advances in system architectures have introduced advanced error
|
||||||
|
reporting and correction capabilities in processors. There are a couple of OEMs that
|
||||||
|
support NUMA hardware which are hot pluggable as well, where physical node
|
||||||
|
insertion and removal require support for CPU hotplug.
|
||||||
|
|
||||||
|
Such advances require CPUs available to a kernel to be removed either for
|
||||||
|
provisioning reasons, or for RAS purposes to keep an offending CPU off
|
||||||
|
system execution path. Hence the need for CPU hotplug support in the
|
||||||
|
Linux kernel.
|
||||||
|
|
||||||
|
A more novel use of CPU-hotplug support is its use today in suspend resume
|
||||||
|
support for SMP. Dual-core and HT support makes even a laptop run SMP kernels
|
||||||
|
which didn't support these methods.
|
||||||
|
|
||||||
|
|
||||||
|
Command Line Switches
|
||||||
|
=====================
|
||||||
|
``maxcpus=n``
|
||||||
|
Restrict boot time CPUs to *n*. Say if you have four CPUs, using
|
||||||
|
``maxcpus=2`` will only boot two. You can choose to bring the
|
||||||
|
other CPUs later online.
|
||||||
|
|
||||||
|
``nr_cpus=n``
|
||||||
|
Restrict the total number of CPUs the kernel will support. If the number
|
||||||
|
supplied here is lower than the number of physically available CPUs then
|
||||||
|
those CPUs can not be brought online later.
|
||||||
|
|
||||||
|
``additional_cpus=n``
|
||||||
|
Use this to limit hotpluggable CPUs. This option sets
|
||||||
|
``cpu_possible_mask = cpu_present_mask + additional_cpus``
|
||||||
|
|
||||||
|
This option is limited to the IA64 architecture.
|
||||||
|
|
||||||
|
``possible_cpus=n``
|
||||||
|
This option sets ``possible_cpus`` bits in ``cpu_possible_mask``.
|
||||||
|
|
||||||
|
This option is limited to the X86 and S390 architecture.
|
||||||
|
|
||||||
|
``cede_offline={"off","on"}``
|
||||||
|
Use this option to disable/enable putting offlined processors to an extended
|
||||||
|
``H_CEDE`` state on supported pseries platforms. If nothing is specified,
|
||||||
|
``cede_offline`` is set to "on".
|
||||||
|
|
||||||
|
This option is limited to the PowerPC architecture.
|
||||||
|
|
||||||
|
``cpu0_hotplug``
|
||||||
|
Allow CPU0 to be shut down.
|
||||||
|
|
||||||
|
This option is limited to the X86 architecture.
|
||||||
|
|
||||||
|
CPU maps
|
||||||
|
========
|
||||||
|
|
||||||
|
``cpu_possible_mask``
|
||||||
|
Bitmap of possible CPUs that can ever be available in the
|
||||||
|
system. This is used to allocate some boot time memory for per_cpu variables
|
||||||
|
that aren't designed to grow/shrink as CPUs are made available or removed.
|
||||||
|
Once set during boot time discovery phase, the map is static, i.e no bits
|
||||||
|
are added or removed anytime. Trimming it accurately for your system needs
|
||||||
|
upfront can save some boot time memory.
|
||||||
|
|
||||||
|
``cpu_online_mask``
|
||||||
|
Bitmap of all CPUs currently online. It's set in ``__cpu_up()``
|
||||||
|
after a CPU is available for kernel scheduling and ready to receive
|
||||||
|
interrupts from devices. It's cleared when a CPU is brought down using
|
||||||
|
``__cpu_disable()``, before which all OS services including interrupts are
|
||||||
|
migrated to another target CPU.
|
||||||
|
|
||||||
|
``cpu_present_mask``
|
||||||
|
Bitmap of CPUs currently present in the system. Not all
|
||||||
|
of them may be online. When physical hotplug is processed by the relevant
|
||||||
|
subsystem (e.g. ACPI), the map can change and a bit is either added to or removed
|
||||||
|
from the map depending on whether the event is hot-add or hot-remove. There are currently
|
||||||
|
no locking rules as of now. Typical usage is to init topology during boot,
|
||||||
|
at which time hotplug is disabled.
|
||||||
|
|
||||||
|
You really don't need to manipulate any of the system CPU maps. They should
|
||||||
|
be read-only for most use. When setting up per-cpu resources almost always use
|
||||||
|
``cpu_possible_mask`` or ``for_each_possible_cpu()`` to iterate. The macro
|
||||||
|
``for_each_cpu()`` can be used to iterate over a custom CPU mask.
|
||||||
|
|
||||||
|
Never use anything other than ``cpumask_t`` to represent bitmap of CPUs.
|
||||||
|
|
||||||
|
|
||||||
|
Using CPU hotplug
|
||||||
|
=================
|
||||||
|
The kernel option *CONFIG_HOTPLUG_CPU* needs to be enabled. It is currently
|
||||||
|
available on multiple architectures including ARM, MIPS, PowerPC and X86. The
|
||||||
|
configuration is done via the sysfs interface: ::
|
||||||
|
|
||||||
|
$ ls -lh /sys/devices/system/cpu
|
||||||
|
total 0
|
||||||
|
drwxr-xr-x 9 root root 0 Dec 21 16:33 cpu0
|
||||||
|
drwxr-xr-x 9 root root 0 Dec 21 16:33 cpu1
|
||||||
|
drwxr-xr-x 9 root root 0 Dec 21 16:33 cpu2
|
||||||
|
drwxr-xr-x 9 root root 0 Dec 21 16:33 cpu3
|
||||||
|
drwxr-xr-x 9 root root 0 Dec 21 16:33 cpu4
|
||||||
|
drwxr-xr-x 9 root root 0 Dec 21 16:33 cpu5
|
||||||
|
drwxr-xr-x 9 root root 0 Dec 21 16:33 cpu6
|
||||||
|
drwxr-xr-x 9 root root 0 Dec 21 16:33 cpu7
|
||||||
|
drwxr-xr-x 2 root root 0 Dec 21 16:33 hotplug
|
||||||
|
-r--r--r-- 1 root root 4.0K Dec 21 16:33 offline
|
||||||
|
-r--r--r-- 1 root root 4.0K Dec 21 16:33 online
|
||||||
|
-r--r--r-- 1 root root 4.0K Dec 21 16:33 possible
|
||||||
|
-r--r--r-- 1 root root 4.0K Dec 21 16:33 present
|
||||||
|
|
||||||
|
The files *offline*, *online*, *possible*, *present* represent the CPU masks.
|
||||||
|
Each CPU folder contains an *online* file which controls the logical on (1) and
|
||||||
|
off (0) state. To logically shutdown CPU4: ::
|
||||||
|
|
||||||
|
$ echo 0 > /sys/devices/system/cpu/cpu4/online
|
||||||
|
smpboot: CPU 4 is now offline
|
||||||
|
|
||||||
|
Once the CPU is shutdown, it will be removed from */proc/interrupts*,
|
||||||
|
*/proc/cpuinfo* and should also not be shown by the *top* command. To
|
||||||
|
bring CPU4 back online: ::
|
||||||
|
|
||||||
|
$ echo 1 > /sys/devices/system/cpu/cpu4/online
|
||||||
|
smpboot: Booting Node 0 Processor 4 APIC 0x1
|
||||||
|
|
||||||
|
The CPU is usable again. This should work on all CPUs. CPU0 is often special
|
||||||
|
and excluded from CPU hotplug. On X86 the kernel option
|
||||||
|
*CONFIG_BOOTPARAM_HOTPLUG_CPU0* has to be enabled in order to be able to
|
||||||
|
shutdown CPU0. Alternatively the kernel command option *cpu0_hotplug* can be
|
||||||
|
used. Some known dependencies of CPU0:
|
||||||
|
|
||||||
|
* Resume from hibernate/suspend. Hibernate/suspend will fail if CPU0 is offline.
|
||||||
|
* PIC interrupts. CPU0 can't be removed if a PIC interrupt is detected.
|
||||||
|
|
||||||
|
Please let Fenghua Yu <fenghua.yu@intel.com> know if you find any dependencies
|
||||||
|
on CPU0.
|
||||||
|
|
||||||
|
The CPU hotplug coordination
|
||||||
|
============================
|
||||||
|
|
||||||
|
The offline case
|
||||||
|
----------------
|
||||||
|
Once a CPU has been logically shutdown the teardown callbacks of registered
|
||||||
|
hotplug states will be invoked, starting with ``CPUHP_ONLINE`` and terminating
|
||||||
|
at state ``CPUHP_OFFLINE``. This includes:
|
||||||
|
|
||||||
|
* If tasks are frozen due to a suspend operation then *cpuhp_tasks_frozen*
|
||||||
|
will be set to true.
|
||||||
|
* All processes are migrated away from this outgoing CPU to new CPUs.
|
||||||
|
The new CPU is chosen from each process' current cpuset, which may be
|
||||||
|
a subset of all online CPUs.
|
||||||
|
* All interrupts targeted to this CPU are migrated to a new CPU
|
||||||
|
* timers are also migrated to a new CPU
|
||||||
|
* Once all services are migrated, kernel calls an arch specific routine
|
||||||
|
``__cpu_disable()`` to perform arch specific cleanup.
|
||||||
|
|
||||||
|
Using the hotplug API
|
||||||
|
---------------------
|
||||||
|
It is possible to receive notifications once a CPU is offlined or onlined. This
|
||||||
|
might be important to certain drivers which need to perform some kind of setup
|
||||||
|
or clean up functions based on the number of available CPUs: ::
|
||||||
|
|
||||||
|
#include <linux/cpuhotplug.h>
|
||||||
|
|
||||||
|
ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "X/Y:online",
|
||||||
|
Y_online, Y_prepare_down);
|
||||||
|
|
||||||
|
*X* is the subsystem and *Y* the particular driver. The *Y_online* callback
|
||||||
|
will be invoked during registration on all online CPUs. If an error
|
||||||
|
occurs during the online callback the *Y_prepare_down* callback will be
|
||||||
|
invoked on all CPUs on which the online callback was previously invoked.
|
||||||
|
After registration completed, the *Y_online* callback will be invoked
|
||||||
|
once a CPU is brought online and *Y_prepare_down* will be invoked when a
|
||||||
|
CPU is shutdown. All resources which were previously allocated in
|
||||||
|
*Y_online* should be released in *Y_prepare_down*.
|
||||||
|
The return value *ret* is negative if an error occurred during the
|
||||||
|
registration process. Otherwise a positive value is returned which
|
||||||
|
contains the allocated hotplug state for dynamically allocated states
|
||||||
|
(*CPUHP_AP_ONLINE_DYN*). It will return zero for predefined states.
|
||||||
|
|
||||||
|
The callback can be removed by invoking ``cpuhp_remove_state()``. In case of a
|
||||||
|
dynamically allocated state (*CPUHP_AP_ONLINE_DYN*) use the returned state.
|
||||||
|
During the removal of a hotplug state the teardown callback will be invoked.
|
||||||
|
|
||||||
|
Multiple instances
|
||||||
|
~~~~~~~~~~~~~~~~~~
|
||||||
|
If a driver has multiple instances and each instance needs to perform the
|
||||||
|
callback independently then it is likely that a ''multi-state'' should be used.
|
||||||
|
First a multi-state state needs to be registered: ::
|
||||||
|
|
||||||
|
ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "X/Y:online",
|
||||||
|
Y_online, Y_prepare_down);
|
||||||
|
Y_hp_online = ret;
|
||||||
|
|
||||||
|
The ``cpuhp_setup_state_multi()`` behaves similar to ``cpuhp_setup_state()``
|
||||||
|
except it prepares the callbacks for a multi state and does not invoke
|
||||||
|
the callbacks. This is a one time setup.
|
||||||
|
Once a new instance is allocated, you need to register this new instance: ::
|
||||||
|
|
||||||
|
ret = cpuhp_state_add_instance(Y_hp_online, &d->node);
|
||||||
|
|
||||||
|
This function will add this instance to your previously allocated
|
||||||
|
*Y_hp_online* state and invoke the previously registered callback
|
||||||
|
(*Y_online*) on all online CPUs. The *node* element is a ``struct
|
||||||
|
hlist_node`` member of your per-instance data structure.
|
||||||
|
|
||||||
|
On removal of the instance: ::
|
||||||
|
cpuhp_state_remove_instance(Y_hp_online, &d->node)
|
||||||
|
|
||||||
|
should be invoked which will invoke the teardown callback on all online
|
||||||
|
CPUs.
|
||||||
|
|
||||||
|
Manual setup
|
||||||
|
~~~~~~~~~~~~
|
||||||
|
Usually it is handy to invoke setup and teardown callbacks on registration or
|
||||||
|
removal of a state because usually the operation needs to be performed once a CPU
|
||||||
|
goes online (offline) and during initial setup (shutdown) of the driver. However
|
||||||
|
each registration and removal function is also available with a ``_nocalls``
|
||||||
|
suffix which does not invoke the provided callbacks if the invocation of the
|
||||||
|
callbacks is not desired. During the manual setup (or teardown) the functions
|
||||||
|
``get_online_cpus()`` and ``put_online_cpus()`` should be used to inhibit CPU
|
||||||
|
hotplug operations.
|
||||||
|
|
||||||
|
|
||||||
|
The ordering of the events
|
||||||
|
--------------------------
|
||||||
|
The hotplug states are defined in ``include/linux/cpuhotplug.h``:
|
||||||
|
|
||||||
|
* The states *CPUHP_OFFLINE* … *CPUHP_AP_OFFLINE* are invoked before the
|
||||||
|
CPU is up.
|
||||||
|
* The states *CPUHP_AP_OFFLINE* … *CPUHP_AP_ONLINE* are invoked
|
||||||
|
just after the CPU has been brought up. The interrupts are off and
|
||||||
|
the scheduler is not yet active on this CPU. Starting with *CPUHP_AP_OFFLINE*
|
||||||
|
the callbacks are invoked on the target CPU.
|
||||||
|
* The states between *CPUHP_AP_ONLINE_DYN* and *CPUHP_AP_ONLINE_DYN_END* are
|
||||||
|
reserved for the dynamic allocation.
|
||||||
|
* The states are invoked in the reverse order on CPU shutdown starting with
|
||||||
|
*CPUHP_ONLINE* and stopping at *CPUHP_OFFLINE*. Here the callbacks are
|
||||||
|
invoked on the CPU that will be shutdown until *CPUHP_AP_OFFLINE*.
|
||||||
|
|
||||||
|
A dynamically allocated state via *CPUHP_AP_ONLINE_DYN* is often enough.
|
||||||
|
However if an earlier invocation during the bring up or shutdown is required
|
||||||
|
then an explicit state should be acquired. An explicit state might also be
|
||||||
|
required if the hotplug event requires specific ordering with respect to
|
||||||
|
another hotplug event.
|
||||||
|
|
||||||
|
Testing of hotplug states
|
||||||
|
=========================
|
||||||
|
One way to verify whether a custom state is working as expected or not is to
|
||||||
|
shutdown a CPU and then put it online again. It is also possible to put the CPU
|
||||||
|
to certain state (for instance *CPUHP_AP_ONLINE*) and then go back to
|
||||||
|
*CPUHP_ONLINE*. This would simulate an error one state after *CPUHP_AP_ONLINE*
|
||||||
|
which would lead to rollback to the online state.
|
||||||
|
|
||||||
|
All registered states are enumerated in ``/sys/devices/system/cpu/hotplug/states``: ::
|
||||||
|
|
||||||
|
$ tail /sys/devices/system/cpu/hotplug/states
|
||||||
|
138: mm/vmscan:online
|
||||||
|
139: mm/vmstat:online
|
||||||
|
140: lib/percpu_cnt:online
|
||||||
|
141: acpi/cpu-drv:online
|
||||||
|
142: base/cacheinfo:online
|
||||||
|
143: virtio/net:online
|
||||||
|
144: x86/mce:online
|
||||||
|
145: printk:online
|
||||||
|
168: sched:active
|
||||||
|
169: online
|
||||||
|
|
||||||
|
To rollback CPU4 to ``lib/percpu_cnt:online`` and back online just issue: ::
|
||||||
|
|
||||||
|
$ cat /sys/devices/system/cpu/cpu4/hotplug/state
|
||||||
|
169
|
||||||
|
$ echo 140 > /sys/devices/system/cpu/cpu4/hotplug/target
|
||||||
|
$ cat /sys/devices/system/cpu/cpu4/hotplug/state
|
||||||
|
140
|
||||||
|
|
||||||
|
It is important to note that the teardown callback of state 140 has been
|
||||||
|
invoked. And now get back online: ::
|
||||||
|
|
||||||
|
$ echo 169 > /sys/devices/system/cpu/cpu4/hotplug/target
|
||||||
|
$ cat /sys/devices/system/cpu/cpu4/hotplug/state
|
||||||
|
169
|
||||||
|
|
||||||
|
With trace events enabled, the individual steps are visible, too: ::
|
||||||
|
|
||||||
|
# TASK-PID CPU# TIMESTAMP FUNCTION
|
||||||
|
# | | | | |
|
||||||
|
bash-394 [001] 22.976: cpuhp_enter: cpu: 0004 target: 140 step: 169 (cpuhp_kick_ap_work)
|
||||||
|
cpuhp/4-31 [004] 22.977: cpuhp_enter: cpu: 0004 target: 140 step: 168 (sched_cpu_deactivate)
|
||||||
|
cpuhp/4-31 [004] 22.990: cpuhp_exit: cpu: 0004 state: 168 step: 168 ret: 0
|
||||||
|
cpuhp/4-31 [004] 22.991: cpuhp_enter: cpu: 0004 target: 140 step: 144 (mce_cpu_pre_down)
|
||||||
|
cpuhp/4-31 [004] 22.992: cpuhp_exit: cpu: 0004 state: 144 step: 144 ret: 0
|
||||||
|
cpuhp/4-31 [004] 22.993: cpuhp_multi_enter: cpu: 0004 target: 140 step: 143 (virtnet_cpu_down_prep)
|
||||||
|
cpuhp/4-31 [004] 22.994: cpuhp_exit: cpu: 0004 state: 143 step: 143 ret: 0
|
||||||
|
cpuhp/4-31 [004] 22.995: cpuhp_enter: cpu: 0004 target: 140 step: 142 (cacheinfo_cpu_pre_down)
|
||||||
|
cpuhp/4-31 [004] 22.996: cpuhp_exit: cpu: 0004 state: 142 step: 142 ret: 0
|
||||||
|
bash-394 [001] 22.997: cpuhp_exit: cpu: 0004 state: 140 step: 169 ret: 0
|
||||||
|
bash-394 [005] 95.540: cpuhp_enter: cpu: 0004 target: 169 step: 140 (cpuhp_kick_ap_work)
|
||||||
|
cpuhp/4-31 [004] 95.541: cpuhp_enter: cpu: 0004 target: 169 step: 141 (acpi_soft_cpu_online)
|
||||||
|
cpuhp/4-31 [004] 95.542: cpuhp_exit: cpu: 0004 state: 141 step: 141 ret: 0
|
||||||
|
cpuhp/4-31 [004] 95.543: cpuhp_enter: cpu: 0004 target: 169 step: 142 (cacheinfo_cpu_online)
|
||||||
|
cpuhp/4-31 [004] 95.544: cpuhp_exit: cpu: 0004 state: 142 step: 142 ret: 0
|
||||||
|
cpuhp/4-31 [004] 95.545: cpuhp_multi_enter: cpu: 0004 target: 169 step: 143 (virtnet_cpu_online)
|
||||||
|
cpuhp/4-31 [004] 95.546: cpuhp_exit: cpu: 0004 state: 143 step: 143 ret: 0
|
||||||
|
cpuhp/4-31 [004] 95.547: cpuhp_enter: cpu: 0004 target: 169 step: 144 (mce_cpu_online)
|
||||||
|
cpuhp/4-31 [004] 95.548: cpuhp_exit: cpu: 0004 state: 144 step: 144 ret: 0
|
||||||
|
cpuhp/4-31 [004] 95.549: cpuhp_enter: cpu: 0004 target: 169 step: 145 (console_cpu_notify)
|
||||||
|
cpuhp/4-31 [004] 95.550: cpuhp_exit: cpu: 0004 state: 145 step: 145 ret: 0
|
||||||
|
cpuhp/4-31 [004] 95.551: cpuhp_enter: cpu: 0004 target: 169 step: 168 (sched_cpu_activate)
|
||||||
|
cpuhp/4-31 [004] 95.552: cpuhp_exit: cpu: 0004 state: 168 step: 168 ret: 0
|
||||||
|
bash-394 [005] 95.553: cpuhp_exit: cpu: 0004 state: 169 step: 140 ret: 0
|
||||||
|
|
||||||
|
As can be seen, CPU4 went down until timestamp 22.996 and then back up until
|
||||||
|
95.552. All invoked callbacks including their return codes are visible in the
|
||||||
|
trace.
|
||||||
|
|
||||||
|
Architecture's requirements
|
||||||
|
===========================
|
||||||
|
The following functions and configurations are required:
|
||||||
|
|
||||||
|
``CONFIG_HOTPLUG_CPU``
|
||||||
|
This entry needs to be enabled in Kconfig
|
||||||
|
|
||||||
|
``__cpu_up()``
|
||||||
|
Arch interface to bring up a CPU
|
||||||
|
|
||||||
|
``__cpu_disable()``
|
||||||
|
Arch interface to shutdown a CPU, no more interrupts can be handled by the
|
||||||
|
kernel after the routine returns. This includes the shutdown of the timer.
|
||||||
|
|
||||||
|
``__cpu_die()``
|
||||||
|
This is actually supposed to ensure the death of the CPU. Actually look at some
|
||||||
|
example code in other arch that implement CPU hotplug. The processor is taken
|
||||||
|
down from the ``idle()`` loop for that specific architecture. ``__cpu_die()``
|
||||||
|
typically waits for some per_cpu state to be set, to ensure the processor dead
|
||||||
|
routine is called to be sure positively.
|
||||||
|
|
||||||
|
User Space Notification
|
||||||
|
=======================
|
||||||
|
After a CPU is successfully onlined or offlined, udev events are sent. A udev rule like: ::
|
||||||
|
|
||||||
|
SUBSYSTEM=="cpu", DRIVERS=="processor", DEVPATH=="/devices/system/cpu/*", RUN+="the_hotplug_receiver.sh"
|
||||||
|
|
||||||
|
will receive all events. A script like: ::
|
||||||
|
|
||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
if [ "${ACTION}" = "offline" ]
|
||||||
|
then
|
||||||
|
echo "CPU ${DEVPATH##*/} offline"
|
||||||
|
|
||||||
|
elif [ "${ACTION}" = "online" ]
|
||||||
|
then
|
||||||
|
echo "CPU ${DEVPATH##*/} online"
|
||||||
|
|
||||||
|
fi
|
||||||
|
|
||||||
|
can process the event further.
|
||||||
|
|
||||||
|
Kernel Inline Documentation Reference
|
||||||
|
======================================
|
||||||
|
|
||||||
|
.. kernel-doc:: include/linux/cpuhotplug.h
|
||||||
@@ -13,6 +13,7 @@ Core utilities
|
|||||||
|
|
||||||
assoc_array
|
assoc_array
|
||||||
atomic_ops
|
atomic_ops
|
||||||
|
cpu_hotplug
|
||||||
local_ops
|
local_ops
|
||||||
workqueue
|
workqueue
|
||||||
|
|
||||||
|
|||||||
@@ -82,7 +82,9 @@ UltraSPARC-III
|
|||||||
-------
|
-------
|
||||||
|
|
||||||
Several "PowerBook" and "iBook2" notebooks are supported.
|
Several "PowerBook" and "iBook2" notebooks are supported.
|
||||||
|
The following POWER processors are supported in powernv mode:
|
||||||
|
POWER8
|
||||||
|
POWER9
|
||||||
|
|
||||||
1.5 SuperH
|
1.5 SuperH
|
||||||
----------
|
----------
|
||||||
|
|||||||
@@ -1,452 +0,0 @@
|
|||||||
CPU hotplug Support in Linux(tm) Kernel
|
|
||||||
|
|
||||||
Maintainers:
|
|
||||||
CPU Hotplug Core:
|
|
||||||
Rusty Russell <rusty@rustcorp.com.au>
|
|
||||||
Srivatsa Vaddagiri <vatsa@in.ibm.com>
|
|
||||||
i386:
|
|
||||||
Zwane Mwaikambo <zwanem@gmail.com>
|
|
||||||
ppc64:
|
|
||||||
Nathan Lynch <nathanl@austin.ibm.com>
|
|
||||||
Joel Schopp <jschopp@austin.ibm.com>
|
|
||||||
ia64/x86_64:
|
|
||||||
Ashok Raj <ashok.raj@intel.com>
|
|
||||||
s390:
|
|
||||||
Heiko Carstens <heiko.carstens@de.ibm.com>
|
|
||||||
|
|
||||||
Authors: Ashok Raj <ashok.raj@intel.com>
|
|
||||||
Lots of feedback: Nathan Lynch <nathanl@austin.ibm.com>,
|
|
||||||
Joel Schopp <jschopp@austin.ibm.com>
|
|
||||||
|
|
||||||
Introduction
|
|
||||||
|
|
||||||
Modern advances in system architectures have introduced advanced error
|
|
||||||
reporting and correction capabilities in processors. CPU architectures permit
|
|
||||||
partitioning support, where compute resources of a single CPU could be made
|
|
||||||
available to virtual machine environments. There are couple OEMS that
|
|
||||||
support NUMA hardware which are hot pluggable as well, where physical
|
|
||||||
node insertion and removal require support for CPU hotplug.
|
|
||||||
|
|
||||||
Such advances require CPUs available to a kernel to be removed either for
|
|
||||||
provisioning reasons, or for RAS purposes to keep an offending CPU off
|
|
||||||
system execution path. Hence the need for CPU hotplug support in the
|
|
||||||
Linux kernel.
|
|
||||||
|
|
||||||
A more novel use of CPU-hotplug support is its use today in suspend
|
|
||||||
resume support for SMP. Dual-core and HT support makes even
|
|
||||||
a laptop run SMP kernels which didn't support these methods. SMP support
|
|
||||||
for suspend/resume is a work in progress.
|
|
||||||
|
|
||||||
General Stuff about CPU Hotplug
|
|
||||||
--------------------------------
|
|
||||||
|
|
||||||
Command Line Switches
|
|
||||||
---------------------
|
|
||||||
maxcpus=n Restrict boot time cpus to n. Say if you have 4 cpus, using
|
|
||||||
maxcpus=2 will only boot 2. You can choose to bring the
|
|
||||||
other cpus later online, read FAQ's for more info.
|
|
||||||
|
|
||||||
additional_cpus=n (*) Use this to limit hotpluggable cpus. This option sets
|
|
||||||
cpu_possible_mask = cpu_present_mask + additional_cpus
|
|
||||||
|
|
||||||
cede_offline={"off","on"} Use this option to disable/enable putting offlined
|
|
||||||
processors to an extended H_CEDE state on
|
|
||||||
supported pseries platforms.
|
|
||||||
If nothing is specified,
|
|
||||||
cede_offline is set to "on".
|
|
||||||
|
|
||||||
(*) Option valid only for following architectures
|
|
||||||
- ia64
|
|
||||||
|
|
||||||
ia64 uses the number of disabled local apics in ACPI tables MADT to
|
|
||||||
determine the number of potentially hot-pluggable cpus. The implementation
|
|
||||||
should only rely on this to count the # of cpus, but *MUST* not rely
|
|
||||||
on the apicid values in those tables for disabled apics. In the event
|
|
||||||
BIOS doesn't mark such hot-pluggable cpus as disabled entries, one could
|
|
||||||
use this parameter "additional_cpus=x" to represent those cpus in the
|
|
||||||
cpu_possible_mask.
|
|
||||||
|
|
||||||
possible_cpus=n [s390,x86_64] use this to set hotpluggable cpus.
|
|
||||||
This option sets possible_cpus bits in
|
|
||||||
cpu_possible_mask. Thus keeping the numbers of bits set
|
|
||||||
constant even if the machine gets rebooted.
|
|
||||||
|
|
||||||
CPU maps and such
|
|
||||||
-----------------
|
|
||||||
[More on cpumaps and primitive to manipulate, please check
|
|
||||||
include/linux/cpumask.h that has more descriptive text.]
|
|
||||||
|
|
||||||
cpu_possible_mask: Bitmap of possible CPUs that can ever be available in the
|
|
||||||
system. This is used to allocate some boot time memory for per_cpu variables
|
|
||||||
that aren't designed to grow/shrink as CPUs are made available or removed.
|
|
||||||
Once set during boot time discovery phase, the map is static, i.e. no bits
|
|
||||||
are added or removed anytime. Trimming it accurately for your system needs
|
|
||||||
upfront can save some boot time memory. See below for how we use heuristics
|
|
||||||
in x86_64 case to keep this under check.
|
|
||||||
|
|
||||||
cpu_online_mask: Bitmap of all CPUs currently online. It's set in __cpu_up()
|
|
||||||
after a CPU is available for kernel scheduling and ready to receive
|
|
||||||
interrupts from devices. It's cleared when a CPU is brought down using
|
|
||||||
__cpu_disable(), before which all OS services including interrupts are
|
|
||||||
migrated to another target CPU.
|
|
||||||
|
|
||||||
cpu_present_mask: Bitmap of CPUs currently present in the system. Not all
|
|
||||||
of them may be online. When physical hotplug is processed by the relevant
|
|
||||||
subsystem (e.g. ACPI), the map can change: a bit is either added to or removed
|
|
||||||
from the map depending on whether the event is hot-add/hot-remove. There are currently
|
|
||||||
no locking rules as of now. Typical usage is to init topology during boot,
|
|
||||||
at which time hotplug is disabled.
|
|
||||||
|
|
||||||
You really don't need to manipulate any of the system cpu maps. They should
|
|
||||||
be read-only for most use. When setting up per-cpu resources almost always use
|
|
||||||
cpu_possible_mask/for_each_possible_cpu() to iterate.
|
|
||||||
|
|
||||||
Never use anything other than cpumask_t to represent bitmap of CPUs.
|
|
||||||
|
|
||||||
#include <linux/cpumask.h>
|
|
||||||
|
|
||||||
for_each_possible_cpu - Iterate over cpu_possible_mask
|
|
||||||
for_each_online_cpu - Iterate over cpu_online_mask
|
|
||||||
for_each_present_cpu - Iterate over cpu_present_mask
|
|
||||||
for_each_cpu(x,mask) - Iterate over some random collection of cpu mask.
|
|
||||||
|
|
||||||
#include <linux/cpu.h>
|
|
||||||
get_online_cpus() and put_online_cpus():
|
|
||||||
|
|
||||||
The above calls are used to inhibit cpu hotplug operations. While the
|
|
||||||
cpu_hotplug.refcount is non zero, the cpu_online_mask will not change.
|
|
||||||
If you merely need to avoid cpus going away, you could also use
|
|
||||||
preempt_disable() and preempt_enable() for those sections.
|
|
||||||
Just remember the critical section cannot call any
|
|
||||||
function that can sleep or schedule this process away. The preempt_disable()
|
|
||||||
will work as long as stop_machine_run() is used to take a cpu down.
|
|
||||||
|
|
||||||
CPU Hotplug - Frequently Asked Questions.
|
|
||||||
|
|
||||||
Q: How to enable my kernel to support CPU hotplug?
|
|
||||||
A: When doing make defconfig, Enable CPU hotplug support
|
|
||||||
|
|
||||||
"Processor type and Features" -> Support for Hotpluggable CPUs
|
|
||||||
|
|
||||||
Make sure that you have CONFIG_SMP turned on as well.
|
|
||||||
|
|
||||||
You would need to enable CONFIG_HOTPLUG_CPU for SMP suspend/resume support
|
|
||||||
as well.
|
|
||||||
|
|
||||||
Q: What architectures support CPU hotplug?
|
|
||||||
A: As of 2.6.14, the following architectures support CPU hotplug.
|
|
||||||
|
|
||||||
i386 (Intel), ppc, ppc64, parisc, s390, ia64 and x86_64
|
|
||||||
|
|
||||||
Q: How to test if hotplug is supported on the newly built kernel?
|
|
||||||
A: You should now notice an entry in sysfs.
|
|
||||||
|
|
||||||
Check if sysfs is mounted, using the "mount" command. You should notice
|
|
||||||
an entry as shown below in the output.
|
|
||||||
|
|
||||||
....
|
|
||||||
none on /sys type sysfs (rw)
|
|
||||||
....
|
|
||||||
|
|
||||||
If this is not mounted, do the following.
|
|
||||||
|
|
||||||
#mkdir /sys
|
|
||||||
#mount -t sysfs sys /sys
|
|
||||||
|
|
||||||
Now you should see entries for all present CPUs; the following is an example
|
|
||||||
on an 8-way system.
|
|
||||||
|
|
||||||
#pwd
|
|
||||||
#/sys/devices/system/cpu
|
|
||||||
#ls -l
|
|
||||||
total 0
|
|
||||||
drwxr-xr-x 10 root root 0 Sep 19 07:44 .
|
|
||||||
drwxr-xr-x 13 root root 0 Sep 19 07:45 ..
|
|
||||||
drwxr-xr-x 3 root root 0 Sep 19 07:44 cpu0
|
|
||||||
drwxr-xr-x 3 root root 0 Sep 19 07:44 cpu1
|
|
||||||
drwxr-xr-x 3 root root 0 Sep 19 07:44 cpu2
|
|
||||||
drwxr-xr-x 3 root root 0 Sep 19 07:44 cpu3
|
|
||||||
drwxr-xr-x 3 root root 0 Sep 19 07:44 cpu4
|
|
||||||
drwxr-xr-x 3 root root 0 Sep 19 07:44 cpu5
|
|
||||||
drwxr-xr-x 3 root root 0 Sep 19 07:44 cpu6
|
|
||||||
drwxr-xr-x 3 root root 0 Sep 19 07:48 cpu7
|
|
||||||
|
|
||||||
Under each directory you would find an "online" file which is the control
|
|
||||||
file to logically online/offline a processor.
|
|
||||||
|
|
||||||
Q: Does hot-add/hot-remove refer to physical add/remove of cpus?
|
|
||||||
A: The usage of hot-add/remove may not be very consistently used in the code.
|
|
||||||
CONFIG_HOTPLUG_CPU enables logical online/offline capability in the kernel.
|
|
||||||
To support physical addition/removal, one would need some BIOS hooks and
|
|
||||||
the platform should have something like an attention button in PCI hotplug.
|
|
||||||
CONFIG_ACPI_HOTPLUG_CPU enables ACPI support for physical add/remove of CPUs.
|
|
||||||
|
|
||||||
Q: How do I logically offline a CPU?
|
|
||||||
A: Do the following.
|
|
||||||
|
|
||||||
#echo 0 > /sys/devices/system/cpu/cpuX/online
|
|
||||||
|
|
||||||
Once the logical offline is successful, check
|
|
||||||
|
|
||||||
#cat /proc/interrupts
|
|
||||||
|
|
||||||
You should now not see the CPU that you removed. Also online file will report
|
|
||||||
the state as 0 when a CPU is offline and 1 when it's online.
|
|
||||||
|
|
||||||
#To display the current cpu state.
|
|
||||||
#cat /sys/devices/system/cpu/cpuX/online
|
|
||||||
|
|
||||||
Q: Why can't I remove CPU0 on some systems?
|
|
||||||
A: Some architectures may have some special dependency on a certain CPU.
|
|
||||||
|
|
||||||
For example, on IA64 platforms we have the ability to send platform interrupts to the
|
|
||||||
OS, a.k.a. Corrected Platform Error Interrupts (CPEI). In current ACPI
|
|
||||||
specifications, we didn't have a way to change the target CPU. Hence if the
|
|
||||||
current ACPI version doesn't support such re-direction, we disable that CPU
|
|
||||||
by making it not-removable.
|
|
||||||
|
|
||||||
In such cases you will also notice that the online file is missing under cpu0.
|
|
||||||
|
|
||||||
Q: Is CPU0 removable on X86?
|
|
||||||
A: Yes. If kernel is compiled with CONFIG_BOOTPARAM_HOTPLUG_CPU0=y, CPU0 is
|
|
||||||
removable by default. Otherwise, CPU0 is also removable by kernel option
|
|
||||||
cpu0_hotplug.
|
|
||||||
|
|
||||||
But some features depend on CPU0. Two known dependencies are:
|
|
||||||
|
|
||||||
1. Resume from hibernate/suspend depends on CPU0. Hibernate/suspend will fail if
|
|
||||||
CPU0 is offline and you need to online CPU0 before hibernate/suspend can
|
|
||||||
continue.
|
|
||||||
2. PIC interrupts also depend on CPU0. CPU0 can't be removed if a PIC interrupt
|
|
||||||
is detected.
|
|
||||||
|
|
||||||
It's said poweroff/reboot may depend on CPU0 on some machines although I haven't
|
|
||||||
seen any poweroff/reboot failure so far after CPU0 is offline on a few tested
|
|
||||||
machines.
|
|
||||||
|
|
||||||
Please let me know if you know or see any other dependencies of CPU0.
|
|
||||||
|
|
||||||
If the dependencies are under your control, you can turn on CPU0 hotplug feature
|
|
||||||
either by CONFIG_BOOTPARAM_HOTPLUG_CPU0 or by kernel parameter cpu0_hotplug.
|
|
||||||
|
|
||||||
--Fenghua Yu <fenghua.yu@intel.com>
|
|
||||||
|
|
||||||
Q: How do I find out if a particular CPU is not removable?
|
|
||||||
A: Depending on the implementation, some architectures may show this by the
|
|
||||||
absence of the "online" file. This is done if it can be determined ahead of
|
|
||||||
time that this CPU cannot be removed.
|
|
||||||
|
|
||||||
In some situations, this can be a run time check, i.e if you try to remove the
|
|
||||||
last CPU, this will not be permitted. You can find such failures by
|
|
||||||
investigating the return value of the "echo" command.
|
|
||||||
|
|
||||||
Q: What happens when a CPU is being logically offlined?
|
|
||||||
A: The following happen, listed in no particular order :-)
|
|
||||||
|
|
||||||
- A notification is sent to in-kernel registered modules by sending an event
|
|
||||||
CPU_DOWN_PREPARE or CPU_DOWN_PREPARE_FROZEN, depending on whether or not the
|
|
||||||
CPU is being offlined while tasks are frozen due to a suspend operation in
|
|
||||||
progress
|
|
||||||
- All processes are migrated away from this outgoing CPU to new CPUs.
|
|
||||||
The new CPU is chosen from each process' current cpuset, which may be
|
|
||||||
a subset of all online CPUs.
|
|
||||||
- All interrupts targeted to this CPU are migrated to a new CPU
|
|
||||||
- timers/bottom halves/tasklets are also migrated to a new CPU
|
|
||||||
- Once all services are migrated, kernel calls an arch specific routine
|
|
||||||
__cpu_disable() to perform arch specific cleanup.
|
|
||||||
- Once this is successful, an event for successful cleanup is sent by an event
|
|
||||||
CPU_DEAD (or CPU_DEAD_FROZEN if tasks are frozen due to a suspend while the
|
|
||||||
CPU is being offlined).
|
|
||||||
|
|
||||||
"It is expected that each service cleans up when the CPU_DOWN_PREPARE
|
|
||||||
notifier is called, when CPU_DEAD is called it's expected there is nothing
|
|
||||||
running on behalf of this CPU that was offlined"
|
|
||||||
|
|
||||||
Q: If I have some kernel code that needs to be aware of CPU arrival and
|
|
||||||
departure, how do I arrange for proper notification?
|
|
||||||
A: This is what you would need in your kernel code to receive notifications.
|
|
||||||
|
|
||||||
#include <linux/cpu.h>
|
|
||||||
static int foobar_cpu_callback(struct notifier_block *nfb,
|
|
||||||
unsigned long action, void *hcpu)
|
|
||||||
{
|
|
||||||
unsigned int cpu = (unsigned long)hcpu;
|
|
||||||
|
|
||||||
switch (action) {
|
|
||||||
case CPU_ONLINE:
|
|
||||||
case CPU_ONLINE_FROZEN:
|
|
||||||
foobar_online_action(cpu);
|
|
||||||
break;
|
|
||||||
case CPU_DEAD:
|
|
||||||
case CPU_DEAD_FROZEN:
|
|
||||||
foobar_dead_action(cpu);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
return NOTIFY_OK;
|
|
||||||
}
|
|
||||||
|
|
||||||
static struct notifier_block foobar_cpu_notifier =
|
|
||||||
{
|
|
||||||
.notifier_call = foobar_cpu_callback,
|
|
||||||
};
|
|
||||||
|
|
||||||
You need to call register_cpu_notifier() from your init function.
|
|
||||||
Init functions could be of two types:
|
|
||||||
1. early init (init function called when only the boot processor is online).
|
|
||||||
2. late init (init function called _after_ all the CPUs are online).
|
|
||||||
|
|
||||||
For the first case, you should add the following to your init function
|
|
||||||
|
|
||||||
register_cpu_notifier(&foobar_cpu_notifier);
|
|
||||||
|
|
||||||
For the second case, you should add the following to your init function
|
|
||||||
|
|
||||||
register_hotcpu_notifier(&foobar_cpu_notifier);
|
|
||||||
|
|
||||||
You can fail PREPARE notifiers if something doesn't work to prepare resources.
|
|
||||||
This will stop the activity and send a following CANCELED event back.
|
|
||||||
|
|
||||||
CPU_DEAD should not be failed, it's just a goodness indication, but bad
|
|
||||||
things will happen if a notifier in path sent a BAD notify code.
|
|
||||||
|
|
||||||
Q: I don't see my action being called for all CPUs already up and running?
|
|
||||||
A: Yes, CPU notifiers are called only when new CPUs are on-lined or offlined.
|
|
||||||
If you need to perform some action for each CPU already in the system, then
|
|
||||||
do this:
|
|
||||||
|
|
||||||
for_each_online_cpu(i) {
|
|
||||||
foobar_cpu_callback(&foobar_cpu_notifier, CPU_UP_PREPARE, i);
|
|
||||||
foobar_cpu_callback(&foobar_cpu_notifier, CPU_ONLINE, i);
|
|
||||||
}
|
|
||||||
|
|
||||||
However, if you want to register a hotplug callback, as well as perform
|
|
||||||
some initialization for CPUs that are already online, then do this:
|
|
||||||
|
|
||||||
Version 1: (Correct)
|
|
||||||
---------
|
|
||||||
|
|
||||||
cpu_notifier_register_begin();
|
|
||||||
|
|
||||||
for_each_online_cpu(i) {
|
|
||||||
foobar_cpu_callback(&foobar_cpu_notifier,
|
|
||||||
CPU_UP_PREPARE, i);
|
|
||||||
foobar_cpu_callback(&foobar_cpu_notifier,
|
|
||||||
CPU_ONLINE, i);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Note the use of the double underscored version of the API */
|
|
||||||
__register_cpu_notifier(&foobar_cpu_notifier);
|
|
||||||
|
|
||||||
cpu_notifier_register_done();
|
|
||||||
|
|
||||||
Note that the following code is *NOT* the right way to achieve this,
|
|
||||||
because it is prone to an ABBA deadlock between the cpu_add_remove_lock
|
|
||||||
and the cpu_hotplug.lock.
|
|
||||||
|
|
||||||
Version 2: (Wrong!)
|
|
||||||
---------
|
|
||||||
|
|
||||||
get_online_cpus();
|
|
||||||
|
|
||||||
for_each_online_cpu(i) {
|
|
||||||
foobar_cpu_callback(&foobar_cpu_notifier,
|
|
||||||
CPU_UP_PREPARE, i);
|
|
||||||
foobar_cpu_callback(&foobar_cpu_notifier,
|
|
||||||
CPU_ONLINE, i);
|
|
||||||
}
|
|
||||||
|
|
||||||
register_cpu_notifier(&foobar_cpu_notifier);
|
|
||||||
|
|
||||||
put_online_cpus();
|
|
||||||
|
|
||||||
So always use the first version shown above when you want to register
|
|
||||||
callbacks as well as initialize the already online CPUs.
|
|
||||||
|
|
||||||
|
|
||||||
Q: If I would like to develop CPU hotplug support for a new architecture,
|
|
||||||
what do I need at a minimum?
|
|
||||||
A: The following are what is required for CPU hotplug infrastructure to work
|
|
||||||
correctly.
|
|
||||||
|
|
||||||
- Make sure you have an entry in Kconfig to enable CONFIG_HOTPLUG_CPU
|
|
||||||
- __cpu_up() - Arch interface to bring up a CPU
|
|
||||||
- __cpu_disable() - Arch interface to shutdown a CPU, no more interrupts
|
|
||||||
can be handled by the kernel after the routine
|
|
||||||
returns. Including local APIC timers etc are
|
|
||||||
shutdown.
|
|
||||||
- __cpu_die() - This is actually supposed to ensure death of the CPU.
|
|
||||||
Actually look at some example code in other arch
|
|
||||||
that implement CPU hotplug. The processor is taken
|
|
||||||
down from the idle() loop for that specific
|
|
||||||
architecture. __cpu_die() typically waits for some
|
|
||||||
per_cpu state to be set, to ensure the processor
|
|
||||||
dead routine is called to be sure positively.
|
|
||||||
|
|
||||||
Q: I need to ensure that a particular CPU is not removed when there is some
|
|
||||||
work specific to this CPU in progress.
|
|
||||||
A: There are two ways. If your code can be run in interrupt context, use
|
|
||||||
smp_call_function_single(), otherwise use work_on_cpu(). Note that
|
|
||||||
work_on_cpu() is slow, and can fail due to out of memory:
|
|
||||||
|
|
||||||
int my_func_on_cpu(int cpu)
|
|
||||||
{
|
|
||||||
int err;
|
|
||||||
get_online_cpus();
|
|
||||||
if (!cpu_online(cpu))
|
|
||||||
err = -EINVAL;
|
|
||||||
else
|
|
||||||
#if NEEDS_BLOCKING
|
|
||||||
err = work_on_cpu(cpu, __my_func_on_cpu, NULL);
|
|
||||||
#else
|
|
||||||
smp_call_function_single(cpu, __my_func_on_cpu, &err,
|
|
||||||
true);
|
|
||||||
#endif
|
|
||||||
put_online_cpus();
|
|
||||||
return err;
|
|
||||||
}
|
|
||||||
|
|
||||||
Q: How do we determine how many CPUs are available for hotplug?
|
|
||||||
A: There is no clear spec defined way from ACPI that can give us that
|
|
||||||
information today. Based on some input from Natalie of Unisys,
|
|
||||||
that the ACPI MADT (Multiple APIC Description Tables) marks those possible
|
|
||||||
CPUs in a system with disabled status.
|
|
||||||
|
|
||||||
Andi implemented some simple heuristics that count the number of disabled
|
|
||||||
CPUs in MADT as hotpluggable CPUS. In the case there are no disabled CPUS
|
|
||||||
we assume 1/2 the number of CPUs currently present can be hotplugged.
|
|
||||||
|
|
||||||
Caveat: ACPI MADT can only provide 256 entries in systems with only ACPI 2.0c
|
|
||||||
or earlier ACPI version supported, because the apicid field in MADT is only
|
|
||||||
8 bits. From ACPI 3.0, this limitation was removed since the apicid field
|
|
||||||
was extended to 32 bits with x2APIC introduced.
|
|
||||||
|
|
||||||
User Space Notification
|
|
||||||
|
|
||||||
Hotplug support for devices is common in Linux today. It's being used today to
|
|
||||||
support automatic configuration of network, usb and pci devices. A hotplug
|
|
||||||
event can be used to invoke an agent script to perform the configuration task.
|
|
||||||
|
|
||||||
You can add /etc/hotplug/cpu.agent to handle hotplug notification user space
|
|
||||||
scripts.
|
|
||||||
|
|
||||||
#!/bin/bash
|
|
||||||
# $Id: cpu.agent
|
|
||||||
# Kernel hotplug params include:
|
|
||||||
#ACTION=%s [online or offline]
|
|
||||||
#DEVPATH=%s
|
|
||||||
#
|
|
||||||
cd /etc/hotplug
|
|
||||||
. ./hotplug.functions
|
|
||||||
|
|
||||||
case $ACTION in
|
|
||||||
online)
|
|
||||||
echo `date` ":cpu.agent" add cpu >> /tmp/hotplug.txt
|
|
||||||
;;
|
|
||||||
offline)
|
|
||||||
echo `date` ":cpu.agent" remove cpu >>/tmp/hotplug.txt
|
|
||||||
;;
|
|
||||||
*)
|
|
||||||
debug_mesg CPU $ACTION event not supported
|
|
||||||
exit 1
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
@@ -103,3 +103,9 @@ have already built it.
|
|||||||
|
|
||||||
The optional make variable CF can be used to pass arguments to sparse. The
|
The optional make variable CF can be used to pass arguments to sparse. The
|
||||||
build system passes -Wbitwise to sparse automatically.
|
build system passes -Wbitwise to sparse automatically.
|
||||||
|
|
||||||
|
Checking RCU annotations
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
RCU annotations are not checked by default. To enable RCU annotation
|
||||||
|
checks, include -DCONFIG_SPARSE_RCU_POINTER in your CF flags.
|
||||||
|
|||||||
@@ -116,9 +116,11 @@ crc32table.h*
|
|||||||
cscope.*
|
cscope.*
|
||||||
defkeymap.c
|
defkeymap.c
|
||||||
devlist.h*
|
devlist.h*
|
||||||
|
devicetable-offsets.h
|
||||||
dnotify_test
|
dnotify_test
|
||||||
docproc
|
docproc
|
||||||
dslm
|
dslm
|
||||||
|
dtc
|
||||||
elf2ecoff
|
elf2ecoff
|
||||||
elfconfig.h*
|
elfconfig.h*
|
||||||
evergreen_reg_safe.h
|
evergreen_reg_safe.h
|
||||||
@@ -153,8 +155,8 @@ keywords.c
|
|||||||
ksym.c*
|
ksym.c*
|
||||||
ksym.h*
|
ksym.h*
|
||||||
kxgettext
|
kxgettext
|
||||||
lex.c
|
*lex.c
|
||||||
lex.*.c
|
*lex.*.c
|
||||||
linux
|
linux
|
||||||
logo_*.c
|
logo_*.c
|
||||||
logo_*_clut224.c
|
logo_*_clut224.c
|
||||||
@@ -215,6 +217,7 @@ series
|
|||||||
setup
|
setup
|
||||||
setup.bin
|
setup.bin
|
||||||
setup.elf
|
setup.elf
|
||||||
|
sortextable
|
||||||
sImage
|
sImage
|
||||||
sm_tbl*
|
sm_tbl*
|
||||||
split-include
|
split-include
|
||||||
|
|||||||
@@ -0,0 +1,201 @@
|
|||||||
|
.. Copyright 2001 Matthew Wilcox
|
||||||
|
..
|
||||||
|
.. This documentation is free software; you can redistribute
|
||||||
|
.. it and/or modify it under the terms of the GNU General Public
|
||||||
|
.. License as published by the Free Software Foundation; either
|
||||||
|
.. version 2 of the License, or (at your option) any later
|
||||||
|
.. version.
|
||||||
|
|
||||||
|
===============================
|
||||||
|
Bus-Independent Device Accesses
|
||||||
|
===============================
|
||||||
|
|
||||||
|
:Author: Matthew Wilcox
|
||||||
|
:Author: Alan Cox
|
||||||
|
|
||||||
|
Introduction
|
||||||
|
============
|
||||||
|
|
||||||
|
Linux provides an API which abstracts performing IO across all busses
|
||||||
|
and devices, allowing device drivers to be written independently of bus
|
||||||
|
type.
|
||||||
|
|
||||||
|
Memory Mapped IO
|
||||||
|
================
|
||||||
|
|
||||||
|
Getting Access to the Device
|
||||||
|
----------------------------
|
||||||
|
|
||||||
|
The most widely supported form of IO is memory mapped IO. That is, a
|
||||||
|
part of the CPU's address space is interpreted not as accesses to
|
||||||
|
memory, but as accesses to a device. Some architectures define devices
|
||||||
|
to be at a fixed address, but most have some method of discovering
|
||||||
|
devices. The PCI bus walk is a good example of such a scheme. This
|
||||||
|
document does not cover how to receive such an address, but assumes you
|
||||||
|
are starting with one. Physical addresses are of type unsigned long.
|
||||||
|
|
||||||
|
This address should not be used directly. Instead, to get an address
|
||||||
|
suitable for passing to the accessor functions described below, you
|
||||||
|
should call :c:func:`ioremap()`. An address suitable for accessing
|
||||||
|
the device will be returned to you.
|
||||||
|
|
||||||
|
After you've finished using the device (say, in your module's exit
|
||||||
|
routine), call :c:func:`iounmap()` in order to return the address
|
||||||
|
space to the kernel. Most architectures allocate new address space each
|
||||||
|
time you call :c:func:`ioremap()`, and they can run out unless you
|
||||||
|
call :c:func:`iounmap()`.
|
||||||
|
|
||||||
|
Accessing the device
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
The part of the interface most used by drivers is reading and writing
|
||||||
|
memory-mapped registers on the device. Linux provides interfaces to read
|
||||||
|
and write 8-bit, 16-bit, 32-bit and 64-bit quantities. Due to a
|
||||||
|
historical accident, these are named byte, word, long and quad accesses.
|
||||||
|
Both read and write accesses are supported; there is no prefetch support
|
||||||
|
at this time.
|
||||||
|
|
||||||
|
The functions are named readb(), readw(), readl(), readq(),
|
||||||
|
readb_relaxed(), readw_relaxed(), readl_relaxed(), readq_relaxed(),
|
||||||
|
writeb(), writew(), writel() and writeq().
|
||||||
|
|
||||||
|
Some devices (such as framebuffers) would like to use larger transfers than
|
||||||
|
8 bytes at a time. For these devices, the :c:func:`memcpy_toio()`,
|
||||||
|
:c:func:`memcpy_fromio()` and :c:func:`memset_io()` functions are
|
||||||
|
provided. Do not use memset or memcpy on IO addresses; they are not
|
||||||
|
guaranteed to copy data in order.
|
||||||
|
|
||||||
|
The read and write functions are defined to be ordered. That is, the
|
||||||
|
compiler is not permitted to reorder the I/O sequence. When the ordering
|
||||||
|
can be compiler optimised, you can use __readb() and friends to
|
||||||
|
indicate the relaxed ordering. Use this with care.
|
||||||
|
|
||||||
|
While the basic functions are defined to be synchronous with respect to
|
||||||
|
each other and ordered with respect to each other the busses the devices
|
||||||
|
sit on may themselves have asynchronicity. In particular many authors
|
||||||
|
are burned by the fact that PCI bus writes are posted asynchronously. A
|
||||||
|
driver author must issue a read from the same device to ensure that
|
||||||
|
writes have occurred in the specific cases the author cares about. This kind
|
||||||
|
of property cannot be hidden from driver writers in the API. In some
|
||||||
|
cases, the read used to flush the device may be expected to fail (if the
|
||||||
|
card is resetting, for example). In that case, the read should be done
|
||||||
|
from config space, which is guaranteed to soft-fail if the card doesn't
|
||||||
|
respond.
|
||||||
|
|
||||||
|
The following is an example of flushing a write to a device when the
|
||||||
|
driver would like to ensure the write's effects are visible prior to
|
||||||
|
continuing execution::
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
qla1280_disable_intrs(struct scsi_qla_host *ha)
|
||||||
|
{
|
||||||
|
struct device_reg *reg;
|
||||||
|
|
||||||
|
reg = ha->iobase;
|
||||||
|
/* disable risc and host interrupts */
|
||||||
|
WRT_REG_WORD(&reg->ictrl, 0);
|
||||||
|
/*
|
||||||
|
* The following read will ensure that the above write
|
||||||
|
* has been received by the device before we return from this
|
||||||
|
* function.
|
||||||
|
*/
|
||||||
|
RD_REG_WORD(&reg->ictrl);
|
||||||
|
ha->flags.ints_enabled = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
In addition to write posting, on some large multiprocessing systems
|
||||||
|
(e.g. SGI Challenge, Origin and Altix machines) posted writes won't be
|
||||||
|
strongly ordered coming from different CPUs. Thus it's important to
|
||||||
|
properly protect parts of your driver that do memory-mapped writes with
|
||||||
|
locks and use the :c:func:`mmiowb()` to make sure they arrive in the
|
||||||
|
order intended. Issuing a regular readX() will also ensure write ordering,
|
||||||
|
but should only be used when the
|
||||||
|
driver has to be sure that the write has actually arrived at the device
|
||||||
|
(not that it's simply ordered with respect to other writes), since a
|
||||||
|
full readX() is a relatively expensive operation.
|
||||||
|
|
||||||
|
Generally, one should use :c:func:`mmiowb()` prior to releasing a spinlock
|
||||||
|
that protects regions using :c:func:`writeb()` or similar functions that
|
||||||
|
aren't surrounded by readb() calls, which will ensure ordering
|
||||||
|
and flushing. The following pseudocode illustrates what might occur if
|
||||||
|
write ordering isn't guaranteed via :c:func:`mmiowb()` or one of the
|
||||||
|
readX() functions::
|
||||||
|
|
||||||
|
CPU A: spin_lock_irqsave(&dev_lock, flags)
|
||||||
|
CPU A: ...
|
||||||
|
CPU A: writel(newval, ring_ptr);
|
||||||
|
CPU A: spin_unlock_irqrestore(&dev_lock, flags)
|
||||||
|
...
|
||||||
|
CPU B: spin_lock_irqsave(&dev_lock, flags)
|
||||||
|
CPU B: writel(newval2, ring_ptr);
|
||||||
|
CPU B: ...
|
||||||
|
CPU B: spin_unlock_irqrestore(&dev_lock, flags)
|
||||||
|
|
||||||
|
In the case above, newval2 could be written to ring_ptr before newval.
|
||||||
|
Fixing it is easy though::
|
||||||
|
|
||||||
|
CPU A: spin_lock_irqsave(&dev_lock, flags)
|
||||||
|
CPU A: ...
|
||||||
|
CPU A: writel(newval, ring_ptr);
|
||||||
|
CPU A: mmiowb(); /* ensure no other writes beat us to the device */
|
||||||
|
CPU A: spin_unlock_irqrestore(&dev_lock, flags)
|
||||||
|
...
|
||||||
|
CPU B: spin_lock_irqsave(&dev_lock, flags)
|
||||||
|
CPU B: writel(newval2, ring_ptr);
|
||||||
|
CPU B: ...
|
||||||
|
CPU B: mmiowb();
|
||||||
|
CPU B: spin_unlock_irqrestore(&dev_lock, flags)
|
||||||
|
|
||||||
|
See tg3.c for a real world example of how to use :c:func:`mmiowb()`.
|
||||||
|
|
||||||
|
PCI ordering rules also guarantee that PIO read responses arrive after any
|
||||||
|
outstanding DMA writes from that bus, since for some devices the result of
|
||||||
|
a readb() call may signal to the driver that a DMA transaction is
|
||||||
|
complete. In many cases, however, the driver may want to indicate that the
|
||||||
|
next readb() call has no relation to any previous DMA writes
|
||||||
|
performed by the device. The driver can use readb_relaxed() for
|
||||||
|
these cases, although only some platforms will honor the relaxed
|
||||||
|
semantics. Using the relaxed read functions will provide significant
|
||||||
|
performance benefits on platforms that support it. The qla2xxx driver
|
||||||
|
provides examples of how to use readX_relaxed(). In many cases, a majority
|
||||||
|
of the driver's readX() calls can safely be converted to readX_relaxed()
|
||||||
|
calls, since only a few will indicate or depend on DMA completion.
|
||||||
|
|
||||||
|
Port Space Accesses
|
||||||
|
===================
|
||||||
|
|
||||||
|
Port Space Explained
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
Another form of IO commonly supported is Port Space. This is a range of
|
||||||
|
addresses separate to the normal memory address space. Access to these
|
||||||
|
addresses is generally not as fast as accesses to the memory mapped
|
||||||
|
addresses, and it also has a potentially smaller address space.
|
||||||
|
|
||||||
|
Unlike memory mapped IO, no preparation is required to access port
|
||||||
|
space.
|
||||||
|
|
||||||
|
Accessing Port Space
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
Accesses to this space are provided through a set of functions which
|
||||||
|
allow 8-bit, 16-bit and 32-bit accesses; also known as byte, word and
|
||||||
|
long. These functions are :c:func:`inb()`, :c:func:`inw()`,
|
||||||
|
:c:func:`inl()`, :c:func:`outb()`, :c:func:`outw()` and
|
||||||
|
:c:func:`outl()`.
|
||||||
|
|
||||||
|
Some variants are provided for these functions. Some devices require
|
||||||
|
that accesses to their ports are slowed down. This functionality is
|
||||||
|
provided by appending a ``_p`` to the end of the function.
|
||||||
|
There are also equivalents to memcpy. The :c:func:`ins()` and
|
||||||
|
:c:func:`outs()` functions copy bytes, words or longs to the given
|
||||||
|
port.
|
||||||
|
|
||||||
|
Public Functions Provided
|
||||||
|
=========================
|
||||||
|
|
||||||
|
.. kernel-doc:: arch/x86/include/asm/io.h
|
||||||
|
:internal:
|
||||||
|
|
||||||
|
.. kernel-doc:: lib/pci_iomap.c
|
||||||
|
:export:
|
||||||
@@ -1,3 +1,6 @@
|
|||||||
|
.. |struct dev_pm_domain| replace:: :c:type:`struct dev_pm_domain <dev_pm_domain>`
|
||||||
|
.. |struct generic_pm_domain| replace:: :c:type:`struct generic_pm_domain <generic_pm_domain>`
|
||||||
|
|
||||||
============
|
============
|
||||||
Device links
|
Device links
|
||||||
============
|
============
|
||||||
@@ -120,12 +123,11 @@ Examples
|
|||||||
is the same as if the MMU was the parent of the master device.
|
is the same as if the MMU was the parent of the master device.
|
||||||
|
|
||||||
The fact that both devices share the same power domain would normally
|
The fact that both devices share the same power domain would normally
|
||||||
suggest usage of a :c:type:`struct dev_pm_domain` or :c:type:`struct
|
suggest usage of a |struct dev_pm_domain| or |struct generic_pm_domain|,
|
||||||
generic_pm_domain`, however these are not independent devices that
|
however these are not independent devices that happen to share a power
|
||||||
happen to share a power switch, but rather the MMU device serves the
|
switch, but rather the MMU device serves the busmaster device and is
|
||||||
busmaster device and is useless without it. A device link creates a
|
useless without it. A device link creates a synthetic hierarchical
|
||||||
synthetic hierarchical relationship between the devices and is thus
|
relationship between the devices and is thus more apt.
|
||||||
more apt.
|
|
||||||
|
|
||||||
* A Thunderbolt host controller comprises a number of PCIe hotplug ports
|
* A Thunderbolt host controller comprises a number of PCIe hotplug ports
|
||||||
and an NHI device to manage the PCIe switch. On resume from system sleep,
|
and an NHI device to manage the PCIe switch. On resume from system sleep,
|
||||||
@@ -157,7 +159,7 @@ Examples
|
|||||||
Alternatives
|
Alternatives
|
||||||
============
|
============
|
||||||
|
|
||||||
* A :c:type:`struct dev_pm_domain` can be used to override the bus,
|
* A |struct dev_pm_domain| can be used to override the bus,
|
||||||
class or device type callbacks. It is intended for devices sharing
|
class or device type callbacks. It is intended for devices sharing
|
||||||
a single on/off switch, however it does not guarantee a specific
|
a single on/off switch, however it does not guarantee a specific
|
||||||
suspend/resume ordering, this needs to be implemented separately.
|
suspend/resume ordering, this needs to be implemented separately.
|
||||||
@@ -166,7 +168,7 @@ Alternatives
|
|||||||
suspended. Furthermore it cannot be used to enforce a specific shutdown
|
suspended. Furthermore it cannot be used to enforce a specific shutdown
|
||||||
ordering or a driver presence dependency.
|
ordering or a driver presence dependency.
|
||||||
|
|
||||||
* A :c:type:`struct generic_pm_domain` is a lot more heavyweight than a
|
* A |struct generic_pm_domain| is a lot more heavyweight than a
|
||||||
device link and does not allow for shutdown ordering or driver presence
|
device link and does not allow for shutdown ordering or driver presence
|
||||||
dependencies. It also cannot be used on ACPI systems.
|
dependencies. It also cannot be used on ACPI systems.
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,125 @@
|
|||||||
|
=======
|
||||||
|
Buffers
|
||||||
|
=======
|
||||||
|
|
||||||
|
* struct :c:type:`iio_buffer` — general buffer structure
|
||||||
|
* :c:func:`iio_validate_scan_mask_onehot` — Validates that exactly one channel
|
||||||
|
is selected
|
||||||
|
* :c:func:`iio_buffer_get` — Grab a reference to the buffer
|
||||||
|
* :c:func:`iio_buffer_put` — Release the reference to the buffer
|
||||||
|
|
||||||
|
The Industrial I/O core offers a way for continuous data capture based on a
|
||||||
|
trigger source. Multiple data channels can be read at once from the
|
||||||
|
:file:`/dev/iio:device{X}` character device node, thus reducing the CPU load.
|
||||||
|
|
||||||
|
IIO buffer sysfs interface
|
||||||
|
==========================
|
||||||
|
An IIO buffer has an associated attributes directory under
|
||||||
|
:file:`/sys/bus/iio/devices/iio:device{X}/buffer/*`. Here are some of the existing
|
||||||
|
attributes:
|
||||||
|
|
||||||
|
* :file:`length`, the total number of data samples (capacity) that can be
|
||||||
|
stored by the buffer.
|
||||||
|
* :file:`enable`, activate buffer capture.
|
||||||
|
|
||||||
|
IIO buffer setup
|
||||||
|
================
|
||||||
|
|
||||||
|
The meta information associated with a channel reading placed in a buffer is
|
||||||
|
called a scan element. The important bits configuring scan elements are
|
||||||
|
exposed to userspace applications via the
|
||||||
|
:file:`/sys/bus/iio/devices/iio:device{X}/scan_elements/*` directory. This directory contains
|
||||||
|
attributes of the following form:
|
||||||
|
|
||||||
|
* :file:`enable`, used for enabling a channel. If and only if its attribute
|
||||||
|
is non-zero, then a triggered capture will contain data samples for this
|
||||||
|
channel.
|
||||||
|
* :file:`type`, description of the scan element data storage within the buffer
|
||||||
|
and hence the form in which it is read from user space.
|
||||||
|
Format is ``[be|le]:[s|u]bits/storagebitsXrepeat[>>shift]``.
|
||||||
|
* *be* or *le*, specifies big or little endian.
|
||||||
|
* *s* or *u*, specifies if signed (2's complement) or unsigned.
|
||||||
|
* *bits*, is the number of valid data bits.
|
||||||
|
* *storagebits*, is the number of bits (after padding) that it occupies in the
|
||||||
|
buffer.
|
||||||
|
* *shift*, if specified, is the shift that needs to be applied prior to
|
||||||
|
masking out unused bits.
|
||||||
|
* *repeat*, specifies the number of bits/storagebits repetitions. When the
|
||||||
|
repeat element is 0 or 1, then the repeat value is omitted.
|
||||||
|
|
||||||
|
For example, a driver for a 3-axis accelerometer with 12 bit resolution where
|
||||||
|
data is stored in two 8-bit registers as follows::
|
||||||
|
|
||||||
|
7 6 5 4 3 2 1 0
|
||||||
|
+---+---+---+---+---+---+---+---+
|
||||||
|
|D3 |D2 |D1 |D0 | X | X | X | X | (LOW byte, address 0x06)
|
||||||
|
+---+---+---+---+---+---+---+---+
|
||||||
|
|
||||||
|
7 6 5 4 3 2 1 0
|
||||||
|
+---+---+---+---+---+---+---+---+
|
||||||
|
|D11|D10|D9 |D8 |D7 |D6 |D5 |D4 | (HIGH byte, address 0x07)
|
||||||
|
+---+---+---+---+---+---+---+---+
|
||||||
|
|
||||||
|
will have the following scan element type for each axis::
|
||||||
|
|
||||||
|
$ cat /sys/bus/iio/devices/iio:device0/scan_elements/in_accel_y_type
|
||||||
|
le:s12/16>>4
|
||||||
|
|
||||||
|
A user space application will interpret data samples read from the buffer as
|
||||||
|
two-byte little-endian signed data that needs a 4-bit right shift before
|
||||||
|
masking out the 12 valid bits of data.
|
||||||
|
|
||||||
|
For implementing buffer support a driver should initialize the following
|
||||||
|
fields in iio_chan_spec definition::
|
||||||
|
|
||||||
|
struct iio_chan_spec {
|
||||||
|
/* other members */
|
||||||
|
int scan_index;
|
||||||
|
struct {
|
||||||
|
char sign;
|
||||||
|
u8 realbits;
|
||||||
|
u8 storagebits;
|
||||||
|
u8 shift;
|
||||||
|
u8 repeat;
|
||||||
|
enum iio_endian endianness;
|
||||||
|
} scan_type;
|
||||||
|
};
|
||||||
|
|
||||||
|
The driver implementing the accelerometer described above will have the
|
||||||
|
following channel definition::
|
||||||
|
|
||||||
|
struct iio_chan_spec accel_channels[] = {
|
||||||
|
{
|
||||||
|
.type = IIO_ACCEL,
|
||||||
|
.modified = 1,
|
||||||
|
.channel2 = IIO_MOD_X,
|
||||||
|
/* other stuff here */
|
||||||
|
.scan_index = 0,
|
||||||
|
.scan_type = {
|
||||||
|
.sign = 's',
|
||||||
|
.realbits = 12,
|
||||||
|
.storagebits = 16,
|
||||||
|
.shift = 4,
|
||||||
|
.endianness = IIO_LE,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
/* similar for Y (with channel2 = IIO_MOD_Y, scan_index = 1)
|
||||||
|
* and Z (with channel2 = IIO_MOD_Z, scan_index = 2) axis
|
||||||
|
*/
|
||||||
|
};
|
||||||
|
|
||||||
|
Here **scan_index** defines the order in which the enabled channels are placed
|
||||||
|
inside the buffer. Channels with a lower **scan_index** will be placed before
|
||||||
|
channels with a higher index. Each channel needs to have a unique
|
||||||
|
**scan_index**.
|
||||||
|
|
||||||
|
Setting **scan_index** to -1 can be used to indicate that the specific channel
|
||||||
|
does not support buffered capture. In this case no entries will be created for
|
||||||
|
the channel in the scan_elements directory.
|
||||||
|
|
||||||
|
More details
|
||||||
|
============
|
||||||
|
.. kernel-doc:: include/linux/iio/buffer.h
|
||||||
|
.. kernel-doc:: drivers/iio/industrialio-buffer.c
|
||||||
|
:export:
|
||||||
|
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user