Merge branch 'akpm' (more patches from Andrew)
Merge patches from Andrew Morton:
 "Most of the rest of MM, plus a few dribs and drabs.

  I still have quite a few irritating patches left around: ones with
  dubious testing results, lack of review, ones which should have gone
  via maintainer trees but the maintainers are slack, etc. I need to be
  more activist in getting these things wrapped up outside the merge
  window, but they're such a PITA."

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (48 commits)
  mm/vmscan.c: avoid possible deadlock caused by too_many_isolated()
  vmscan: comment too_many_isolated()
  mm/kmemleak.c: remove obsolete simple_strtoul
  mm/memory_hotplug.c: improve comments
  mm/hugetlb: create hugetlb cgroup file in hugetlb_init
  mm/mprotect.c: coding-style cleanups
  Documentation: ABI: /sys/devices/system/node/
  slub: drop mutex before deleting sysfs entry
  memcg: add comments clarifying aspects of cache attribute propagation
  kmem: add slab-specific documentation about the kmem controller
  slub: slub-specific propagation changes
  slab: propagate tunable values
  memcg: aggregate memcg cache values in slabinfo
  memcg/sl[au]b: shrink dead caches
  memcg/sl[au]b: track all the memcg children of a kmem_cache
  memcg: destroy memcg caches
  sl[au]b: allocate objects from memcg cache
  sl[au]b: always get the cache from its page in kmem_cache_free()
  memcg: skip memcg kmem allocations in specified code regions
  memcg: infrastructure to match an allocation to the right cache
  ...
Documentation/ABI/stable/sysfs-devices-node
@@ -1,7 +1,101 @@
What:		/sys/devices/system/node/possible
Date:		October 2002
Contact:	Linux Memory Management list <linux-mm@kvack.org>
Description:
		Nodes that could possibly become online at some point.

What:		/sys/devices/system/node/online
Date:		October 2002
Contact:	Linux Memory Management list <linux-mm@kvack.org>
Description:
		Nodes that are online.

What:		/sys/devices/system/node/has_normal_memory
Date:		October 2002
Contact:	Linux Memory Management list <linux-mm@kvack.org>
Description:
		Nodes that have regular memory.

What:		/sys/devices/system/node/has_cpu
Date:		October 2002
Contact:	Linux Memory Management list <linux-mm@kvack.org>
Description:
		Nodes that have one or more CPUs.

What:		/sys/devices/system/node/has_high_memory
Date:		October 2002
Contact:	Linux Memory Management list <linux-mm@kvack.org>
Description:
		Nodes that have regular or high memory.
		Depends on CONFIG_HIGHMEM.

What:		/sys/devices/system/node/nodeX
Date:		October 2002
Contact:	Linux Memory Management list <linux-mm@kvack.org>
Description:
		When CONFIG_NUMA is enabled, this is a directory containing
		information on node X such as what CPUs are local to the
-		node.
+		node. Each file is detailed next.

What:		/sys/devices/system/node/nodeX/cpumap
Date:		October 2002
Contact:	Linux Memory Management list <linux-mm@kvack.org>
Description:
		The node's cpumap.

What:		/sys/devices/system/node/nodeX/cpulist
Date:		October 2002
Contact:	Linux Memory Management list <linux-mm@kvack.org>
Description:
		The CPUs associated with the node.

What:		/sys/devices/system/node/nodeX/meminfo
Date:		October 2002
Contact:	Linux Memory Management list <linux-mm@kvack.org>
Description:
		Provides information about the node's distribution and memory
		utilization. Similar to /proc/meminfo, see Documentation/filesystems/proc.txt

What:		/sys/devices/system/node/nodeX/numastat
Date:		October 2002
Contact:	Linux Memory Management list <linux-mm@kvack.org>
Description:
		The node's hit/miss statistics, in units of pages.
		See Documentation/numastat.txt

What:		/sys/devices/system/node/nodeX/distance
Date:		October 2002
Contact:	Linux Memory Management list <linux-mm@kvack.org>
Description:
		Distance between the node and all the other nodes
		in the system.

What:		/sys/devices/system/node/nodeX/vmstat
Date:		October 2002
Contact:	Linux Memory Management list <linux-mm@kvack.org>
Description:
		The node's zoned virtual memory statistics.
		This is a superset of numastat.

What:		/sys/devices/system/node/nodeX/compact
Date:		February 2010
Contact:	Mel Gorman <mel@csn.ul.ie>
Description:
		When this file is written to, all memory within that node
		will be compacted. When it completes, memory will be freed
		into blocks which have as many contiguous pages as possible.

What:		/sys/devices/system/node/nodeX/scan_unevictable_pages
Date:		October 2008
Contact:	Lee Schermerhorn <lee.schermerhorn@hp.com>
Description:
		When set, it triggers scanning the node's unevictable lists
		and moves any pages that have become evictable onto the
		respective zone's inactive list. See mm/vmscan.c

What:		/sys/devices/system/node/nodeX/hugepages/hugepages-<size>/
Date:		December 2009
Contact:	Lee Schermerhorn <lee.schermerhorn@hp.com>
Description:
		The node's huge page size control/query attributes.
		See Documentation/vm/hugetlbpage.txt
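A quick shell illustration of the attributes documented above; the node
numbers and all output values below are hypothetical, and node paths are
exactly as listed in the entries:

	# cat /sys/devices/system/node/online
	0-1
	# cat /sys/devices/system/node/node0/cpulist
	0-3
	# cat /sys/devices/system/node/node0/distance
	10 20
	# echo 1 > /sys/devices/system/node/node0/compact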

Documentation/cgroups/memory.txt
@@ -71,6 +71,11 @@ Brief summary of control files.
 memory.oom_control		 # set/show oom controls.
 memory.numa_stat		 # show memory usage per numa node

 memory.kmem.limit_in_bytes	 # set/show hard limit for kernel memory
 memory.kmem.usage_in_bytes	 # show current kernel memory allocation
 memory.kmem.failcnt		 # show how many times kernel memory usage hit the limit
 memory.kmem.max_usage_in_bytes	 # show max kernel memory usage recorded

 memory.kmem.tcp.limit_in_bytes	 # set/show hard limit for tcp buf memory
 memory.kmem.tcp.usage_in_bytes	 # show current tcp buf memory allocation
 memory.kmem.tcp.failcnt	 # show how many times tcp buf memory usage hit the limit
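Once a memcg exists, the new kmem files can be inspected like any other
control file; the cgroup path and the values shown are hypothetical:

	# cat /sys/fs/cgroup/memory/A/memory.kmem.usage_in_bytes
	327680
	# cat /sys/fs/cgroup/memory/A/memory.kmem.failcnt
	0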
@@ -268,20 +273,73 @@ the amount of kernel memory used by the system. Kernel memory is fundamentally
different than user memory, since it can't be swapped out, which makes it
possible to DoS the system by consuming too much of this precious resource.

Kernel memory won't be accounted at all until a limit on a group is set. This
allows for existing setups to continue working without disruption. The limit
cannot be set if the cgroup has children, or if there are already tasks in the
cgroup. Attempting to set the limit under those conditions will return -EBUSY.
When use_hierarchy == 1 and a group is accounted, its children will
automatically be accounted regardless of their limit value.

After a group is first limited, it will keep being accounted until it
is removed. The memory limitation itself can of course be removed by writing
-1 to memory.kmem.limit_in_bytes. In this case, kmem will be accounted, but not
limited.

Kernel memory limits are not imposed for the root cgroup. Usage for the root
-cgroup may or may not be accounted.
+cgroup may or may not be accounted. The memory used is accumulated into
+memory.kmem.usage_in_bytes, or in a separate counter when it makes sense
+(currently only for tcp).
+The main "kmem" counter is fed into the main counter, so kmem charges will
+also be visible from the user counter.

Currently no soft limit is implemented for kernel memory. It is future work
to trigger slab reclaim when those limits are reached.
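A sketch of the lifecycle described above; the cgroup name and the sizes are
made up. The first write to memory.kmem.limit_in_bytes is what turns
accounting on, and the final write of -1 removes the limit while the group
keeps being accounted:

	# mkdir /sys/fs/cgroup/memory/A
	# echo 100M > /sys/fs/cgroup/memory/A/memory.kmem.limit_in_bytes
	# echo $$ > /sys/fs/cgroup/memory/A/tasks
	# echo -1 > /sys/fs/cgroup/memory/A/memory.kmem.limit_in_bytes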
2.7.1 Current Kernel Memory resources accounted

* stack pages: every process consumes some stack pages. By accounting into
kernel memory, we prevent new processes from being created when the kernel
memory usage is too high.

* slab pages: pages allocated by the SLAB or SLUB allocator are tracked. A copy
of each kmem_cache is created every time the cache is touched for the first time
from inside the memcg. The creation is done lazily, so some objects can still be
skipped while the cache is being created. All objects in a slab page should
belong to the same memcg. This only fails to hold when a task is migrated to a
different memcg during the page allocation by the cache.

* sockets memory pressure: some socket protocols have memory pressure
thresholds. The Memory Controller allows them to be controlled individually
per cgroup, instead of globally.

* tcp memory pressure: sockets memory pressure for the tcp protocol.

2.7.3 Common use cases

Because the "kmem" counter is fed to the main user counter, kernel memory can
never be limited completely independently of user memory. Say "U" is the user
limit, and "K" the kernel limit. There are three possible ways limits can be
set:

U != 0, K = unlimited:
This is the standard memcg limitation mechanism already present before kmem
accounting. Kernel memory is completely ignored.

U != 0, K < U:
Kernel memory is a subset of the user memory. This setup is useful in
deployments where the total amount of memory per-cgroup is overcommitted.
Overcommitting kernel memory limits is definitely not recommended, since the
box can still run out of non-reclaimable memory.
In this case, the admin could set up K so that the sum of all groups is
never greater than the total memory, and freely set U at the cost of his
QoS.

U != 0, K >= U:
Since kmem charges will also be fed to the user counter, reclaim will be
triggered for the cgroup for both kinds of memory. This setup gives the
admin a unified view of memory, and it is also useful for people who just
want to track kernel memory usage.

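For example, a "K < U" deployment could be configured like this (cgroup path
and sizes are illustrative only):

	# echo 512M > /sys/fs/cgroup/memory/A/memory.limit_in_bytes
	# echo 128M > /sys/fs/cgroup/memory/A/memory.kmem.limit_in_bytes

Here U = 512M and K = 128M, so kernel memory can never exceed 128M, and the
group's total (user plus kernel) memory can never exceed 512M.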
3. User Interface

0. Configuration
@@ -290,6 +348,7 @@ a. Enable CONFIG_CGROUPS
b. Enable CONFIG_RESOURCE_COUNTERS
c. Enable CONFIG_MEMCG
d. Enable CONFIG_MEMCG_SWAP (to use swap extension)
+e. Enable CONFIG_MEMCG_KMEM (to use kmem extension)

1. Prepare the cgroups (see cgroups.txt, Why are cgroups needed?)
# mount -t tmpfs none /sys/fs/cgroup
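On most systems the preparation then continues with the standard cgroup-v1
mount steps (a minimal sketch; the mount point may differ per distribution):

	# mkdir /sys/fs/cgroup/memory
	# mount -t cgroup none /sys/fs/cgroup/memory -o memory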
@@ -406,6 +465,11 @@ About use_hierarchy, see Section 6.
Because rmdir() moves all pages to parent, some out-of-use page caches can be
moved to the parent. If you want to avoid that, force_empty will be useful.

Also, note that when memory.kmem.limit_in_bytes is set the charges due to
kernel pages will still be seen. This is not considered a failure and the
write will still return success. In this case, it is expected that
memory.kmem.usage_in_bytes == memory.usage_in_bytes.
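That expectation can be checked directly from the shell (cgroup path
hypothetical):

	# cat /sys/fs/cgroup/memory/A/memory.kmem.usage_in_bytes
	# cat /sys/fs/cgroup/memory/A/memory.usage_in_bytes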
About use_hierarchy, see Section 6.

5.2 stat file


Documentation/cgroups/resource_counter.txt
@@ -83,16 +83,17 @@ to work with it.
    res_counter->lock internally (it must be called with res_counter->lock
    held). The force parameter indicates whether we can bypass the limit.

-e. void res_counter_uncharge[_locked]
+e. u64 res_counter_uncharge[_locked]
			(struct res_counter *rc, unsigned long val)

	When a resource is released (freed) it should be de-accounted
	from the resource counter it was accounted to. This is called
-	"uncharging".
+	"uncharging". The return value of this function indicates the amount
+	of charges still present in the counter.

	The _locked routines imply that the res_counter->lock is taken.

-f. void res_counter_uncharge_until
+f. u64 res_counter_uncharge_until
		(struct res_counter *rc, struct res_counter *top,
		 unsigned long val)

arch/cris/include/asm/io.h
@@ -133,12 +133,39 @@ static inline void writel(unsigned int b, volatile void __iomem *addr)
 #define insb(port,addr,count) (cris_iops ? cris_iops->read_io(port,addr,1,count) : 0)
 #define insw(port,addr,count) (cris_iops ? cris_iops->read_io(port,addr,2,count) : 0)
 #define insl(port,addr,count) (cris_iops ? cris_iops->read_io(port,addr,4,count) : 0)
-#define outb(data,port) if (cris_iops) cris_iops->write_io(port,(void*)(unsigned)data,1,1)
-#define outw(data,port) if (cris_iops) cris_iops->write_io(port,(void*)(unsigned)data,2,1)
-#define outl(data,port) if (cris_iops) cris_iops->write_io(port,(void*)(unsigned)data,4,1)
-#define outsb(port,addr,count) if(cris_iops) cris_iops->write_io(port,(void*)addr,1,count)
-#define outsw(port,addr,count) if(cris_iops) cris_iops->write_io(port,(void*)addr,2,count)
-#define outsl(port,addr,count) if(cris_iops) cris_iops->write_io(port,(void*)addr,3,count)
+static inline void outb(unsigned char data, unsigned int port)
+{
+	if (cris_iops)
+		cris_iops->write_io(port, (void *) &data, 1, 1);
+}
+static inline void outw(unsigned short data, unsigned int port)
+{
+	if (cris_iops)
+		cris_iops->write_io(port, (void *) &data, 2, 1);
+}
+static inline void outl(unsigned int data, unsigned int port)
+{
+	if (cris_iops)
+		cris_iops->write_io(port, (void *) &data, 4, 1);
+}
+static inline void outsb(unsigned int port, const void *addr,
+			 unsigned long count)
+{
+	if (cris_iops)
+		cris_iops->write_io(port, (void *)addr, 1, count);
+}
+static inline void outsw(unsigned int port, const void *addr,
+			 unsigned long count)
+{
+	if (cris_iops)
+		cris_iops->write_io(port, (void *)addr, 2, count);
+}
+static inline void outsl(unsigned int port, const void *addr,
+			 unsigned long count)
+{
+	if (cris_iops)
+		cris_iops->write_io(port, (void *)addr, 4, count);
+}

 /*
  * Convert a physical pointer to a virtual kernel pointer for /dev/mem
arch/h8300/Kconfig
@@ -3,6 +3,7 @@ config H8300
	default y
	select HAVE_IDE
	select HAVE_GENERIC_HARDIRQS
	select GENERIC_ATOMIC64
	select HAVE_UID16
	select ARCH_WANT_IPC_PARSE_VERSION
	select GENERIC_IRQ_SHOW
arch/x86/platform/iris/iris.c
@@ -23,6 +23,7 @@

#include <linux/moduleparam.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/delay.h>
@@ -62,29 +63,75 @@ static void iris_power_off(void)
  * by reading its input port and seeing whether the read value is
  * meaningful.
  */
-static int iris_init(void)
+static int iris_probe(struct platform_device *pdev)
 {
-	unsigned char status;
-	if (force != 1) {
-		printk(KERN_ERR "The force parameter has not been set to 1 so the Iris poweroff handler will not be installed.\n");
-		return -ENODEV;
-	}
-	status = inb(IRIS_GIO_INPUT);
+	unsigned char status = inb(IRIS_GIO_INPUT);
 	if (status == IRIS_GIO_NODEV) {
-		printk(KERN_ERR "This machine does not seem to be an Iris. Power_off handler not installed.\n");
+		printk(KERN_ERR "This machine does not seem to be an Iris. "
+			"Power off handler not installed.\n");
 		return -ENODEV;
 	}
 	old_pm_power_off = pm_power_off;
 	pm_power_off = &iris_power_off;
 	printk(KERN_INFO "Iris power_off handler installed.\n");
 	return 0;
 }

+static int iris_remove(struct platform_device *pdev)
+{
+	pm_power_off = old_pm_power_off;
+	printk(KERN_INFO "Iris power_off handler uninstalled.\n");
+	return 0;
+}
+
+static struct platform_driver iris_driver = {
+	.driver		= {
+		.name   = "iris",
+		.owner  = THIS_MODULE,
+	},
+	.probe          = iris_probe,
+	.remove         = iris_remove,
+};
+
+static struct resource iris_resources[] = {
+	{
+		.start  = IRIS_GIO_BASE,
+		.end    = IRIS_GIO_OUTPUT,
+		.flags  = IORESOURCE_IO,
+		.name   = "address"
+	}
+};
+
+static struct platform_device *iris_device;
+
+static int iris_init(void)
+{
+	int ret;
+	if (force != 1) {
+		printk(KERN_ERR "The force parameter has not been set to 1."
+			" The Iris poweroff handler will not be installed.\n");
+		return -ENODEV;
+	}
+	ret = platform_driver_register(&iris_driver);
+	if (ret < 0) {
+		printk(KERN_ERR "Failed to register iris platform driver: %d\n",
+			ret);
+		return ret;
+	}
+	iris_device = platform_device_register_simple("iris", (-1),
+			iris_resources, ARRAY_SIZE(iris_resources));
+	if (IS_ERR(iris_device)) {
+		printk(KERN_ERR "Failed to register iris platform device\n");
+		platform_driver_unregister(&iris_driver);
+		return PTR_ERR(iris_device);
+	}
+	return 0;
+}
+
 static void iris_exit(void)
 {
-	pm_power_off = old_pm_power_off;
-	printk(KERN_INFO "Iris power_off handler uninstalled.\n");
+	platform_device_unregister(iris_device);
+	platform_driver_unregister(&iris_driver);
 }

 module_init(iris_init);
drivers/message/fusion/mptscsih.c
@@ -792,6 +792,7 @@ mptscsih_io_done(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *mr)
		 * than an unsolicited DID_ABORT.
		 */
		sc->result = DID_RESET << 16;
		break;

	case MPI_IOCSTATUS_SCSI_EXT_TERMINATED:	/* 0x004C */
		if (ioc->bus_type == FC)
drivers/video/backlight/locomolcd.c
@@ -107,7 +107,6 @@ void locomolcd_power(int on)
 }
 EXPORT_SYMBOL(locomolcd_power);

-
 static int current_intensity;

 static int locomolcd_set_intensity(struct backlight_device *bd)
@@ -122,13 +121,25 @@ static int locomolcd_set_intensity(struct backlight_device *bd)
 		intensity = 0;

 	switch (intensity) {
-	/* AC and non-AC are handled differently, but produce same results in sharp code? */
-	case 0: locomo_frontlight_set(locomolcd_dev, 0, 0, 161); break;
-	case 1: locomo_frontlight_set(locomolcd_dev, 117, 0, 161); break;
-	case 2: locomo_frontlight_set(locomolcd_dev, 163, 0, 148); break;
-	case 3: locomo_frontlight_set(locomolcd_dev, 194, 0, 161); break;
-	case 4: locomo_frontlight_set(locomolcd_dev, 194, 1, 161); break;
+	/*
+	 * AC and non-AC are handled differently,
+	 * but produce same results in sharp code?
+	 */
+	case 0:
+		locomo_frontlight_set(locomolcd_dev, 0, 0, 161);
+		break;
+	case 1:
+		locomo_frontlight_set(locomolcd_dev, 117, 0, 161);
+		break;
+	case 2:
+		locomo_frontlight_set(locomolcd_dev, 163, 0, 148);
+		break;
+	case 3:
+		locomo_frontlight_set(locomolcd_dev, 194, 0, 161);
+		break;
+	case 4:
+		locomo_frontlight_set(locomolcd_dev, 194, 1, 161);
+		break;
 	default:
 		return -ENODEV;
 	}
@@ -175,9 +186,11 @@ static int locomolcd_probe(struct locomo_dev *ldev)

 	locomo_gpio_set_dir(ldev->dev.parent, LOCOMO_GPIO_FL_VR, 0);

-	/* the poodle_lcd_power function is called for the first time
+	/*
+	 * the poodle_lcd_power function is called for the first time
 	 * from fs_initcall, which is before locomo is activated.
-	 * We need to recall poodle_lcd_power here*/
+	 * We need to recall poodle_lcd_power here
+	 */
 	if (machine_is_poodle())
 		locomolcd_power(1);

@@ -190,8 +203,8 @@ static int locomolcd_probe(struct locomo_dev *ldev)
 					&ldev->dev, NULL,
 					&locomobl_data, &props);

-	if (IS_ERR (locomolcd_bl_device))
-		return PTR_ERR (locomolcd_bl_device);
+	if (IS_ERR(locomolcd_bl_device))
+		return PTR_ERR(locomolcd_bl_device);

 	/* Set up frontlight so that screen is readable */
 	locomolcd_bl_device->props.brightness = 2;
@@ -226,7 +239,6 @@ static struct locomo_driver poodle_lcd_driver = {
	.resume = locomolcd_resume,
};

static int __init locomolcd_init(void)
{
	return locomo_driver_register(&poodle_lcd_driver);
fs/ceph/export.c
@@ -56,13 +56,15 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
 	struct ceph_nfs_confh *cfh = (void *)rawfh;
 	int connected_handle_length = sizeof(*cfh)/4;
 	int handle_length = sizeof(*fh)/4;
-	struct dentry *dentry = d_find_alias(inode);
+	struct dentry *dentry;
 	struct dentry *parent;

 	/* don't re-export snaps */
 	if (ceph_snap(inode) != CEPH_NOSNAP)
 		return -EINVAL;

+	dentry = d_find_alias(inode);
+
 	/* if we found an alias, generate a connectable fh */
 	if (*max_len >= connected_handle_length && dentry) {
 		dout("encode_fh %p connectable\n", dentry);
include/linux/gfp.h
@@ -30,6 +30,7 @@ struct vm_area_struct;
#define ___GFP_HARDWALL		0x20000u
#define ___GFP_THISNODE		0x40000u
#define ___GFP_RECLAIMABLE	0x80000u
#define ___GFP_KMEMCG		0x100000u
#define ___GFP_NOTRACK		0x200000u
#define ___GFP_NO_KSWAPD	0x400000u
#define ___GFP_OTHER_NODE	0x800000u
@@ -89,6 +90,7 @@ struct vm_area_struct;

#define __GFP_NO_KSWAPD	((__force gfp_t)___GFP_NO_KSWAPD)
#define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
#define __GFP_KMEMCG	((__force gfp_t)___GFP_KMEMCG) /* Allocation comes from a memcg-accounted resource */
#define __GFP_WRITE	((__force gfp_t)___GFP_WRITE)	/* Allocator intends to dirty page */

/*
@@ -365,6 +367,9 @@ extern void free_pages(unsigned long addr, unsigned int order);
extern void free_hot_cold_page(struct page *page, int cold);
extern void free_hot_cold_page_list(struct list_head *list, int cold);

extern void __free_memcg_kmem_pages(struct page *page, unsigned int order);
extern void free_memcg_kmem_pages(unsigned long addr, unsigned int order);

#define __free_page(page) __free_pages((page), 0)
#define free_page(addr) free_pages((addr), 0)

include/linux/hugetlb_cgroup.h
@@ -62,7 +62,7 @@ extern void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
 					 struct page *page);
 extern void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
 					   struct hugetlb_cgroup *h_cg);
-extern int hugetlb_cgroup_file_init(int idx) __init;
+extern void hugetlb_cgroup_file_init(void) __init;
 extern void hugetlb_cgroup_migrate(struct page *oldhpage,
 				   struct page *newhpage);
@@ -111,9 +111,8 @@ hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
 	return;
 }

-static inline int __init hugetlb_cgroup_file_init(int idx)
+static inline void hugetlb_cgroup_file_init(void)
 {
-	return 0;
 }

 static inline void hugetlb_cgroup_migrate(struct page *oldhpage,
include/linux/memcontrol.h
@@ -21,11 +21,14 @@
#define _LINUX_MEMCONTROL_H
#include <linux/cgroup.h>
#include <linux/vm_event_item.h>
#include <linux/hardirq.h>
#include <linux/jump_label.h>

struct mem_cgroup;
struct page_cgroup;
struct page;
struct mm_struct;
struct kmem_cache;

/* Stats that can be updated by kernel. */
enum mem_cgroup_page_stat_item {
@@ -414,5 +417,211 @@ static inline void sock_release_memcg(struct sock *sk)
{
}
#endif /* CONFIG_INET && CONFIG_MEMCG_KMEM */

#ifdef CONFIG_MEMCG_KMEM
extern struct static_key memcg_kmem_enabled_key;

extern int memcg_limited_groups_array_size;

/*
 * Helper macro to loop through all memcg-specific caches. Callers must still
 * check if the cache is valid (it is either valid or NULL).
 * The slab_mutex must be held when looping through those caches.
 */
#define for_each_memcg_cache_index(_idx)	\
	for ((_idx) = 0; (_idx) < memcg_limited_groups_array_size; (_idx)++)

static inline bool memcg_kmem_enabled(void)
{
	return static_key_false(&memcg_kmem_enabled_key);
}

/*
 * In general, we'll do everything in our power not to incur any overhead
 * for non-memcg users of the kmem functions. Not even a function call, if we
 * can avoid it.
 *
 * Therefore, we'll inline all those functions so that in the best case, we'll
 * see that kmemcg is off for everybody and proceed quickly. If it is on,
 * we'll still do most of the flag checking inline. We check a lot of
 * conditions, but because they are pretty simple, they are expected to be
 * fast.
 */
bool __memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg,
					int order);
void __memcg_kmem_commit_charge(struct page *page,
				       struct mem_cgroup *memcg, int order);
void __memcg_kmem_uncharge_pages(struct page *page, int order);

int memcg_cache_id(struct mem_cgroup *memcg);
int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s,
			 struct kmem_cache *root_cache);
void memcg_release_cache(struct kmem_cache *cachep);
void memcg_cache_list_add(struct mem_cgroup *memcg, struct kmem_cache *cachep);

int memcg_update_cache_size(struct kmem_cache *s, int num_groups);
void memcg_update_array_size(int num_groups);

struct kmem_cache *
__memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp);

void mem_cgroup_destroy_cache(struct kmem_cache *cachep);
void kmem_cache_destroy_memcg_children(struct kmem_cache *s);

/**
 * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed.
 * @gfp: the gfp allocation flags.
 * @memcg: a pointer to the memcg this was charged against.
 * @order: allocation order.
 *
 * returns true if the memcg to which the current task belongs can hold this
 * allocation.
 *
 * We return true automatically if this allocation is not to be accounted to
 * any memcg.
 */
static inline bool
memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
{
	if (!memcg_kmem_enabled())
		return true;

	/*
	 * __GFP_NOFAIL allocations will move on even if charging is not
	 * possible. Therefore we don't even try, and have this allocation
	 * unaccounted. We could in theory charge it with
	 * res_counter_charge_nofail, but we hope those allocations are rare,
	 * and won't be worth the trouble.
	 */
	if (!(gfp & __GFP_KMEMCG) || (gfp & __GFP_NOFAIL))
		return true;
	if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD))
		return true;

	/* If the task is dying, just let it go. */
	if (unlikely(fatal_signal_pending(current)))
		return true;

	return __memcg_kmem_newpage_charge(gfp, memcg, order);
}

/**
 * memcg_kmem_uncharge_pages: uncharge pages from memcg
 * @page: pointer to struct page being freed
 * @order: allocation order.
 *
 * There is no need to specify memcg here, since it is embedded in page_cgroup.
 */
static inline void
memcg_kmem_uncharge_pages(struct page *page, int order)
{
	if (memcg_kmem_enabled())
		__memcg_kmem_uncharge_pages(page, order);
}

/**
 * memcg_kmem_commit_charge: embeds correct memcg in a page
 * @page: pointer to struct page recently allocated
 * @memcg: the memcg structure we charged against
 * @order: allocation order.
 *
 * Needs to be called after memcg_kmem_newpage_charge, regardless of success or
 * failure of the allocation. If @page is NULL, this function will revert the
 * charges. Otherwise, it will commit the memcg given by @memcg to the
 * corresponding page_cgroup.
 */
static inline void
memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order)
{
	if (memcg_kmem_enabled() && memcg)
		__memcg_kmem_commit_charge(page, memcg, order);
}

/**
 * memcg_kmem_get_cache: selects the correct per-memcg cache for allocation
 * @cachep: the original global kmem cache
 * @gfp: allocation flags.
 *
 * This function assumes that the task allocating, which determines the memcg
 * in the page allocator, belongs to the same cgroup throughout the whole
 * process. Misaccounting can happen if the task calls memcg_kmem_get_cache()
 * while belonging to a cgroup, and later on changes. This is considered
 * acceptable, and should only happen upon task migration.
 *
 * Before the cache is created by the memcg core, there is also a possible
 * imbalance: the task belongs to a memcg, but the cache being allocated from
 * is the global cache, since the child cache is not yet guaranteed to be
 * ready. This case is also fine, since in this case __GFP_KMEMCG will not be
 * passed and the page allocator will not attempt any cgroup accounting.
 */
static __always_inline struct kmem_cache *
memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
{
	if (!memcg_kmem_enabled())
		return cachep;
	if (gfp & __GFP_NOFAIL)
		return cachep;
	if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD))
		return cachep;
	if (unlikely(fatal_signal_pending(current)))
		return cachep;

	return __memcg_kmem_get_cache(cachep, gfp);
}
#else
#define for_each_memcg_cache_index(_idx)	\
	for (; NULL; )

static inline bool memcg_kmem_enabled(void)
{
	return false;
}

static inline bool
memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
{
	return true;
}

static inline void memcg_kmem_uncharge_pages(struct page *page, int order)
{
}

static inline void
memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order)
{
}

static inline int memcg_cache_id(struct mem_cgroup *memcg)
{
	return -1;
}

static inline int
memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s,
		     struct kmem_cache *root_cache)
{
	return 0;
}

static inline void memcg_release_cache(struct kmem_cache *cachep)
{
}

static inline void memcg_cache_list_add(struct mem_cgroup *memcg,
					struct kmem_cache *s)
{
}

static inline struct kmem_cache *
memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
{
	return cachep;
}

static inline void kmem_cache_destroy_memcg_children(struct kmem_cache *s)
{
}
#endif /* CONFIG_MEMCG_KMEM */
#endif /* _LINUX_MEMCONTROL_H */

include/linux/res_counter.h
@@ -125,14 +125,16 @@ int res_counter_charge_nofail(struct res_counter *counter,
 *
 * these calls check for usage underflow and show a warning on the console
 * _locked call expects the counter->lock to be taken
 *
 * returns the total charges still present in @counter.
 */

-void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val);
-void res_counter_uncharge(struct res_counter *counter, unsigned long val);
+u64 res_counter_uncharge_locked(struct res_counter *counter, unsigned long val);
+u64 res_counter_uncharge(struct res_counter *counter, unsigned long val);

-void res_counter_uncharge_until(struct res_counter *counter,
-				struct res_counter *top,
-				unsigned long val);
+u64 res_counter_uncharge_until(struct res_counter *counter,
+			       struct res_counter *top,
+			       unsigned long val);
 /**
  * res_counter_margin - calculate chargeable space of a counter
  * @cnt: the counter
include/linux/sched.h
@@ -1597,6 +1597,7 @@ struct task_struct {
		unsigned long nr_pages;	/* uncharged usage */
		unsigned long memsw_nr_pages; /* uncharged mem+swap usage */
	} memcg_batch;
	unsigned int memcg_kmem_skip_account;
#endif
#ifdef CONFIG_HAVE_HW_BREAKPOINT
	atomic_t ptrace_bp_refcnt;
include/linux/slab.h
@@ -11,6 +11,8 @@

#include <linux/gfp.h>
#include <linux/types.h>
#include <linux/workqueue.h>


/*
 * Flags to pass to kmem_cache_create().
@@ -116,6 +118,7 @@ struct kmem_cache {
};
#endif

struct mem_cgroup;
/*
 * struct kmem_cache related prototypes
 */
@@ -125,6 +128,9 @@ int slab_is_available(void);
struct kmem_cache *kmem_cache_create(const char *, size_t, size_t,
			unsigned long,
			void (*)(void *));
struct kmem_cache *
kmem_cache_create_memcg(struct mem_cgroup *, const char *, size_t, size_t,
			unsigned long, void (*)(void *), struct kmem_cache *);
void kmem_cache_destroy(struct kmem_cache *);
int kmem_cache_shrink(struct kmem_cache *);
void kmem_cache_free(struct kmem_cache *, void *);
@@ -175,6 +181,48 @@ void kmem_cache_free(struct kmem_cache *, void *);
#ifndef ARCH_SLAB_MINALIGN
#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
#endif
/*
 * This is the main placeholder for memcg-related information in kmem caches.
 * struct kmem_cache will hold a pointer to it, so the memory cost while
 * disabled is 1 pointer. The runtime cost while enabled gets bigger than it
 * would otherwise be if that would be bundled in kmem_cache: we'll need an
 * extra pointer chase. But the trade off clearly lies in favor of not
 * penalizing non-users.
 *
 * Both the root cache and the child caches will have it. For the root cache,
 * this will hold a dynamically allocated array large enough to hold
 * information about the currently limited memcgs in the system.
 *
 * Child caches will hold extra metadata needed for their operation. Fields are:
 *
 * @memcg: pointer to the memcg this cache belongs to
 * @list: list_head for the list of all caches in this memcg
 * @root_cache: pointer to the global, root cache, this cache was derived from
 * @dead: set to true after the memcg dies; the cache may still be around.
 * @nr_pages: number of pages that belong to this cache.
 * @destroy: worker to be called whenever we are ready, or believe we may be
 *           ready, to destroy this cache.
 */
struct memcg_cache_params {
	bool is_root_cache;
	union {
		struct kmem_cache *memcg_caches[0];
		struct {
			struct mem_cgroup *memcg;
			struct list_head list;
			struct kmem_cache *root_cache;
			bool dead;
			atomic_t nr_pages;
			struct work_struct destroy;
		};
	};
};

int memcg_update_all_caches(int num_memcgs);

struct seq_file;
int cache_show(struct kmem_cache *s, struct seq_file *m);
void print_slabinfo_header(struct seq_file *m);

/*
 * Common kmalloc functions provided by all allocators
include/linux/slab_def.h
@@ -81,6 +81,9 @@ struct kmem_cache {
	 */
	int obj_offset;
#endif /* CONFIG_DEBUG_SLAB */
#ifdef CONFIG_MEMCG_KMEM
	struct memcg_cache_params *memcg_params;
#endif

/* 6) per-cpu/per-node data, touched during every alloc/free */
	/*
include/linux/slub_def.h
@@ -101,6 +101,10 @@ struct kmem_cache {
#ifdef CONFIG_SYSFS
	struct kobject kobj;	/* For sysfs */
#endif
#ifdef CONFIG_MEMCG_KMEM
	struct memcg_cache_params *memcg_params;
	int max_attr_size; /* for propagation, maximum size of a stored attr */
#endif

#ifdef CONFIG_NUMA
	/*
@@ -222,7 +226,10 @@ void *__kmalloc(size_t size, gfp_t flags);
 static __always_inline void *
 kmalloc_order(size_t size, gfp_t flags, unsigned int order)
 {
-	void *ret = (void *) __get_free_pages(flags | __GFP_COMP, order);
+	void *ret;
+
+	flags |= (__GFP_COMP | __GFP_KMEMCG);
+	ret = (void *) __get_free_pages(flags, order);
 	kmemleak_alloc(ret, size, 1, flags);
 	return ret;
 }
include/linux/thread_info.h
@@ -61,6 +61,8 @@ extern long do_no_restart_syscall(struct restart_block *parm);
# define THREADINFO_GFP		(GFP_KERNEL | __GFP_NOTRACK)
#endif

#define THREADINFO_GFP_ACCOUNTED (THREADINFO_GFP | __GFP_KMEMCG)

/*
 * flag set/clear/test wrappers
 * - pass TIF_xxxx constants to these functions
include/trace/events/gfpflags.h
@@ -34,6 +34,7 @@
	{(unsigned long)__GFP_HARDWALL,		"GFP_HARDWALL"},	\
	{(unsigned long)__GFP_THISNODE,		"GFP_THISNODE"},	\
	{(unsigned long)__GFP_RECLAIMABLE,	"GFP_RECLAIMABLE"},	\
	{(unsigned long)__GFP_KMEMCG,		"GFP_KMEMCG"},		\
	{(unsigned long)__GFP_MOVABLE,		"GFP_MOVABLE"},		\
	{(unsigned long)__GFP_NOTRACK,		"GFP_NOTRACK"},		\
	{(unsigned long)__GFP_NO_KSWAPD,	"GFP_NO_KSWAPD"},	\
init/Kconfig
@@ -882,7 +882,7 @@ config MEMCG_SWAP_ENABLED
 config MEMCG_KMEM
 	bool "Memory Resource Controller Kernel Memory accounting (EXPERIMENTAL)"
 	depends on MEMCG && EXPERIMENTAL
-	default n
+	depends on SLUB || SLAB
 	help
 	  The Kernel Memory extension for Memory Resource Controller can limit
 	  the amount of memory used by kernel objects in the system. Those are