Commit Graph

45 Commits

Author SHA1 Message Date
Thomas Gleixner
09c434b8a0 treewide: Add SPDX license identifier for more missed files
Add SPDX license identifiers to all files which:

 - Have no license information of any form

 - Have MODULE_LICENCE("GPL*") inside which was used in the initial
   scan/conversion to ignore the file

These files fall under the project license, GPL v2 only. The resulting SPDX
license identifier is:

  GPL-2.0-only

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2019-05-21 10:50:45 +02:00
Kees Cook
6da2ec5605 treewide: kmalloc() -> kmalloc_array()
The kmalloc() function has a 2-factor argument form, kmalloc_array(). This
patch replaces cases of:

        kmalloc(a * b, gfp)

with:
        kmalloc_array(a * b, gfp)

as well as handling cases of:

        kmalloc(a * b * c, gfp)

with:

        kmalloc(array3_size(a, b, c), gfp)

as it's slightly less ugly than:

        kmalloc_array(array_size(a, b), c, gfp)

This does, however, attempt to ignore constant size factors like:

        kmalloc(4 * 1024, gfp)

though any constants defined via macros get caught up in the conversion.

Any factors with a sizeof() of "unsigned char", "char", and "u8" were
dropped, since they're redundant.

The tools/ directory was manually excluded, since it has its own
implementation of kmalloc().

The Coccinelle script used for this was:

// Fix redundant parens around sizeof().
@@
type TYPE;
expression THING, E;
@@

(
  kmalloc(
-	(sizeof(TYPE)) * E
+	sizeof(TYPE) * E
  , ...)
|
  kmalloc(
-	(sizeof(THING)) * E
+	sizeof(THING) * E
  , ...)
)

// Drop single-byte sizes and redundant parens.
@@
expression COUNT;
typedef u8;
typedef __u8;
@@

(
  kmalloc(
-	sizeof(u8) * (COUNT)
+	COUNT
  , ...)
|
  kmalloc(
-	sizeof(__u8) * (COUNT)
+	COUNT
  , ...)
|
  kmalloc(
-	sizeof(char) * (COUNT)
+	COUNT
  , ...)
|
  kmalloc(
-	sizeof(unsigned char) * (COUNT)
+	COUNT
  , ...)
|
  kmalloc(
-	sizeof(u8) * COUNT
+	COUNT
  , ...)
|
  kmalloc(
-	sizeof(__u8) * COUNT
+	COUNT
  , ...)
|
  kmalloc(
-	sizeof(char) * COUNT
+	COUNT
  , ...)
|
  kmalloc(
-	sizeof(unsigned char) * COUNT
+	COUNT
  , ...)
)

// 2-factor product with sizeof(type/expression) and identifier or constant.
@@
type TYPE;
expression THING;
identifier COUNT_ID;
constant COUNT_CONST;
@@

(
- kmalloc
+ kmalloc_array
  (
-	sizeof(TYPE) * (COUNT_ID)
+	COUNT_ID, sizeof(TYPE)
  , ...)
|
- kmalloc
+ kmalloc_array
  (
-	sizeof(TYPE) * COUNT_ID
+	COUNT_ID, sizeof(TYPE)
  , ...)
|
- kmalloc
+ kmalloc_array
  (
-	sizeof(TYPE) * (COUNT_CONST)
+	COUNT_CONST, sizeof(TYPE)
  , ...)
|
- kmalloc
+ kmalloc_array
  (
-	sizeof(TYPE) * COUNT_CONST
+	COUNT_CONST, sizeof(TYPE)
  , ...)
|
- kmalloc
+ kmalloc_array
  (
-	sizeof(THING) * (COUNT_ID)
+	COUNT_ID, sizeof(THING)
  , ...)
|
- kmalloc
+ kmalloc_array
  (
-	sizeof(THING) * COUNT_ID
+	COUNT_ID, sizeof(THING)
  , ...)
|
- kmalloc
+ kmalloc_array
  (
-	sizeof(THING) * (COUNT_CONST)
+	COUNT_CONST, sizeof(THING)
  , ...)
|
- kmalloc
+ kmalloc_array
  (
-	sizeof(THING) * COUNT_CONST
+	COUNT_CONST, sizeof(THING)
  , ...)
)

// 2-factor product, only identifiers.
@@
identifier SIZE, COUNT;
@@

- kmalloc
+ kmalloc_array
  (
-	SIZE * COUNT
+	COUNT, SIZE
  , ...)

// 3-factor product with 1 sizeof(type) or sizeof(expression), with
// redundant parens removed.
@@
expression THING;
identifier STRIDE, COUNT;
type TYPE;
@@

(
  kmalloc(
-	sizeof(TYPE) * (COUNT) * (STRIDE)
+	array3_size(COUNT, STRIDE, sizeof(TYPE))
  , ...)
|
  kmalloc(
-	sizeof(TYPE) * (COUNT) * STRIDE
+	array3_size(COUNT, STRIDE, sizeof(TYPE))
  , ...)
|
  kmalloc(
-	sizeof(TYPE) * COUNT * (STRIDE)
+	array3_size(COUNT, STRIDE, sizeof(TYPE))
  , ...)
|
  kmalloc(
-	sizeof(TYPE) * COUNT * STRIDE
+	array3_size(COUNT, STRIDE, sizeof(TYPE))
  , ...)
|
  kmalloc(
-	sizeof(THING) * (COUNT) * (STRIDE)
+	array3_size(COUNT, STRIDE, sizeof(THING))
  , ...)
|
  kmalloc(
-	sizeof(THING) * (COUNT) * STRIDE
+	array3_size(COUNT, STRIDE, sizeof(THING))
  , ...)
|
  kmalloc(
-	sizeof(THING) * COUNT * (STRIDE)
+	array3_size(COUNT, STRIDE, sizeof(THING))
  , ...)
|
  kmalloc(
-	sizeof(THING) * COUNT * STRIDE
+	array3_size(COUNT, STRIDE, sizeof(THING))
  , ...)
)

// 3-factor product with 2 sizeof(variable), with redundant parens removed.
@@
expression THING1, THING2;
identifier COUNT;
type TYPE1, TYPE2;
@@

(
  kmalloc(
-	sizeof(TYPE1) * sizeof(TYPE2) * COUNT
+	array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
  , ...)
|
  kmalloc(
-	sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+	array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
  , ...)
|
  kmalloc(
-	sizeof(THING1) * sizeof(THING2) * COUNT
+	array3_size(COUNT, sizeof(THING1), sizeof(THING2))
  , ...)
|
  kmalloc(
-	sizeof(THING1) * sizeof(THING2) * (COUNT)
+	array3_size(COUNT, sizeof(THING1), sizeof(THING2))
  , ...)
|
  kmalloc(
-	sizeof(TYPE1) * sizeof(THING2) * COUNT
+	array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
  , ...)
|
  kmalloc(
-	sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+	array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
  , ...)
)

// 3-factor product, only identifiers, with redundant parens removed.
@@
identifier STRIDE, SIZE, COUNT;
@@

(
  kmalloc(
-	(COUNT) * STRIDE * SIZE
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  kmalloc(
-	COUNT * (STRIDE) * SIZE
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  kmalloc(
-	COUNT * STRIDE * (SIZE)
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  kmalloc(
-	(COUNT) * (STRIDE) * SIZE
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  kmalloc(
-	COUNT * (STRIDE) * (SIZE)
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  kmalloc(
-	(COUNT) * STRIDE * (SIZE)
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  kmalloc(
-	(COUNT) * (STRIDE) * (SIZE)
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  kmalloc(
-	COUNT * STRIDE * SIZE
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
)

// Any remaining multi-factor products, first at least 3-factor products,
// when they're not all constants...
@@
expression E1, E2, E3;
constant C1, C2, C3;
@@

(
  kmalloc(C1 * C2 * C3, ...)
|
  kmalloc(
-	(E1) * E2 * E3
+	array3_size(E1, E2, E3)
  , ...)
|
  kmalloc(
-	(E1) * (E2) * E3
+	array3_size(E1, E2, E3)
  , ...)
|
  kmalloc(
-	(E1) * (E2) * (E3)
+	array3_size(E1, E2, E3)
  , ...)
|
  kmalloc(
-	E1 * E2 * E3
+	array3_size(E1, E2, E3)
  , ...)
)

// And then all remaining 2 factors products when they're not all constants,
// keeping sizeof() as the second factor argument.
@@
expression THING, E1, E2;
type TYPE;
constant C1, C2, C3;
@@

(
  kmalloc(sizeof(THING) * C2, ...)
|
  kmalloc(sizeof(TYPE) * C2, ...)
|
  kmalloc(C1 * C2 * C3, ...)
|
  kmalloc(C1 * C2, ...)
|
- kmalloc
+ kmalloc_array
  (
-	sizeof(TYPE) * (E2)
+	E2, sizeof(TYPE)
  , ...)
|
- kmalloc
+ kmalloc_array
  (
-	sizeof(TYPE) * E2
+	E2, sizeof(TYPE)
  , ...)
|
- kmalloc
+ kmalloc_array
  (
-	sizeof(THING) * (E2)
+	E2, sizeof(THING)
  , ...)
|
- kmalloc
+ kmalloc_array
  (
-	sizeof(THING) * E2
+	E2, sizeof(THING)
  , ...)
|
- kmalloc
+ kmalloc_array
  (
-	(E1) * E2
+	E1, E2
  , ...)
|
- kmalloc
+ kmalloc_array
  (
-	(E1) * (E2)
+	E1, E2
  , ...)
|
- kmalloc
+ kmalloc_array
  (
-	E1 * E2
+	E1, E2
  , ...)
)

Signed-off-by: Kees Cook <keescook@chromium.org>
2018-06-12 16:19:22 -07:00
Jiang Biao
9ee93ba3c4 mbcache: make sure c_entry_count is not decremented past zero
Signed-off-by: Jiang Biao <jiang.biao2@zte.com.cn>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
CC: Eric Biggers <ebiggers@google.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: Jan Kara <jack@suse.cz>
2018-01-09 23:57:52 -05:00
Eric Biggers
bbe45d2460 mbcache: revert "fs/mbcache.c: make count_objects() more robust"
This reverts commit d5dabd6339.

This patch did absolutely nothing, because ->c_entry_count is unsigned.

In addition if there is a bug in how mbcache maintains its entry count,
it needs to be fixed, not just hacked around.  (There is no obvious bug,
though.)

Cc: Jan Kara <jack@suse.cz>
Cc: Jiang Biao <jiang.biao2@zte.com.cn>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
2018-01-07 16:35:20 -05:00
Alexander Potapenko
3876bbe27d mbcache: initialize entry->e_referenced in mb_cache_entry_create()
KMSAN reported use of uninitialized |entry->e_referenced| in a condition
in mb_cache_shrink():

==================================================================
BUG: KMSAN: use of uninitialized memory in mb_cache_shrink+0x3b4/0xc50 fs/mbcache.c:287
CPU: 2 PID: 816 Comm: kswapd1 Not tainted 4.11.0-rc5+ #2877
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs
01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:16 [inline]
 dump_stack+0x172/0x1c0 lib/dump_stack.c:52
 kmsan_report+0x12a/0x180 mm/kmsan/kmsan.c:927
 __msan_warning_32+0x61/0xb0 mm/kmsan/kmsan_instr.c:469
 mb_cache_shrink+0x3b4/0xc50 fs/mbcache.c:287
 mb_cache_scan+0x67/0x80 fs/mbcache.c:321
 do_shrink_slab mm/vmscan.c:397 [inline]
 shrink_slab+0xc3d/0x12d0 mm/vmscan.c:500
 shrink_node+0x208f/0x2fd0 mm/vmscan.c:2603
 kswapd_shrink_node mm/vmscan.c:3172 [inline]
 balance_pgdat mm/vmscan.c:3289 [inline]
 kswapd+0x160f/0x2850 mm/vmscan.c:3478
 kthread+0x46c/0x5f0 kernel/kthread.c:230
 ret_from_fork+0x29/0x40 arch/x86/entry/entry_64.S:430
chained origin:
 save_stack_trace+0x37/0x40 arch/x86/kernel/stacktrace.c:59
 kmsan_save_stack_with_flags mm/kmsan/kmsan.c:302 [inline]
 kmsan_save_stack mm/kmsan/kmsan.c:317 [inline]
 kmsan_internal_chain_origin+0x12a/0x1f0 mm/kmsan/kmsan.c:547
 __msan_store_shadow_origin_1+0xac/0x110 mm/kmsan/kmsan_instr.c:257
 mb_cache_entry_create+0x3b3/0xc60 fs/mbcache.c:95
 ext4_xattr_cache_insert fs/ext4/xattr.c:1647 [inline]
 ext4_xattr_block_set+0x4c82/0x5530 fs/ext4/xattr.c:1022
 ext4_xattr_set_handle+0x1332/0x20a0 fs/ext4/xattr.c:1252
 ext4_xattr_set+0x4d2/0x680 fs/ext4/xattr.c:1306
 ext4_xattr_trusted_set+0x8d/0xa0 fs/ext4/xattr_trusted.c:36
 __vfs_setxattr+0x703/0x790 fs/xattr.c:149
 __vfs_setxattr_noperm+0x27a/0x6f0 fs/xattr.c:180
 vfs_setxattr fs/xattr.c:223 [inline]
 setxattr+0x6ae/0x790 fs/xattr.c:449
 path_setxattr+0x1eb/0x380 fs/xattr.c:468
 SYSC_lsetxattr+0x8d/0xb0 fs/xattr.c:490
 SyS_lsetxattr+0x77/0xa0 fs/xattr.c:486
 entry_SYSCALL_64_fastpath+0x13/0x94
origin:
 save_stack_trace+0x37/0x40 arch/x86/kernel/stacktrace.c:59
 kmsan_save_stack_with_flags mm/kmsan/kmsan.c:302 [inline]
 kmsan_internal_poison_shadow+0xb1/0x1a0 mm/kmsan/kmsan.c:198
 kmsan_kmalloc+0x7f/0xe0 mm/kmsan/kmsan.c:337
 kmem_cache_alloc+0x1c2/0x1e0 mm/slub.c:2766
 mb_cache_entry_create+0x283/0xc60 fs/mbcache.c:86
 ext4_xattr_cache_insert fs/ext4/xattr.c:1647 [inline]
 ext4_xattr_block_set+0x4c82/0x5530 fs/ext4/xattr.c:1022
 ext4_xattr_set_handle+0x1332/0x20a0 fs/ext4/xattr.c:1252
 ext4_xattr_set+0x4d2/0x680 fs/ext4/xattr.c:1306
 ext4_xattr_trusted_set+0x8d/0xa0 fs/ext4/xattr_trusted.c:36
 __vfs_setxattr+0x703/0x790 fs/xattr.c:149
 __vfs_setxattr_noperm+0x27a/0x6f0 fs/xattr.c:180
 vfs_setxattr fs/xattr.c:223 [inline]
 setxattr+0x6ae/0x790 fs/xattr.c:449
 path_setxattr+0x1eb/0x380 fs/xattr.c:468
 SYSC_lsetxattr+0x8d/0xb0 fs/xattr.c:490
 SyS_lsetxattr+0x77/0xa0 fs/xattr.c:486
 entry_SYSCALL_64_fastpath+0x13/0x94
==================================================================

Signed-off-by: Alexander Potapenko <glider@google.com>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Cc: stable@vger.kernel.org # v4.6
2018-01-07 16:22:35 -05:00
Jiang Biao
d5dabd6339 fs/mbcache.c: make count_objects() more robust
When running ltp stress test for 7*24 hours, vmscan occasionally emits
the following warning continuously:

  mb_cache_scan+0x0/0x3f0 negative objects to delete
  nr=-9232265467809300450
  ...

Tracing shows the freeable(mb_cache_count returns) is -1, which causes
the continuous accumulation and overflow of total_scan.

This patch makes sure that mb_cache_count() cannot return a negative
value, which makes the mbcache shrinker more robust.

Link: http://lkml.kernel.org/r/1511753419-52328-1-git-send-email-jiang.biao2@zte.com.cn
Signed-off-by: Jiang Biao <jiang.biao2@zte.com.cn>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: <zhong.weidong@zte.com.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2017-11-29 18:40:43 -08:00
Tahsin Erdogan
dec214d00e ext4: xattr inode deduplication
Ext4 now supports xattr values that are up to 64k in size (vfs limit).
Large xattr values are stored in external inodes each one holding a
single value. Once written the data blocks of these inodes are immutable.

The real world use cases are expected to have a lot of value duplication
such as inherited acls etc. To reduce data duplication on disk, this patch
implements a deduplicator that allows sharing of xattr inodes.

The deduplication is based on an in-memory hash lookup that is a best
effort sharing scheme. When a xattr inode is read from disk (i.e.
getxattr() call), its crc32c hash is added to a hash table. Before
creating a new xattr inode for a value being set, the hash table is
checked to see if an existing inode holds an identical value. If such an
inode is found, the ref count on that inode is incremented. On value
removal the ref count is decremented and if it reaches zero the inode is
deleted.

The quota charging for such inodes is manually managed. Every reference
holder is charged the full size as if there was no sharing happening.
This is consistent with how xattr blocks are also charged.

[ Fixed up journal credits calculation to handle inline data and the
  rare case where an shared xattr block can get freed when two thread
  race on breaking the xattr block sharing. --tytso ]

Signed-off-by: Tahsin Erdogan <tahsin@google.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
2017-06-22 11:44:55 -04:00
Tahsin Erdogan
c07dfcb458 mbcache: make mbcache naming more generic
Make names more generic so that mbcache usage is not limited to
block sharing. In a subsequent patch in the series
("ext4: xattr inode deduplication"), we start using the mbcache code
for sharing xattr inodes. With that patch, old mb_cache_entry.e_block
field could be holding either a block number or an inode number.

Signed-off-by: Tahsin Erdogan <tahsin@google.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
2017-06-22 10:29:53 -04:00
Eric Biggers
b649668c0b mbcache: document that "find" functions only return reusable entries
mb_cache_entry_find_first() and mb_cache_entry_find_next() only return
cache entries with the 'e_reusable' bit set.  This should be documented.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Jan Kara <jack@suse.cz>
2016-12-03 15:55:01 -05:00
Eric Biggers
132d4e2d55 mbcache: use consistent type for entry count
mbcache used several different types to represent the number of entries
in the cache.  For consistency within mbcache and with the shrinker API,
always use unsigned long.

This does not change behavior for current mbcache users (ext2 and ext4)
since they limit the entry count to a value which easily fits in an int.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Jan Kara <jack@suse.cz>
2016-12-03 15:43:48 -05:00
Eric Biggers
97c7b18a5d mbcache: remove unnecessary module_get/module_put
When mbcache is built as a module, any modules that use it (ext2 and/or
ext4) will depend on its symbols directly, incrementing its reference
count.  Therefore, there is no need to do module_get/module_put.

Also note that since the module_get/module_put were in the mbcache
module itself, executing those lines of code was already dependent on
another reference to the mbcache module being held.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Jan Kara <jack@suse.cz>
2016-12-03 15:38:29 -05:00
Eric Biggers
21d0f4fa8e mbcache: don't BUG() if entry cache cannot be allocated
mbcache can be a module that is loaded long after startup, when someone
asks to mount an ext2 or ext4 filesystem.  Therefore it should not BUG()
if kmem_cache_create() fails, but rather just fail the module load.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Jan Kara <jack@suse.cz>
2016-12-03 15:28:53 -05:00
Eric Biggers
918b7306ed mbcache: correctly handle 'e_referenced' bit
mbcache entries have an 'e_referenced' bit which users can set with
mb_cache_entry_touch() to indicate that an entry should be given another
pass through the LRU list before the shrinker can delete it.  However,
mb_cache_shrink() actually would, when seeing an e_referenced entry at
the front of the list (the least-recently used end), place it right at
the front of the list again.  The next iteration would then remove the
entry from the list and delete it.  Consequently, e_referenced had
essentially no effect, so ext2/ext4 xattr blocks would sometimes not be
reused as often as expected.

Fix this by making the shrinker move e_referenced entries to the back of
the list rather than the front.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Jan Kara <jack@suse.cz>
2016-12-03 15:13:15 -05:00
Chao Yu
8913f343cd mbcache: fix to detect failure of register_shrinker
register_shrinker in mb_cache_create may fail due to no memory. This
patch fixes to do the check of return value of register_shrinker and
handle the error case, otherwise mb_cache_create may return with no
error, but losing the inner shrinker.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
2016-08-31 11:44:36 -04:00
Andreas Gruenbacher
6048c64b26 mbcache: add reusable flag to cache entries
To reduce amount of damage caused by single bad block, we limit number
of inodes sharing an xattr block to 1024. Thus there can be more xattr
blocks with the same contents when there are lots of files with the same
extended attributes. These xattr blocks naturally result in hash
collisions and can form long hash chains and we unnecessarily check each
such block only to find out we cannot use it because it is already
shared by too many inodes.

Add a reusable flag to cache entries which is cleared when a cache entry
has reached its maximum refcount.  Cache entries which are not marked
reusable are skipped by mb_cache_entry_find_{first,next}. This
significantly speeds up mbcache when there are many same xattr blocks.
For example for xattr-bench with 5 values and each process handling
20000 files, the run for 64 processes is 25x faster with this patch.
Even for 8 processes the speedup is almost 3x. We have also verified
that for situations where there is only one xattr block of each kind,
the patch doesn't have a measurable cost.

[JK: Remove handling of setting the same value since it is not needed
anymore, check for races in e_reusable setting, improve changelog,
add measurements]

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
2016-02-22 22:44:04 -05:00
Andreas Gruenbacher
dc8d5e565f mbcache: get rid of _e_hash_list_head
Get rid of field _e_hash_list_head in cache entries and add bit field
e_referenced instead.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
2016-02-22 22:42:05 -05:00
Jan Kara
7a2508e1b6 mbcache2: rename to mbcache
Since old mbcache code is gone, let's rename new code to mbcache since
number 2 is now meaningless. This is just a mechanical replacement.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
2016-02-22 22:35:22 -05:00
Jan Kara
ecd1e64412 mbcache: remove mbcache
Both ext2 and ext4 are now converted to mbcache2. Remove the old mbcache
code.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
2016-02-22 12:21:14 -05:00
T Makphaibulchoke
ec7756ae15 fs/mbcache: replace __builtin_log2() with ilog2()
Fix compiler error with some gcc version(s) that do not
support __builtin_log2() by replacing __builtin_log2() with
ilog2().

Signed-off-by: T. Makphaibulchoke <tmac@hp.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Maciej W. Rozycki <macro@linux-mips.org>
2014-06-25 22:08:29 -04:00
T Makphaibulchoke
9c191f701c ext4: each filesystem creates and uses its own mb_cache
This patch adds new interfaces to create and destory cache,
ext4_xattr_create_cache() and ext4_xattr_destroy_cache(), and remove
the cache creation and destory calls from ex4_init_xattr() and
ext4_exitxattr() in fs/ext4/xattr.c.

fs/ext4/super.c has been changed so that when a filesystem is mounted
a cache is allocated and attched to its ext4_sb_info structure.

fs/mbcache.c has been changed so that only one slab allocator is
allocated and used by all mbcache structures.

Signed-off-by: T. Makphaibulchoke <tmac@hp.com>
2014-03-18 19:24:49 -04:00
T Makphaibulchoke
1f3e55fe02 fs/mbcache.c: doucple the locking of local from global data
The patch increases the parallelism of mbcache by using the built-in
lock in the hlist_bl_node to protect the mb_cache's local block and
index hash chains.  The global data mb_cache_lru_list and
mb_cache_list continue to be protected by the global
mb_cache_spinlock.

New block group spinlock, mb_cache_bg_lock is also added to serialize
accesses to mb_cache_entry's local data.

A new member e_refcnt is added to the mb_cache_entry structure to help
preventing an mb_cache_entry from being deallocated by a free while it
is being referenced by either mb_cache_entry_get() or
mb_cache_entry_find().

Signed-off-by: T. Makphaibulchoke <tmac@hp.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
2014-03-18 19:23:20 -04:00
T Makphaibulchoke
3e037e5211 fs/mbcache.c: change block and index hash chain to hlist_bl_node
This patch changes each mb_cache's both block and index hash chains to
use a hlist_bl_node, which contains a built-in lock.  This is the
first step in decoupling of locks serializing accesses to mb_cache
global data and each mb_cache_entry local data.

Signed-off-by: T. Makphaibulchoke <tmac@hp.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
2014-03-18 19:19:41 -04:00
Dave Chinner
1ab6c4997e fs: convert fs shrinkers to new scan/count API
Convert the filesystem shrinkers to use the new API, and standardise some
of the behaviours of the shrinkers at the same time.  For example,
nr_to_scan means the number of objects to scan, not the number of objects
to free.

I refactored the CIFS idmap shrinker a little - it really needs to be
broken up into a shrinker per tree and keep an item count with the tree
root so that we don't need to walk the tree every time the shrinker needs
to count the number of objects in the tree (i.e.  all the time under
memory pressure).

[glommer@openvz.org: fixes for ext4, ubifs, nfs, cifs and glock. Fixes are needed mainly due to new code merged in the tree]
[assorted fixes folded in]
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Glauber Costa <glommer@openvz.org>
Acked-by: Mel Gorman <mgorman@suse.de>
Acked-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Acked-by: Jan Kara <jack@suse.cz>
Acked-by: Steven Whitehouse <swhiteho@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Cc: Arve Hjønnevåg <arve@android.com>
Cc: Carlos Maiolino <cmaiolino@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Chuck Lever <chuck.lever@oracle.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: David Rientjes <rientjes@google.com>
Cc: Gleb Natapov <gleb@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: J. Bruce Fields <bfields@redhat.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Kent Overstreet <koverstreet@google.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Steven Whitehouse <swhiteho@redhat.com>
Cc: Thomas Hellstrom <thellstrom@vmware.com>
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2013-09-10 18:56:31 -04:00
Glauber Costa
55f841ce93 super: fix calculation of shrinkable objects for small numbers
The sysctl knob sysctl_vfs_cache_pressure is used to determine which
percentage of the shrinkable objects in our cache we should actively try
to shrink.

It works great in situations in which we have many objects (at least more
than 100), because the aproximation errors will be negligible.  But if
this is not the case, specially when total_objects < 100, we may end up
concluding that we have no objects at all (total / 100 = 0, if total <
100).

This is certainly not the biggest killer in the world, but may matter in
very low kernel memory situations.

Signed-off-by: Glauber Costa <glommer@openvz.org>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Cc: Arve Hjønnevåg <arve@android.com>
Cc: Carlos Maiolino <cmaiolino@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Chuck Lever <chuck.lever@oracle.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: David Rientjes <rientjes@google.com>
Cc: Gleb Natapov <gleb@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: J. Bruce Fields <bfields@redhat.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Kent Overstreet <koverstreet@google.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Steven Whitehouse <swhiteho@redhat.com>
Cc: Thomas Hellstrom <thellstrom@vmware.com>
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2013-09-10 18:56:29 -04:00
Ying Han
1495f230fa vmscan: change shrinker API by passing shrink_control struct
Change each shrinker's API by consolidating the existing parameters into
shrink_control struct.  This will simplify any further features added w/o
touching each file of shrinker.

[akpm@linux-foundation.org: fix build]
[akpm@linux-foundation.org: fix warning]
[kosaki.motohiro@jp.fujitsu.com: fix up new shrinker API]
[akpm@linux-foundation.org: fix xfs warning]
[akpm@linux-foundation.org: update gfs2]
Signed-off-by: Ying Han <yinghan@google.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Acked-by: Pavel Emelyanov <xemul@openvz.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Dave Hansen <dave@linux.vnet.ibm.com>
Cc: Steven Whitehouse <swhiteho@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2011-05-25 08:39:26 -07:00