Move invalidate_bdev, block_sync_page into fs/block_dev.c. Export
kill_bdev as well, so brd doesn't have to open code it. Reduce
buffer_head.h requirement accordingly.
Removed a rather large comment from invalidate_bdev, as it looked a bit
obsolete to bother moving. The small comment replacing it says enough.
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Add calls to path-based security hooks into CacheFiles as, unlike inode-based
security, these aren't implicit in the vfs_mkdir() and similar calls.
Reported-by: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
All file_operations should get a .llseek operation so we can make
nonseekable_open the default for future file operations without a
.llseek pointer.
The three cases that we can automatically detect are no_llseek, seq_lseek
and default_llseek. For cases where we can we can automatically prove that
the file offset is always ignored, we use noop_llseek, which maintains
the current behavior of not returning an error from a seek.
New drivers should normally not use noop_llseek but instead use no_llseek
and call nonseekable_open at open time. Existing drivers can be converted
to do the same when the maintainer knows for certain that no user code
relies on calling seek on the device file.
The generated code is often incorrectly indented and right now contains
comments that clarify for each added line why a specific variant was
chosen. In the version that gets submitted upstream, the comments will
be gone and I will manually fix the indentation, because there does not
seem to be a way to do that using coccinelle.
Some amount of new code is currently sitting in linux-next that should get
the same modifications, which I will do at the end of the merge window.
Many thanks to Julia Lawall for helping me learn to write a semantic
patch that does all this.
===== begin semantic patch =====
// This adds an llseek= method to all file operations,
// as a preparation for making no_llseek the default.
//
// The rules are
// - use no_llseek explicitly if we do nonseekable_open
// - use seq_lseek for sequential files
// - use default_llseek if we know we access f_pos
// - use noop_llseek if we know we don't access f_pos,
// but we still want to allow users to call lseek
//
@ open1 exists @
identifier nested_open;
@@
nested_open(...)
{
<+...
nonseekable_open(...)
...+>
}
@ open exists@
identifier open_f;
identifier i, f;
identifier open1.nested_open;
@@
int open_f(struct inode *i, struct file *f)
{
<+...
(
nonseekable_open(...)
|
nested_open(...)
)
...+>
}
@ read disable optional_qualifier exists @
identifier read_f;
identifier f, p, s, off;
type ssize_t, size_t, loff_t;
expression E;
identifier func;
@@
ssize_t read_f(struct file *f, char *p, size_t s, loff_t *off)
{
<+...
(
*off = E
|
*off += E
|
func(..., off, ...)
|
E = *off
)
...+>
}
@ read_no_fpos disable optional_qualifier exists @
identifier read_f;
identifier f, p, s, off;
type ssize_t, size_t, loff_t;
@@
ssize_t read_f(struct file *f, char *p, size_t s, loff_t *off)
{
... when != off
}
@ write @
identifier write_f;
identifier f, p, s, off;
type ssize_t, size_t, loff_t;
expression E;
identifier func;
@@
ssize_t write_f(struct file *f, const char *p, size_t s, loff_t *off)
{
<+...
(
*off = E
|
*off += E
|
func(..., off, ...)
|
E = *off
)
...+>
}
@ write_no_fpos @
identifier write_f;
identifier f, p, s, off;
type ssize_t, size_t, loff_t;
@@
ssize_t write_f(struct file *f, const char *p, size_t s, loff_t *off)
{
... when != off
}
@ fops0 @
identifier fops;
@@
struct file_operations fops = {
...
};
@ has_llseek depends on fops0 @
identifier fops0.fops;
identifier llseek_f;
@@
struct file_operations fops = {
...
.llseek = llseek_f,
...
};
@ has_read depends on fops0 @
identifier fops0.fops;
identifier read_f;
@@
struct file_operations fops = {
...
.read = read_f,
...
};
@ has_write depends on fops0 @
identifier fops0.fops;
identifier write_f;
@@
struct file_operations fops = {
...
.write = write_f,
...
};
@ has_open depends on fops0 @
identifier fops0.fops;
identifier open_f;
@@
struct file_operations fops = {
...
.open = open_f,
...
};
// use no_llseek if we call nonseekable_open
////////////////////////////////////////////
@ nonseekable1 depends on !has_llseek && has_open @
identifier fops0.fops;
identifier nso ~= "nonseekable_open";
@@
struct file_operations fops = {
... .open = nso, ...
+.llseek = no_llseek, /* nonseekable */
};
@ nonseekable2 depends on !has_llseek @
identifier fops0.fops;
identifier open.open_f;
@@
struct file_operations fops = {
... .open = open_f, ...
+.llseek = no_llseek, /* open uses nonseekable */
};
// use seq_lseek for sequential files
/////////////////////////////////////
@ seq depends on !has_llseek @
identifier fops0.fops;
identifier sr ~= "seq_read";
@@
struct file_operations fops = {
... .read = sr, ...
+.llseek = seq_lseek, /* we have seq_read */
};
// use default_llseek if there is a readdir
///////////////////////////////////////////
@ fops1 depends on !has_llseek && !nonseekable1 && !nonseekable2 && !seq @
identifier fops0.fops;
identifier readdir_e;
@@
// any other fop is used that changes pos
struct file_operations fops = {
... .readdir = readdir_e, ...
+.llseek = default_llseek, /* readdir is present */
};
// use default_llseek if at least one of read/write touches f_pos
/////////////////////////////////////////////////////////////////
@ fops2 depends on !fops1 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @
identifier fops0.fops;
identifier read.read_f;
@@
// read fops use offset
struct file_operations fops = {
... .read = read_f, ...
+.llseek = default_llseek, /* read accesses f_pos */
};
@ fops3 depends on !fops1 && !fops2 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @
identifier fops0.fops;
identifier write.write_f;
@@
// write fops use offset
struct file_operations fops = {
... .write = write_f, ...
+ .llseek = default_llseek, /* write accesses f_pos */
};
// Use noop_llseek if neither read nor write accesses f_pos
///////////////////////////////////////////////////////////
@ fops4 depends on !fops1 && !fops2 && !fops3 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @
identifier fops0.fops;
identifier read_no_fpos.read_f;
identifier write_no_fpos.write_f;
@@
// write fops use offset
struct file_operations fops = {
...
.write = write_f,
.read = read_f,
...
+.llseek = noop_llseek, /* read and write both use no f_pos */
};
@ depends on has_write && !has_read && !fops1 && !fops2 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @
identifier fops0.fops;
identifier write_no_fpos.write_f;
@@
struct file_operations fops = {
... .write = write_f, ...
+.llseek = noop_llseek, /* write uses no f_pos */
};
@ depends on has_read && !has_write && !fops1 && !fops2 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @
identifier fops0.fops;
identifier read_no_fpos.read_f;
@@
struct file_operations fops = {
... .read = read_f, ...
+.llseek = noop_llseek, /* read uses no f_pos */
};
@ depends on !has_read && !has_write && !fops1 && !fops2 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @
identifier fops0.fops;
@@
struct file_operations fops = {
...
+.llseek = noop_llseek, /* no read or write fn */
};
===== End semantic patch =====
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Julia Lawall <julia@diku.dk>
Cc: Christoph Hellwig <hch@infradead.org>
Add a dummy printk function for the maintenance of unused printks through gcc
format checking, and also so that side-effect checking is maintained too.
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Add three helpers that retrieve a refcounted copy of the root and cwd
from the supplied fs_struct.
get_fs_root()
get_fs_pwd()
get_fs_root_and_pwd()
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6: (96 commits)
no need for list_for_each_entry_safe()/resetting with superblock list
Fix sget() race with failing mount
vfs: don't hold s_umount over close_bdev_exclusive() call
sysv: do not mark superblock dirty on remount
sysv: do not mark superblock dirty on mount
btrfs: remove junk sb_dirt change
BFS: clean up the superblock usage
AFFS: wait for sb synchronization when needed
AFFS: clean up dirty flag usage
cifs: truncate fallout
mbcache: fix shrinker function return value
mbcache: Remove unused features
add f_flags to struct statfs(64)
pass a struct path to vfs_statfs
update VFS documentation for method changes.
All filesystems that need invalidate_inode_buffers() are doing that explicitly
convert remaining ->clear_inode() to ->evict_inode()
Make ->drop_inode() just return whether inode needs to be dropped
fs/inode.c:clear_inode() is gone
fs/inode.c:evict() doesn't care about delete vs. non-delete paths now
...
Fix up trivial conflicts in fs/nilfs2/super.c
We'll need the path to implement the flags field for statvfs support.
We do have it available in all callers except:
- ecryptfs_statfs. This one doesn't actually need vfs_statfs but just
needs to do a caller to the lower filesystem statfs method.
- sys_ustat. Add a non-exported statfs_by_dentry helper for it which
doesn't won't be able to fill out the flags field later on.
In addition rename the helpers for statfs vs fstatfs to do_*statfs instead
of the misleading vfs prefix.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Make fscache operation to use only workqueue instead of combination of
workqueue and slow-work. FSCACHE_OP_SLOW is dropped and
FSCACHE_OP_FAST is renamed to FSCACHE_OP_ASYNC and uses newly added
fscache_op_wq workqueue to execute op->processor().
fscache_operation_init_slow() is dropped and fscache_operation_init()
now takes @processor argument directly.
* Unbound workqueue is used.
* fscache_retrieval_work() is no longer necessary as OP_ASYNC now does
the equivalent thing.
* sysctl fscache.operation_max_active added to control concurrency.
The default value is nr_cpus clamped between 2 and
WQ_UNBOUND_MAX_ACTIVE.
* debugfs support is dropped for now. Tracing API based debug
facility is planned to be added.
Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: David Howells <dhowells@redhat.com>
Make fscache object state transition callbacks use workqueue instead
of slow-work. New dedicated unbound CPU workqueue fscache_object_wq
is created. get/put callbacks are renamed and modified to take
@object and called directly from the enqueue wrapper and the work
function. While at it, make all open coded instances of get/put to
use fscache_get/put_object().
* Unbound workqueue is used.
* work_busy() output is printed instead of slow-work flags in object
debugging outputs. They mean basically the same thing bit-for-bit.
* sysctl fscache.object_max_active added to control concurrency. The
default value is nr_cpus clamped between 4 and
WQ_UNBOUND_MAX_ACTIVE.
* slow_work_sleep_till_thread_needed() is replaced with fscache
private implementation fscache_object_sleep_till_congested() which
waits on fscache_object_wq congestion.
* debugfs support is dropped for now. Tracing API based debug
facility is planned to be added.
Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: David Howells <dhowells@redhat.com>
cachefiles_determine_cache_security() is expected to return with a
security override in place. However, if set_create_files_as() fails, we
fail to do this. In this case, we should just reinstate the security
override that was set by the caller.
Furthermore, if set_create_files_as() fails, we should dispose of the
new credentials we were in the process of creating.
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Fix an occasional EIO returned by a call to vfs_unlink():
[ 4868.465413] CacheFiles: I/O Error: Unlink failed
[ 4868.465444] FS-Cache: Cache cachefiles stopped due to I/O error
[ 4947.320011] CacheFiles: File cache on md3 unregistering
[ 4947.320041] FS-Cache: Withdrawing cache "mycache"
[ 5127.348683] FS-Cache: Cache "mycache" added (type cachefiles)
[ 5127.348716] CacheFiles: File cache on md3 registered
[ 7076.871081] CacheFiles: I/O Error: Unlink failed
[ 7076.871130] FS-Cache: Cache cachefiles stopped due to I/O error
[ 7116.780891] CacheFiles: File cache on md3 unregistering
[ 7116.780937] FS-Cache: Withdrawing cache "mycache"
[ 7296.813394] FS-Cache: Cache "mycache" added (type cachefiles)
[ 7296.813432] CacheFiles: File cache on md3 registered
What happens is this:
(1) A cached NFS file is seen to have become out of date, so NFS retires the
object and immediately acquires a new object with the same key.
(2) Retirement of the old object is done asynchronously - so the lookup/create
to generate the new object may be done first.
This can be a problem as the old object and the new object must exist at
the same point in the backing filesystem (i.e. they must have the same
pathname).
(3) The lookup for the new object sees that a backing file already exists,
checks to see whether it is valid and sees that it isn't. It then deletes
that file and creates a new one on disk.
(4) The retirement phase for the old file is then performed. It tries to
delete the dentry it has, but ext4_unlink() returns -EIO because the inode
attached to that dentry no longer matches the inode number associated with
the filename in the parent directory.
The trace below shows this quite well.
[md5sum] ==> __fscache_relinquish_cookie(ffff88002d12fb58{NFS.fh,ffff88002ce62100},1)
[md5sum] ==> __fscache_acquire_cookie({NFS.server},{NFS.fh},ffff88002ce62100)
NFS has retired the old cookie and asked for a new one.
[kslowd] ==> fscache_object_state_machine({OBJ52,OBJECT_ACTIVE,24})
[kslowd] <== fscache_object_state_machine() [->OBJECT_DYING]
[kslowd] ==> fscache_object_state_machine({OBJ53,OBJECT_INIT,0})
[kslowd] <== fscache_object_state_machine() [->OBJECT_LOOKING_UP]
[kslowd] ==> fscache_object_state_machine({OBJ52,OBJECT_DYING,24})
[kslowd] <== fscache_object_state_machine() [->OBJECT_RECYCLING]
The old object (OBJ52) is going through the terminal states to get rid of it,
whilst the new object - (OBJ53) - is coming into being.
[kslowd] ==> fscache_object_state_machine({OBJ53,OBJECT_LOOKING_UP,0})
[kslowd] ==> cachefiles_walk_to_object({ffff88003029d8b8},OBJ53,@68,)
[kslowd] lookup '@68'
[kslowd] next -> ffff88002ce41bd0 positive
[kslowd] advance
[kslowd] lookup 'Es0g00og0_Nd_XCYe3BOzvXrsBLMlN6aw16M1htaA'
[kslowd] next -> ffff8800369faac8 positive
The new object has looked up the subdir in which the file would be in (getting
dentry ffff88002ce41bd0) and then looked up the file itself (getting dentry
ffff8800369faac8).
[kslowd] validate 'Es0g00og0_Nd_XCYe3BOzvXrsBLMlN6aw16M1htaA'
[kslowd] ==> cachefiles_bury_object(,'@68','Es0g00og0_Nd_XCYe3BOzvXrsBLMlN6aw16M1htaA')
[kslowd] remove ffff8800369faac8 from ffff88002ce41bd0
[kslowd] unlink stale object
[kslowd] <== cachefiles_bury_object() = 0
It then checks the file's xattrs to see if it's valid. NFS says that the
auxiliary data indicate the file is out of date (obvious to us - that's why NFS
ditched the old version and got a new one). CacheFiles then deletes the old
file (dentry ffff8800369faac8).
[kslowd] redo lookup
[kslowd] lookup 'Es0g00og0_Nd_XCYe3BOzvXrsBLMlN6aw16M1htaA'
[kslowd] next -> ffff88002cd94288 negative
[kslowd] create -> ffff88002cd94288{ffff88002cdaf238{ino=148247}}
CacheFiles then redoes the lookup and gets a negative result in a new dentry
(ffff88002cd94288) which it then creates a file for.
[kslowd] ==> cachefiles_mark_object_active(,OBJ53)
[kslowd] <== cachefiles_mark_object_active() = 0
[kslowd] === OBTAINED_OBJECT ===
[kslowd] <== cachefiles_walk_to_object() = 0 [148247]
[kslowd] <== fscache_object_state_machine() [->OBJECT_AVAILABLE]
The new object is then marked active and the state machine moves to the
available state - at which point NFS can start filling the object.
[kslowd] ==> fscache_object_state_machine({OBJ52,OBJECT_RECYCLING,20})
[kslowd] ==> fscache_release_object()
[kslowd] ==> cachefiles_drop_object({OBJ52,2})
[kslowd] ==> cachefiles_delete_object(,OBJ52{ffff8800369faac8})
The old object, meanwhile, goes on with being retired. If allocation occurs
first, cachefiles_delete_object() has to wait for dir->d_inode->i_mutex to
become available before it can continue.
[kslowd] ==> cachefiles_bury_object(,'@68','Es0g00og0_Nd_XCYe3BOzvXrsBLMlN6aw16M1htaA')
[kslowd] remove ffff8800369faac8 from ffff88002ce41bd0
[kslowd] unlink stale object
EXT4-fs warning (device sda6): ext4_unlink: Inode number mismatch in unlink (148247!=148193)
CacheFiles: I/O Error: Unlink failed
FS-Cache: Cache cachefiles stopped due to I/O error
CacheFiles then tries to delete the file for the old object, but the dentry it
has (ffff8800369faac8) no longer points to a valid inode for that directory
entry, and so ext4_unlink() returns -EIO when de->inode does not match i_ino.
[kslowd] <== cachefiles_bury_object() = -5
[kslowd] <== cachefiles_delete_object() = -5
[kslowd] <== fscache_object_state_machine() [->OBJECT_DEAD]
[kslowd] ==> fscache_object_state_machine({OBJ53,OBJECT_AVAILABLE,0})
[kslowd] <== fscache_object_state_machine() [->OBJECT_ACTIVE]
(Note that the above trace includes extra information beyond that produced by
the upstream code).
The fix is to note when an object that is being retired has had its object
deleted preemptively by a replacement object that is being created, and to
skip the second removal attempt in such a case.
Reported-by: Greg M <gregm@servu.net.au>
Reported-by: Mark Moseley <moseleymark@gmail.com>
Reported-by: Romain DEGEZ <romain.degez@smartjog.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
percpu.h is included by sched.h and module.h and thus ends up being
included when building most .c files. percpu.h includes slab.h which
in turn includes gfp.h making everything defined by the two files
universally available and complicating inclusion dependencies.
percpu.h -> slab.h dependency is about to be removed. Prepare for
this change by updating users of gfp and slab facilities include those
headers directly instead of assuming availability. As this conversion
needs to touch large number of source files, the following script is
used as the basis of conversion.
http://userweb.kernel.org/~tj/misc/slabh-sweep.py
The script does the followings.
* Scan files for gfp and slab usages and update includes such that
only the necessary includes are there. ie. if only gfp is used,
gfp.h, if slab is used, slab.h.
* When the script inserts a new include, it looks at the include
blocks and try to put the new include such that its order conforms
to its surrounding. It's put in the include block which contains
core kernel includes, in the same order that the rest are ordered -
alphabetical, Christmas tree, rev-Xmas-tree or at the end if there
doesn't seem to be any matching order.
* If the script can't find a place to put a new include (mostly
because the file doesn't have fitting include block), it prints out
an error message indicating which .h file needs to be added to the
file.
The conversion was done in the following steps.
1. The initial automatic conversion of all .c files updated slightly
over 4000 files, deleting around 700 includes and adding ~480 gfp.h
and ~3000 slab.h inclusions. The script emitted errors for ~400
files.
2. Each error was manually checked. Some didn't need the inclusion,
some needed manual addition while adding it to implementation .h or
embedding .c file was more appropriate for others. This step added
inclusions to around 150 files.
3. The script was run again and the output was compared to the edits
from #2 to make sure no file was left behind.
4. Several build tests were done and a couple of problems were fixed.
e.g. lib/decompress_*.c used malloc/free() wrappers around slab
APIs requiring slab.h to be added manually.
5. The script was run on all .h files but without automatically
editing them as sprinkling gfp.h and slab.h inclusions around .h
files could easily lead to inclusion dependency hell. Most gfp.h
inclusion directives were ignored as stuff from gfp.h was usually
wildly available and often used in preprocessor macros. Each
slab.h inclusion directive was examined and added manually as
necessary.
6. percpu.h was updated not to include slab.h.
7. Build test were done on the following configurations and failures
were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my
distributed build env didn't work with gcov compiles) and a few
more options had to be turned off depending on archs to make things
build (like ipr on powerpc/64 which failed due to missing writeq).
* x86 and x86_64 UP and SMP allmodconfig and a custom test config.
* powerpc and powerpc64 SMP allmodconfig
* sparc and sparc64 SMP allmodconfig
* ia64 SMP allmodconfig
* s390 SMP allmodconfig
* alpha SMP allmodconfig
* um on x86_64 SMP allmodconfig
8. percpu.h modifications were reverted so that it could be applied as
a separate patch and serve as bisection point.
Given the fact that I had only a couple of failures from tests on step
6, I'm fairly confident about the coverage of this conversion patch.
If there is a breakage, it's likely to be something in one of the arch
headers which should be easily discoverable easily on most builds of
the specific arch.
Signed-off-by: Tejun Heo <tj@kernel.org>
Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
cachefiles_delete_object() can race with rename. It gets the parent directory
of the object it's asked to delete, then locks it - but rename may have changed
the object's parent between the get and the completion of the lock.
However, if such a circumstance is detected, we abandon our attempt to delete
the object - since it's no longer in the index key path, it won't be seen
again by lookups of that key. The assumption is that cachefilesd may have
culled it by renaming it to the graveyard for later destruction.
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
* do ima_get_count() in __dentry_open()
* stop doing that in followups
* move ima_path_check() to right after nameidata_to_filp()
* don't bump counters on it
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Makes use of skip_spaces() defined in lib/string.c for removing leading
spaces from strings all over the tree.
It decreases lib.a code size by 47 bytes and reuses the function tree-wide:
text data bss dec hex filename
64688 584 592 65864 10148 (TOTALS-BEFORE)
64641 584 592 65817 10119 (TOTALS-AFTER)
Also, while at it, if we see (*str && isspace(*str)), we can be sure to
remove the first condition (*str) as the second one (isspace(*str)) also
evaluates to 0 whenever *str == 0, making it redundant. In other words,
"a char equals zero is never a space".
Julia Lawall tried the semantic patch (http://coccinelle.lip6.fr) below,
and found occurrences of this pattern on 3 more files:
drivers/leds/led-class.c
drivers/leds/ledtrig-timer.c
drivers/video/output.c
@@
expression str;
@@
( // ignore skip_spaces cases
while (*str && isspace(*str)) { \(str++;\|++str;\) }
|
- *str &&
isspace(*str)
)
Signed-off-by: André Goddard Rosa <andre.goddard@gmail.com>
Cc: Julia Lawall <julia@diku.dk>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Richard Purdie <rpurdie@rpsys.net>
Cc: Neil Brown <neilb@suse.de>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Cc: David Howells <dhowells@redhat.com>
Cc: <linux-ext4@vger.kernel.org>
Cc: Samuel Ortiz <samuel@sortiz.org>
Cc: Patrick McHardy <kaber@trash.net>
Cc: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
When IMA is active, using dentry_open without updating the
IMA counters will result in free/open imbalance errors when
fput is eventually called.
Signed-off-by: Marc Dionne <marc.c.dionne@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Don't log the CacheFiles lookup/create object routined failing with ENOBUFS as
under high memory load or high cache load they can do this quite a lot. This
error simply means that the requested object cannot be created on disk due to
lack of space, or due to failure of the backing filesystem to find sufficient
resources.
Signed-off-by: David Howells <dhowells@redhat.com>
Catch an overly long wait for an old, dying active object when we want to
replace it with a new one. The probability is that all the slow-work threads
are hogged, and the delete can't get a look in.
What we do instead is:
(1) if there's nothing in the slow work queue, we sleep until either the dying
object has finished dying or there is something in the slow work queue
behind which we can queue our object.
(2) if there is something in the slow work queue, we return ETIMEDOUT to
fscache_lookup_object(), which then puts us back on the slow work queue,
presumably behind the deletion that we're blocked by. We are then
deferred for a while until we work our way back through the queue -
without blocking a slow-work thread unnecessarily.
A backtrace similar to the following may appear in the log without this patch:
INFO: task kslowd004:5711 blocked for more than 120 seconds.
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
kslowd004 D 0000000000000000 0 5711 2 0x00000080
ffff88000340bb80 0000000000000046 ffff88002550d000 0000000000000000
ffff88002550d000 0000000000000007 ffff88000340bfd8 ffff88002550d2a8
000000000000ddf0 00000000000118c0 00000000000118c0 ffff88002550d2a8
Call Trace:
[<ffffffff81058e21>] ? trace_hardirqs_on+0xd/0xf
[<ffffffffa011c4d8>] ? cachefiles_wait_bit+0x0/0xd [cachefiles]
[<ffffffffa011c4e1>] cachefiles_wait_bit+0x9/0xd [cachefiles]
[<ffffffff81353153>] __wait_on_bit+0x43/0x76
[<ffffffff8111ae39>] ? ext3_xattr_get+0x1ec/0x270
[<ffffffff813531ef>] out_of_line_wait_on_bit+0x69/0x74
[<ffffffffa011c4d8>] ? cachefiles_wait_bit+0x0/0xd [cachefiles]
[<ffffffff8104c125>] ? wake_bit_function+0x0/0x2e
[<ffffffffa011bc79>] cachefiles_mark_object_active+0x203/0x23b [cachefiles]
[<ffffffffa011c209>] cachefiles_walk_to_object+0x558/0x827 [cachefiles]
[<ffffffffa011a429>] cachefiles_lookup_object+0xac/0x12a [cachefiles]
[<ffffffffa00aa1e9>] fscache_lookup_object+0x1c7/0x214 [fscache]
[<ffffffffa00aafc5>] fscache_object_state_machine+0xa5/0x52d [fscache]
[<ffffffffa00ab4ac>] fscache_object_slow_work_execute+0x5f/0xa0 [fscache]
[<ffffffff81082093>] slow_work_execute+0x18f/0x2d1
[<ffffffff8108239a>] slow_work_thread+0x1c5/0x308
[<ffffffff8104c0f1>] ? autoremove_wake_function+0x0/0x34
[<ffffffff810821d5>] ? slow_work_thread+0x0/0x308
[<ffffffff8104be91>] kthread+0x7a/0x82
[<ffffffff8100beda>] child_rip+0xa/0x20
[<ffffffff8100b87c>] ? restore_args+0x0/0x30
[<ffffffff8104be17>] ? kthread+0x0/0x82
[<ffffffff8100bed0>] ? child_rip+0x0/0x20
1 lock held by kslowd004/5711:
#0: (&sb->s_type->i_mutex_key#7/1){+.+.+.}, at: [<ffffffffa011be64>] cachefiles_walk_to_object+0x1b3/0x827 [cachefiles]
Signed-off-by: David Howells <dhowells@redhat.com>
Show more debugging information if cachefiles_mark_object_active() is asked to
activate an active object.
This may happen, for instance, if the netfs tries to register an object with
the same key multiple times.
The code is changed to (a) get the appropriate object lock to protect the
cookie pointer whilst we dereference it, and (b) get and display the cookie key
if available.
Signed-off-by: David Howells <dhowells@redhat.com>