You've already forked linux-apfs
mirror of
https://github.com/linux-apfs/linux-apfs.git
synced 2026-05-01 15:00:59 -07:00
4e717f5c10
The list_lru implementation has one function, list_lru_dispose_all, with only one user (the dentry code). At first, such function appears to make sense because we are really not interested in the result of isolating each dentry separately - all of them are going away anyway. However, it's implementation is buggy in the following way: When we call list_lru_dispose_all in fs/dcache.c, we scan all dentries marking them with DCACHE_SHRINK_LIST. However, this is done without the nlru->lock taken. The imediate result of that is that someone else may add or remove the dentry from the LRU at the same time. When list_lru_del happens in that scenario we will see an element that is not yet marked with DCACHE_SHRINK_LIST (even though it will be in the future) and obviously remove it from an lru where the element no longer is. Since list_lru_dispose_all will in effect count down nlru's nr_items and list_lru_del will do the same, this will lead to an imbalance. The solution for this would not be so simple: we can obviously just keep the lru_lock taken, but then we have no guarantees that we will be able to acquire the dentry lock (dentry->d_lock). To properly solve this, we need a communication mechanism between the lru and dentry code, so they can coordinate this with each other. Such mechanism already exists in the form of the list_lru_walk_cb callback. So it is possible to construct a dcache-side prune function that does the right thing only by calling list_lru_walk in a loop until no more dentries are available. With only one user, plus the fact that a sane solution for the problem would involve boucing between dcache and list_lru anyway, I see little justification to keep the special case list_lru_dispose_all in tree. Signed-off-by: Glauber Costa <glommer@openvz.org> Cc: Michal Hocko <mhocko@suse.cz> Acked-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
141 lines
4.8 KiB
C
141 lines
4.8 KiB
C
/*
|
|
* Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
|
|
* Authors: David Chinner and Glauber Costa
|
|
*
|
|
* Generic LRU infrastructure
|
|
*/
|
|
#ifndef _LRU_LIST_H
|
|
#define _LRU_LIST_H
|
|
|
|
#include <linux/list.h>
|
|
#include <linux/nodemask.h>
|
|
|
|
/* list_lru_walk_cb has to always return one of those */
|
|
enum lru_status {
|
|
LRU_REMOVED, /* item removed from list */
|
|
LRU_ROTATE, /* item referenced, give another pass */
|
|
LRU_SKIP, /* item cannot be locked, skip */
|
|
LRU_RETRY, /* item not freeable. May drop the lock
|
|
internally, but has to return locked. */
|
|
};
|
|
|
|
struct list_lru_node {
|
|
spinlock_t lock;
|
|
struct list_head list;
|
|
/* kept as signed so we can catch imbalance bugs */
|
|
long nr_items;
|
|
} ____cacheline_aligned_in_smp;
|
|
|
|
struct list_lru {
|
|
/*
|
|
* Because we use a fixed-size array, this struct can be very big if
|
|
* MAX_NUMNODES is big. If this becomes a problem this is fixable by
|
|
* turning this into a pointer and dynamically allocating this to
|
|
* nr_node_ids. This quantity is firwmare-provided, and still would
|
|
* provide room for all nodes at the cost of a pointer lookup and an
|
|
* extra allocation. Because that allocation will most likely come from
|
|
* a different slab cache than the main structure holding this
|
|
* structure, we may very well fail.
|
|
*/
|
|
struct list_lru_node node[MAX_NUMNODES];
|
|
nodemask_t active_nodes;
|
|
};
|
|
|
|
int list_lru_init(struct list_lru *lru);
|
|
|
|
/**
|
|
* list_lru_add: add an element to the lru list's tail
|
|
* @list_lru: the lru pointer
|
|
* @item: the item to be added.
|
|
*
|
|
* If the element is already part of a list, this function returns doing
|
|
* nothing. Therefore the caller does not need to keep state about whether or
|
|
* not the element already belongs in the list and is allowed to lazy update
|
|
* it. Note however that this is valid for *a* list, not *this* list. If
|
|
* the caller organize itself in a way that elements can be in more than
|
|
* one type of list, it is up to the caller to fully remove the item from
|
|
* the previous list (with list_lru_del() for instance) before moving it
|
|
* to @list_lru
|
|
*
|
|
* Return value: true if the list was updated, false otherwise
|
|
*/
|
|
bool list_lru_add(struct list_lru *lru, struct list_head *item);
|
|
|
|
/**
|
|
* list_lru_del: delete an element to the lru list
|
|
* @list_lru: the lru pointer
|
|
* @item: the item to be deleted.
|
|
*
|
|
* This function works analogously as list_lru_add in terms of list
|
|
* manipulation. The comments about an element already pertaining to
|
|
* a list are also valid for list_lru_del.
|
|
*
|
|
* Return value: true if the list was updated, false otherwise
|
|
*/
|
|
bool list_lru_del(struct list_lru *lru, struct list_head *item);
|
|
|
|
/**
|
|
* list_lru_count_node: return the number of objects currently held by @lru
|
|
* @lru: the lru pointer.
|
|
* @nid: the node id to count from.
|
|
*
|
|
* Always return a non-negative number, 0 for empty lists. There is no
|
|
* guarantee that the list is not updated while the count is being computed.
|
|
* Callers that want such a guarantee need to provide an outer lock.
|
|
*/
|
|
unsigned long list_lru_count_node(struct list_lru *lru, int nid);
|
|
static inline unsigned long list_lru_count(struct list_lru *lru)
|
|
{
|
|
long count = 0;
|
|
int nid;
|
|
|
|
for_each_node_mask(nid, lru->active_nodes)
|
|
count += list_lru_count_node(lru, nid);
|
|
|
|
return count;
|
|
}
|
|
|
|
typedef enum lru_status
|
|
(*list_lru_walk_cb)(struct list_head *item, spinlock_t *lock, void *cb_arg);
|
|
/**
|
|
* list_lru_walk_node: walk a list_lru, isolating and disposing freeable items.
|
|
* @lru: the lru pointer.
|
|
* @nid: the node id to scan from.
|
|
* @isolate: callback function that is resposible for deciding what to do with
|
|
* the item currently being scanned
|
|
* @cb_arg: opaque type that will be passed to @isolate
|
|
* @nr_to_walk: how many items to scan.
|
|
*
|
|
* This function will scan all elements in a particular list_lru, calling the
|
|
* @isolate callback for each of those items, along with the current list
|
|
* spinlock and a caller-provided opaque. The @isolate callback can choose to
|
|
* drop the lock internally, but *must* return with the lock held. The callback
|
|
* will return an enum lru_status telling the list_lru infrastructure what to
|
|
* do with the object being scanned.
|
|
*
|
|
* Please note that nr_to_walk does not mean how many objects will be freed,
|
|
* just how many objects will be scanned.
|
|
*
|
|
* Return value: the number of objects effectively removed from the LRU.
|
|
*/
|
|
unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
|
|
list_lru_walk_cb isolate, void *cb_arg,
|
|
unsigned long *nr_to_walk);
|
|
|
|
static inline unsigned long
|
|
list_lru_walk(struct list_lru *lru, list_lru_walk_cb isolate,
|
|
void *cb_arg, unsigned long nr_to_walk)
|
|
{
|
|
long isolated = 0;
|
|
int nid;
|
|
|
|
for_each_node_mask(nid, lru->active_nodes) {
|
|
isolated += list_lru_walk_node(lru, nid, isolate,
|
|
cb_arg, &nr_to_walk);
|
|
if (nr_to_walk <= 0)
|
|
break;
|
|
}
|
|
return isolated;
|
|
}
|
|
#endif /* _LRU_LIST_H */
|