Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client:
  ceph: use separate class for ceph sockets' sk_lock
  ceph: reserve one more caps space when doing readdir
  ceph: queue_cap_snap should always queue dirty context
  ceph: fix dentry reference leak in dcache readdir
  ceph: decode v5 of osdmap (pool names) [protocol change]
  ceph: fix ack counter reset on connection reset
  ceph: fix leaked inode ref due to snap metadata writeback race
  ceph: fix snap context reference leaks
  ceph: allow writeback of snapped pages older than 'oldest' snapc
  ceph: fix dentry rehashing on virtual .snap dir
This commit is contained in:
Linus Torvalds
2010-04-14 18:45:31 -07:00
10 changed files with 212 additions and 134 deletions
+30 -32
View File
@@ -337,16 +337,15 @@ out:
/*
* Get ref for the oldest snapc for an inode with dirty data... that is, the
* only snap context we are allowed to write back.
*
* Caller holds i_lock.
*/
static struct ceph_snap_context *__get_oldest_context(struct inode *inode,
u64 *snap_size)
static struct ceph_snap_context *get_oldest_context(struct inode *inode,
u64 *snap_size)
{
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_snap_context *snapc = NULL;
struct ceph_cap_snap *capsnap = NULL;
spin_lock(&inode->i_lock);
list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
dout(" cap_snap %p snapc %p has %d dirty pages\n", capsnap,
capsnap->context, capsnap->dirty_pages);
@@ -357,21 +356,11 @@ static struct ceph_snap_context *__get_oldest_context(struct inode *inode,
break;
}
}
if (!snapc && ci->i_snap_realm) {
snapc = ceph_get_snap_context(ci->i_snap_realm->cached_context);
if (!snapc && ci->i_head_snapc) {
snapc = ceph_get_snap_context(ci->i_head_snapc);
dout(" head snapc %p has %d dirty pages\n",
snapc, ci->i_wrbuffer_ref_head);
}
return snapc;
}
static struct ceph_snap_context *get_oldest_context(struct inode *inode,
u64 *snap_size)
{
struct ceph_snap_context *snapc = NULL;
spin_lock(&inode->i_lock);
snapc = __get_oldest_context(inode, snap_size);
spin_unlock(&inode->i_lock);
return snapc;
}
@@ -392,7 +381,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
int len = PAGE_CACHE_SIZE;
loff_t i_size;
int err = 0;
struct ceph_snap_context *snapc;
struct ceph_snap_context *snapc, *oldest;
u64 snap_size = 0;
long writeback_stat;
@@ -413,13 +402,16 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
dout("writepage %p page %p not dirty?\n", inode, page);
goto out;
}
if (snapc != get_oldest_context(inode, &snap_size)) {
oldest = get_oldest_context(inode, &snap_size);
if (snapc->seq > oldest->seq) {
dout("writepage %p page %p snapc %p not writeable - noop\n",
inode, page, (void *)page->private);
/* we should only noop if called by kswapd */
WARN_ON((current->flags & PF_MEMALLOC) == 0);
ceph_put_snap_context(oldest);
goto out;
}
ceph_put_snap_context(oldest);
/* is this a partial page at end of file? */
if (snap_size)
@@ -458,7 +450,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
ClearPagePrivate(page);
end_page_writeback(page);
ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
ceph_put_snap_context(snapc);
ceph_put_snap_context(snapc); /* page's reference */
out:
return err;
}
@@ -558,9 +550,9 @@ static void writepages_finish(struct ceph_osd_request *req,
dout("inode %p skipping page %p\n", inode, page);
wbc->pages_skipped++;
}
ceph_put_snap_context((void *)page->private);
page->private = 0;
ClearPagePrivate(page);
ceph_put_snap_context(snapc);
dout("unlocking %d %p\n", i, page);
end_page_writeback(page);
@@ -618,7 +610,7 @@ static int ceph_writepages_start(struct address_space *mapping,
int range_whole = 0;
int should_loop = 1;
pgoff_t max_pages = 0, max_pages_ever = 0;
struct ceph_snap_context *snapc = NULL, *last_snapc = NULL;
struct ceph_snap_context *snapc = NULL, *last_snapc = NULL, *pgsnapc;
struct pagevec pvec;
int done = 0;
int rc = 0;
@@ -770,9 +762,10 @@ get_more_pages:
}
/* only if matching snap context */
if (snapc != (void *)page->private) {
dout("page snapc %p != oldest %p\n",
(void *)page->private, snapc);
pgsnapc = (void *)page->private;
if (pgsnapc->seq > snapc->seq) {
dout("page snapc %p %lld > oldest %p %lld\n",
pgsnapc, pgsnapc->seq, snapc, snapc->seq);
unlock_page(page);
if (!locked_pages)
continue; /* keep looking for snap */
@@ -914,7 +907,10 @@ static int context_is_writeable_or_written(struct inode *inode,
struct ceph_snap_context *snapc)
{
struct ceph_snap_context *oldest = get_oldest_context(inode, NULL);
return !oldest || snapc->seq <= oldest->seq;
int ret = !oldest || snapc->seq <= oldest->seq;
ceph_put_snap_context(oldest);
return ret;
}
/*
@@ -936,8 +932,8 @@ static int ceph_update_writeable_page(struct file *file,
int pos_in_page = pos & ~PAGE_CACHE_MASK;
int end_in_page = pos_in_page + len;
loff_t i_size;
struct ceph_snap_context *snapc;
int r;
struct ceph_snap_context *snapc, *oldest;
retry_locked:
/* writepages currently holds page lock, but if we change that later, */
@@ -947,23 +943,24 @@ retry_locked:
BUG_ON(!ci->i_snap_realm);
down_read(&mdsc->snap_rwsem);
BUG_ON(!ci->i_snap_realm->cached_context);
if (page->private &&
(void *)page->private != ci->i_snap_realm->cached_context) {
snapc = (void *)page->private;
if (snapc && snapc != ci->i_head_snapc) {
/*
* this page is already dirty in another (older) snap
* context! is it writeable now?
*/
snapc = get_oldest_context(inode, NULL);
oldest = get_oldest_context(inode, NULL);
up_read(&mdsc->snap_rwsem);
if (snapc != (void *)page->private) {
if (snapc->seq > oldest->seq) {
ceph_put_snap_context(oldest);
dout(" page %p snapc %p not current or oldest\n",
page, (void *)page->private);
page, snapc);
/*
* queue for writeback, and wait for snapc to
* be writeable or written
*/
snapc = ceph_get_snap_context((void *)page->private);
snapc = ceph_get_snap_context(snapc);
unlock_page(page);
ceph_queue_writeback(inode);
r = wait_event_interruptible(ci->i_cap_wq,
@@ -973,6 +970,7 @@ retry_locked:
return r;
return -EAGAIN;
}
ceph_put_snap_context(oldest);
/* yay, writeable, do it now (without dropping page lock) */
dout(" page %p snapc %p not current, but oldest\n",