mirror of
https://github.com/ukui/kernel.git
synced 2026-03-09 10:07:04 -07:00
Merge tag 'ceph-for-4.11-rc1' of git://github.com/ceph/ceph-client
Pull ceph updates from Ilya Dryomov:
"This time around we have:
- support for rbd data-pool feature, which enables rbd images on
erasure-coded pools (myself). CEPH_PG_MAX_SIZE has been bumped to
allow erasure-coded profiles with k+m up to 32.
- a patch for ceph_d_revalidate() performance regression introduced
in 4.9, along with some cleanups in the area (Jeff Layton)
- a set of fixes for unsafe ->d_parent accesses in CephFS (Jeff
Layton)
- buffered reads are now processed in rsize windows instead of rasize
windows (Andreas Gerstmayr). The new default for rsize mount option
is 64M.
- ack vs commit distinction is gone, greatly simplifying ->fsync()
and MOSDOpReply handling code (myself)
... also a few filesystem bug fixes from Zheng, a CRUSH sync up (CRUSH
computations are still serialized though) and several minor fixes and
cleanups all over"
* tag 'ceph-for-4.11-rc1' of git://github.com/ceph/ceph-client: (52 commits)
libceph, rbd, ceph: WRITE | ONDISK -> WRITE
libceph: get rid of ack vs commit
ceph: remove special ack vs commit behavior
ceph: tidy some white space in get_nonsnap_parent()
crush: fix dprintk compilation
crush: do is_out test only if we do not collide
ceph: remove req from unsafe list when unregistering it
rbd: constify device_type structure
rbd: kill obj_request->object_name and rbd_segment_name_cache
rbd: store and use obj_request->object_no
rbd: RBD_V{1,2}_DATA_FORMAT macros
rbd: factor out __rbd_osd_req_create()
rbd: set offset and length outside of rbd_obj_request_create()
rbd: support for data-pool feature
rbd: introduce rbd_init_layout()
rbd: use rbd_obj_bytes() more
rbd: remove now unused rbd_obj_request_wait() and helpers
rbd: switch rbd_obj_method_sync() to ceph_osdc_call()
libceph: pass reply buffer length through ceph_osdc_call()
rbd: do away with obj_request in rbd_obj_read_sync()
...
This commit is contained in:
@@ -98,11 +98,10 @@ Mount Options
|
||||
size.
|
||||
|
||||
rsize=X
|
||||
Specify the maximum read size in bytes. By default there is no
|
||||
maximum.
|
||||
Specify the maximum read size in bytes. Default: 64 MB.
|
||||
|
||||
rasize=X
|
||||
Specify the maximum readahead.
|
||||
Specify the maximum readahead. Default: 8 MB.
|
||||
|
||||
mount_timeout=X
|
||||
Specify the timeout value for mount (in seconds), in the case
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -25,8 +25,8 @@
|
||||
*/
|
||||
|
||||
#define RBD_HEADER_PREFIX "rbd_header."
|
||||
#define RBD_DATA_PREFIX "rbd_data."
|
||||
#define RBD_ID_PREFIX "rbd_id."
|
||||
#define RBD_V2_DATA_FORMAT "%s.%016llx"
|
||||
|
||||
#define RBD_LOCK_NAME "rbd_lock"
|
||||
#define RBD_LOCK_TAG "internal"
|
||||
@@ -42,13 +42,14 @@ enum rbd_notify_op {
|
||||
/*
|
||||
* For format version 1, rbd image 'foo' consists of objects
|
||||
* foo.rbd - image metadata
|
||||
* rb.<idhi>.<idlo>.00000000
|
||||
* rb.<idhi>.<idlo>.00000001
|
||||
* rb.<idhi>.<idlo>.<extra>.000000000000
|
||||
* rb.<idhi>.<idlo>.<extra>.000000000001
|
||||
* ... - data
|
||||
* There is no notion of a persistent image id in rbd format 1.
|
||||
*/
|
||||
|
||||
#define RBD_SUFFIX ".rbd"
|
||||
#define RBD_V1_DATA_FORMAT "%s.%012llx"
|
||||
|
||||
#define RBD_DIRECTORY "rbd_directory"
|
||||
#define RBD_INFO "rbd_info"
|
||||
@@ -57,9 +58,6 @@ enum rbd_notify_op {
|
||||
#define RBD_MIN_OBJ_ORDER 16
|
||||
#define RBD_MAX_OBJ_ORDER 30
|
||||
|
||||
#define RBD_COMP_NONE 0
|
||||
#define RBD_CRYPT_NONE 0
|
||||
|
||||
#define RBD_HEADER_TEXT "<<< Rados Block Device Image >>>\n"
|
||||
#define RBD_HEADER_SIGNATURE "RBD"
|
||||
#define RBD_HEADER_VERSION "001.005"
|
||||
|
||||
@@ -391,6 +391,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
|
||||
nr_pages = i;
|
||||
if (nr_pages > 0) {
|
||||
len = nr_pages << PAGE_SHIFT;
|
||||
osd_req_op_extent_update(req, 0, len);
|
||||
break;
|
||||
}
|
||||
goto out_pages;
|
||||
@@ -771,7 +772,7 @@ static int ceph_writepages_start(struct address_space *mapping,
|
||||
wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
|
||||
(wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD"));
|
||||
|
||||
if (ACCESS_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
|
||||
if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
|
||||
if (ci->i_wrbuffer_ref > 0) {
|
||||
pr_warn_ratelimited(
|
||||
"writepage_start %p %lld forced umount\n",
|
||||
@@ -1017,8 +1018,7 @@ new_request:
|
||||
&ci->i_layout, vino,
|
||||
offset, &len, 0, num_ops,
|
||||
CEPH_OSD_OP_WRITE,
|
||||
CEPH_OSD_FLAG_WRITE |
|
||||
CEPH_OSD_FLAG_ONDISK,
|
||||
CEPH_OSD_FLAG_WRITE,
|
||||
snapc, truncate_seq,
|
||||
truncate_size, false);
|
||||
if (IS_ERR(req)) {
|
||||
@@ -1028,8 +1028,7 @@ new_request:
|
||||
min(num_ops,
|
||||
CEPH_OSD_SLAB_OPS),
|
||||
CEPH_OSD_OP_WRITE,
|
||||
CEPH_OSD_FLAG_WRITE |
|
||||
CEPH_OSD_FLAG_ONDISK,
|
||||
CEPH_OSD_FLAG_WRITE,
|
||||
snapc, truncate_seq,
|
||||
truncate_size, true);
|
||||
BUG_ON(IS_ERR(req));
|
||||
@@ -1194,7 +1193,7 @@ static int ceph_update_writeable_page(struct file *file,
|
||||
int r;
|
||||
struct ceph_snap_context *snapc, *oldest;
|
||||
|
||||
if (ACCESS_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
|
||||
if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
|
||||
dout(" page %p forced umount\n", page);
|
||||
unlock_page(page);
|
||||
return -EIO;
|
||||
@@ -1681,8 +1680,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
|
||||
|
||||
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
|
||||
ceph_vino(inode), 0, &len, 0, 1,
|
||||
CEPH_OSD_OP_CREATE,
|
||||
CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
|
||||
CEPH_OSD_OP_CREATE, CEPH_OSD_FLAG_WRITE,
|
||||
NULL, 0, 0, false);
|
||||
if (IS_ERR(req)) {
|
||||
err = PTR_ERR(req);
|
||||
@@ -1699,8 +1697,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
|
||||
|
||||
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
|
||||
ceph_vino(inode), 0, &len, 1, 3,
|
||||
CEPH_OSD_OP_WRITE,
|
||||
CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
|
||||
CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE,
|
||||
NULL, ci->i_truncate_seq,
|
||||
ci->i_truncate_size, false);
|
||||
if (IS_ERR(req)) {
|
||||
@@ -1873,7 +1870,7 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci,
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
wr_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ACK;
|
||||
wr_req->r_flags = CEPH_OSD_FLAG_WRITE;
|
||||
osd_req_op_init(wr_req, 0, CEPH_OSD_OP_CREATE, CEPH_OSD_OP_FLAG_EXCL);
|
||||
ceph_oloc_copy(&wr_req->r_base_oloc, &rd_req->r_base_oloc);
|
||||
ceph_oid_copy(&wr_req->r_base_oid, &rd_req->r_base_oid);
|
||||
|
||||
@@ -234,7 +234,7 @@ void ceph_fscache_file_set_cookie(struct inode *inode, struct file *filp)
|
||||
fscache_enable_cookie(ci->fscache, ceph_fscache_can_enable,
|
||||
inode);
|
||||
if (fscache_cookie_enabled(ci->fscache)) {
|
||||
dout("fscache_file_set_cookie %p %p enabing cache\n",
|
||||
dout("fscache_file_set_cookie %p %p enabling cache\n",
|
||||
inode, filp);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -867,7 +867,7 @@ int __ceph_caps_file_wanted(struct ceph_inode_info *ci)
|
||||
/*
|
||||
* Return caps we have registered with the MDS(s) as 'wanted'.
|
||||
*/
|
||||
int __ceph_caps_mds_wanted(struct ceph_inode_info *ci)
|
||||
int __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check)
|
||||
{
|
||||
struct ceph_cap *cap;
|
||||
struct rb_node *p;
|
||||
@@ -875,7 +875,7 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci)
|
||||
|
||||
for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
|
||||
cap = rb_entry(p, struct ceph_cap, ci_node);
|
||||
if (!__cap_is_valid(cap))
|
||||
if (check && !__cap_is_valid(cap))
|
||||
continue;
|
||||
if (cap == ci->i_auth_cap)
|
||||
mds_wanted |= cap->mds_wanted;
|
||||
@@ -1184,6 +1184,13 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
|
||||
delayed = 1;
|
||||
}
|
||||
ci->i_ceph_flags &= ~(CEPH_I_NODELAY | CEPH_I_FLUSH);
|
||||
if (want & ~cap->mds_wanted) {
|
||||
/* user space may open/close single file frequently.
|
||||
* This avoids dropping mds_wanted immediately after
|
||||
* requesting new mds_wanted.
|
||||
*/
|
||||
__cap_set_timeouts(mdsc, ci);
|
||||
}
|
||||
|
||||
cap->issued &= retain; /* drop bits we don't want */
|
||||
if (cap->implemented & ~cap->issued) {
|
||||
@@ -2084,8 +2091,6 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
|
||||
|
||||
dout("fsync %p%s\n", inode, datasync ? " datasync" : "");
|
||||
|
||||
ceph_sync_write_wait(inode);
|
||||
|
||||
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
@@ -2477,23 +2482,22 @@ again:
|
||||
|
||||
if (ci->i_ceph_flags & CEPH_I_CAP_DROPPED) {
|
||||
int mds_wanted;
|
||||
if (ACCESS_ONCE(mdsc->fsc->mount_state) ==
|
||||
if (READ_ONCE(mdsc->fsc->mount_state) ==
|
||||
CEPH_MOUNT_SHUTDOWN) {
|
||||
dout("get_cap_refs %p forced umount\n", inode);
|
||||
*err = -EIO;
|
||||
ret = 1;
|
||||
goto out_unlock;
|
||||
}
|
||||
mds_wanted = __ceph_caps_mds_wanted(ci);
|
||||
if ((mds_wanted & need) != need) {
|
||||
mds_wanted = __ceph_caps_mds_wanted(ci, false);
|
||||
if (need & ~(mds_wanted & need)) {
|
||||
dout("get_cap_refs %p caps were dropped"
|
||||
" (session killed?)\n", inode);
|
||||
*err = -ESTALE;
|
||||
ret = 1;
|
||||
goto out_unlock;
|
||||
}
|
||||
if ((mds_wanted & file_wanted) ==
|
||||
(file_wanted & (CEPH_CAP_FILE_RD|CEPH_CAP_FILE_WR)))
|
||||
if (!(file_wanted & ~mds_wanted))
|
||||
ci->i_ceph_flags &= ~CEPH_I_CAP_DROPPED;
|
||||
}
|
||||
|
||||
@@ -3404,6 +3408,7 @@ retry:
|
||||
tcap->implemented |= issued;
|
||||
if (cap == ci->i_auth_cap)
|
||||
ci->i_auth_cap = tcap;
|
||||
|
||||
if (!list_empty(&ci->i_cap_flush_list) &&
|
||||
ci->i_auth_cap == tcap) {
|
||||
spin_lock(&mdsc->cap_dirty_lock);
|
||||
@@ -3417,9 +3422,18 @@ retry:
|
||||
} else if (tsession) {
|
||||
/* add placeholder for the export target */
|
||||
int flag = (cap == ci->i_auth_cap) ? CEPH_CAP_FLAG_AUTH : 0;
|
||||
tcap = new_cap;
|
||||
ceph_add_cap(inode, tsession, t_cap_id, -1, issued, 0,
|
||||
t_seq - 1, t_mseq, (u64)-1, flag, &new_cap);
|
||||
|
||||
if (!list_empty(&ci->i_cap_flush_list) &&
|
||||
ci->i_auth_cap == tcap) {
|
||||
spin_lock(&mdsc->cap_dirty_lock);
|
||||
list_move_tail(&ci->i_flushing_item,
|
||||
&tcap->session->s_cap_flushing);
|
||||
spin_unlock(&mdsc->cap_dirty_lock);
|
||||
}
|
||||
|
||||
__ceph_remove_cap(cap, false);
|
||||
goto out_unlock;
|
||||
}
|
||||
@@ -3924,9 +3938,10 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
|
||||
}
|
||||
|
||||
int ceph_encode_dentry_release(void **p, struct dentry *dentry,
|
||||
struct inode *dir,
|
||||
int mds, int drop, int unless)
|
||||
{
|
||||
struct inode *dir = d_inode(dentry->d_parent);
|
||||
struct dentry *parent = NULL;
|
||||
struct ceph_mds_request_release *rel = *p;
|
||||
struct ceph_dentry_info *di = ceph_dentry(dentry);
|
||||
int force = 0;
|
||||
@@ -3941,9 +3956,14 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry,
|
||||
spin_lock(&dentry->d_lock);
|
||||
if (di->lease_session && di->lease_session->s_mds == mds)
|
||||
force = 1;
|
||||
if (!dir) {
|
||||
parent = dget(dentry->d_parent);
|
||||
dir = d_inode(parent);
|
||||
}
|
||||
spin_unlock(&dentry->d_lock);
|
||||
|
||||
ret = ceph_encode_inode_release(p, dir, mds, drop, unless, force);
|
||||
dput(parent);
|
||||
|
||||
spin_lock(&dentry->d_lock);
|
||||
if (ret && di->lease_session && di->lease_session->s_mds == mds) {
|
||||
|
||||
@@ -70,7 +70,7 @@ static int mdsc_show(struct seq_file *s, void *p)
|
||||
|
||||
seq_printf(s, "%s", ceph_mds_op_name(req->r_op));
|
||||
|
||||
if (req->r_got_unsafe)
|
||||
if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags))
|
||||
seq_puts(s, "\t(unsafe)");
|
||||
else
|
||||
seq_puts(s, "\t");
|
||||
|
||||
@@ -371,7 +371,7 @@ more:
|
||||
/* hints to request -> mds selection code */
|
||||
req->r_direct_mode = USE_AUTH_MDS;
|
||||
req->r_direct_hash = ceph_frag_value(frag);
|
||||
req->r_direct_is_hash = true;
|
||||
__set_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags);
|
||||
if (fi->last_name) {
|
||||
req->r_path2 = kstrdup(fi->last_name, GFP_KERNEL);
|
||||
if (!req->r_path2) {
|
||||
@@ -417,7 +417,7 @@ more:
|
||||
fi->frag = frag;
|
||||
fi->last_readdir = req;
|
||||
|
||||
if (req->r_did_prepopulate) {
|
||||
if (test_bit(CEPH_MDS_R_DID_PREPOPULATE, &req->r_req_flags)) {
|
||||
fi->readdir_cache_idx = req->r_readdir_cache_idx;
|
||||
if (fi->readdir_cache_idx < 0) {
|
||||
/* preclude from marking dir ordered */
|
||||
@@ -752,7 +752,8 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
|
||||
mask |= CEPH_CAP_XATTR_SHARED;
|
||||
req->r_args.getattr.mask = cpu_to_le32(mask);
|
||||
|
||||
req->r_locked_dir = dir;
|
||||
req->r_parent = dir;
|
||||
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
|
||||
err = ceph_mdsc_do_request(mdsc, NULL, req);
|
||||
err = ceph_handle_snapdir(req, dentry, err);
|
||||
dentry = ceph_finish_lookup(req, dentry, err);
|
||||
@@ -813,7 +814,8 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry,
|
||||
}
|
||||
req->r_dentry = dget(dentry);
|
||||
req->r_num_caps = 2;
|
||||
req->r_locked_dir = dir;
|
||||
req->r_parent = dir;
|
||||
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
|
||||
req->r_args.mknod.mode = cpu_to_le32(mode);
|
||||
req->r_args.mknod.rdev = cpu_to_le32(rdev);
|
||||
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
|
||||
@@ -864,7 +866,8 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry,
|
||||
ceph_mdsc_put_request(req);
|
||||
goto out;
|
||||
}
|
||||
req->r_locked_dir = dir;
|
||||
req->r_parent = dir;
|
||||
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
|
||||
req->r_dentry = dget(dentry);
|
||||
req->r_num_caps = 2;
|
||||
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
|
||||
@@ -913,7 +916,8 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
|
||||
|
||||
req->r_dentry = dget(dentry);
|
||||
req->r_num_caps = 2;
|
||||
req->r_locked_dir = dir;
|
||||
req->r_parent = dir;
|
||||
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
|
||||
req->r_args.mkdir.mode = cpu_to_le32(mode);
|
||||
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
|
||||
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
|
||||
@@ -957,7 +961,8 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
|
||||
req->r_dentry = dget(dentry);
|
||||
req->r_num_caps = 2;
|
||||
req->r_old_dentry = dget(old_dentry);
|
||||
req->r_locked_dir = dir;
|
||||
req->r_parent = dir;
|
||||
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
|
||||
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
|
||||
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
|
||||
/* release LINK_SHARED on source inode (mds will lock it) */
|
||||
@@ -1023,7 +1028,8 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry)
|
||||
}
|
||||
req->r_dentry = dget(dentry);
|
||||
req->r_num_caps = 2;
|
||||
req->r_locked_dir = dir;
|
||||
req->r_parent = dir;
|
||||
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
|
||||
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
|
||||
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
|
||||
req->r_inode_drop = drop_caps_for_unlink(inode);
|
||||
@@ -1066,7 +1072,8 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
|
||||
req->r_num_caps = 2;
|
||||
req->r_old_dentry = dget(old_dentry);
|
||||
req->r_old_dentry_dir = old_dir;
|
||||
req->r_locked_dir = new_dir;
|
||||
req->r_parent = new_dir;
|
||||
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
|
||||
req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED;
|
||||
req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL;
|
||||
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
|
||||
@@ -1194,7 +1201,7 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
|
||||
struct inode *dir;
|
||||
|
||||
if (flags & LOOKUP_RCU) {
|
||||
parent = ACCESS_ONCE(dentry->d_parent);
|
||||
parent = READ_ONCE(dentry->d_parent);
|
||||
dir = d_inode_rcu(parent);
|
||||
if (!dir)
|
||||
return -ECHILD;
|
||||
@@ -1237,11 +1244,12 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
|
||||
return -ECHILD;
|
||||
|
||||
op = ceph_snap(dir) == CEPH_SNAPDIR ?
|
||||
CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_GETATTR;
|
||||
CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP;
|
||||
req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS);
|
||||
if (!IS_ERR(req)) {
|
||||
req->r_dentry = dget(dentry);
|
||||
req->r_num_caps = op == CEPH_MDS_OP_GETATTR ? 1 : 2;
|
||||
req->r_num_caps = 2;
|
||||
req->r_parent = dir;
|
||||
|
||||
mask = CEPH_STAT_CAP_INODE | CEPH_CAP_AUTH_SHARED;
|
||||
if (ceph_security_xattr_wanted(dir))
|
||||
|
||||
@@ -207,7 +207,8 @@ static int ceph_get_name(struct dentry *parent, char *name,
|
||||
req->r_inode = d_inode(child);
|
||||
ihold(d_inode(child));
|
||||
req->r_ino2 = ceph_vino(d_inode(parent));
|
||||
req->r_locked_dir = d_inode(parent);
|
||||
req->r_parent = d_inode(parent);
|
||||
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
|
||||
req->r_num_caps = 2;
|
||||
err = ceph_mdsc_do_request(mdsc, NULL, req);
|
||||
|
||||
|
||||
fs/ceph/file.c | 106
@@ -283,7 +283,7 @@ int ceph_open(struct inode *inode, struct file *file)
|
||||
spin_lock(&ci->i_ceph_lock);
|
||||
if (__ceph_is_any_real_caps(ci) &&
|
||||
(((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) {
|
||||
int mds_wanted = __ceph_caps_mds_wanted(ci);
|
||||
int mds_wanted = __ceph_caps_mds_wanted(ci, true);
|
||||
int issued = __ceph_caps_issued(ci, NULL);
|
||||
|
||||
dout("open %p fmode %d want %s issued %s using existing\n",
|
||||
@@ -379,7 +379,8 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
|
||||
mask |= CEPH_CAP_XATTR_SHARED;
|
||||
req->r_args.open.mask = cpu_to_le32(mask);
|
||||
|
||||
req->r_locked_dir = dir; /* caller holds dir->i_mutex */
|
||||
req->r_parent = dir;
|
||||
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
|
||||
err = ceph_mdsc_do_request(mdsc,
|
||||
(flags & (O_CREAT|O_TRUNC)) ? dir : NULL,
|
||||
req);
|
||||
@@ -758,9 +759,7 @@ static void ceph_aio_retry_work(struct work_struct *work)
|
||||
goto out;
|
||||
}
|
||||
|
||||
req->r_flags = CEPH_OSD_FLAG_ORDERSNAP |
|
||||
CEPH_OSD_FLAG_ONDISK |
|
||||
CEPH_OSD_FLAG_WRITE;
|
||||
req->r_flags = CEPH_OSD_FLAG_ORDERSNAP | CEPH_OSD_FLAG_WRITE;
|
||||
ceph_oloc_copy(&req->r_base_oloc, &orig_req->r_base_oloc);
|
||||
ceph_oid_copy(&req->r_base_oid, &orig_req->r_base_oid);
|
||||
|
||||
@@ -794,89 +793,6 @@ out:
|
||||
kfree(aio_work);
|
||||
}
|
||||
|
||||
/*
|
||||
* Write commit request unsafe callback, called to tell us when a
|
||||
* request is unsafe (that is, in flight--has been handed to the
|
||||
* messenger to send to its target osd). It is called again when
|
||||
* we've received a response message indicating the request is
|
||||
* "safe" (its CEPH_OSD_FLAG_ONDISK flag is set), or when a request
|
||||
* is completed early (and unsuccessfully) due to a timeout or
|
||||
* interrupt.
|
||||
*
|
||||
* This is used if we requested both an ACK and ONDISK commit reply
|
||||
* from the OSD.
|
||||
*/
|
||||
static void ceph_sync_write_unsafe(struct ceph_osd_request *req, bool unsafe)
|
||||
{
|
||||
struct ceph_inode_info *ci = ceph_inode(req->r_inode);
|
||||
|
||||
dout("%s %p tid %llu %ssafe\n", __func__, req, req->r_tid,
|
||||
unsafe ? "un" : "");
|
||||
if (unsafe) {
|
||||
ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR);
|
||||
spin_lock(&ci->i_unsafe_lock);
|
||||
list_add_tail(&req->r_unsafe_item,
|
||||
&ci->i_unsafe_writes);
|
||||
spin_unlock(&ci->i_unsafe_lock);
|
||||
|
||||
complete_all(&req->r_completion);
|
||||
} else {
|
||||
spin_lock(&ci->i_unsafe_lock);
|
||||
list_del_init(&req->r_unsafe_item);
|
||||
spin_unlock(&ci->i_unsafe_lock);
|
||||
ceph_put_cap_refs(ci, CEPH_CAP_FILE_WR);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Wait on any unsafe replies for the given inode. First wait on the
|
||||
* newest request, and make that the upper bound. Then, if there are
|
||||
* more requests, keep waiting on the oldest as long as it is still older
|
||||
* than the original request.
|
||||
*/
|
||||
void ceph_sync_write_wait(struct inode *inode)
|
||||
{
|
||||
struct ceph_inode_info *ci = ceph_inode(inode);
|
||||
struct list_head *head = &ci->i_unsafe_writes;
|
||||
struct ceph_osd_request *req;
|
||||
u64 last_tid;
|
||||
|
||||
if (!S_ISREG(inode->i_mode))
|
||||
return;
|
||||
|
||||
spin_lock(&ci->i_unsafe_lock);
|
||||
if (list_empty(head))
|
||||
goto out;
|
||||
|
||||
/* set upper bound as _last_ entry in chain */
|
||||
|
||||
req = list_last_entry(head, struct ceph_osd_request,
|
||||
r_unsafe_item);
|
||||
last_tid = req->r_tid;
|
||||
|
||||
do {
|
||||
ceph_osdc_get_request(req);
|
||||
spin_unlock(&ci->i_unsafe_lock);
|
||||
|
||||
dout("sync_write_wait on tid %llu (until %llu)\n",
|
||||
req->r_tid, last_tid);
|
||||
wait_for_completion(&req->r_done_completion);
|
||||
ceph_osdc_put_request(req);
|
||||
|
||||
spin_lock(&ci->i_unsafe_lock);
|
||||
/*
|
||||
* from here on look at first entry in chain, since we
|
||||
* only want to wait for anything older than last_tid
|
||||
*/
|
||||
if (list_empty(head))
|
||||
break;
|
||||
req = list_first_entry(head, struct ceph_osd_request,
|
||||
r_unsafe_item);
|
||||
} while (req->r_tid < last_tid);
|
||||
out:
|
||||
spin_unlock(&ci->i_unsafe_lock);
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
|
||||
struct ceph_snap_context *snapc,
|
||||
@@ -915,9 +831,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
|
||||
if (ret2 < 0)
|
||||
dout("invalidate_inode_pages2_range returned %d\n", ret2);
|
||||
|
||||
flags = CEPH_OSD_FLAG_ORDERSNAP |
|
||||
CEPH_OSD_FLAG_ONDISK |
|
||||
CEPH_OSD_FLAG_WRITE;
|
||||
flags = CEPH_OSD_FLAG_ORDERSNAP | CEPH_OSD_FLAG_WRITE;
|
||||
} else {
|
||||
flags = CEPH_OSD_FLAG_READ;
|
||||
}
|
||||
@@ -1116,10 +1030,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
|
||||
if (ret < 0)
|
||||
dout("invalidate_inode_pages2_range returned %d\n", ret);
|
||||
|
||||
flags = CEPH_OSD_FLAG_ORDERSNAP |
|
||||
CEPH_OSD_FLAG_ONDISK |
|
||||
CEPH_OSD_FLAG_WRITE |
|
||||
CEPH_OSD_FLAG_ACK;
|
||||
flags = CEPH_OSD_FLAG_ORDERSNAP | CEPH_OSD_FLAG_WRITE;
|
||||
|
||||
while ((len = iov_iter_count(from)) > 0) {
|
||||
size_t left;
|
||||
@@ -1165,8 +1076,6 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* get a second commit callback */
|
||||
req->r_unsafe_callback = ceph_sync_write_unsafe;
|
||||
req->r_inode = inode;
|
||||
|
||||
osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0,
|
||||
@@ -1616,8 +1525,7 @@ static int ceph_zero_partial_object(struct inode *inode,
|
||||
ceph_vino(inode),
|
||||
offset, length,
|
||||
0, 1, op,
|
||||
CEPH_OSD_FLAG_WRITE |
|
||||
CEPH_OSD_FLAG_ONDISK,
|
||||
CEPH_OSD_FLAG_WRITE,
|
||||
NULL, 0, 0, false);
|
||||
if (IS_ERR(req)) {
|
||||
ret = PTR_ERR(req);
|
||||
|
||||
fs/ceph/inode.c | 172
@@ -499,7 +499,6 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
|
||||
ci->i_rdcache_gen = 0;
|
||||
ci->i_rdcache_revoking = 0;
|
||||
|
||||
INIT_LIST_HEAD(&ci->i_unsafe_writes);
|
||||
INIT_LIST_HEAD(&ci->i_unsafe_dirops);
|
||||
INIT_LIST_HEAD(&ci->i_unsafe_iops);
|
||||
spin_lock_init(&ci->i_unsafe_lock);
|
||||
@@ -583,14 +582,6 @@ int ceph_drop_inode(struct inode *inode)
|
||||
return 1;
|
||||
}
|
||||
|
||||
void ceph_evict_inode(struct inode *inode)
|
||||
{
|
||||
/* wait unsafe sync writes */
|
||||
ceph_sync_write_wait(inode);
|
||||
truncate_inode_pages_final(&inode->i_data);
|
||||
clear_inode(inode);
|
||||
}
|
||||
|
||||
static inline blkcnt_t calc_inode_blocks(u64 size)
|
||||
{
|
||||
return (size + (1<<9) - 1) >> 9;
|
||||
@@ -1016,7 +1007,9 @@ out:
|
||||
static void update_dentry_lease(struct dentry *dentry,
|
||||
struct ceph_mds_reply_lease *lease,
|
||||
struct ceph_mds_session *session,
|
||||
unsigned long from_time)
|
||||
unsigned long from_time,
|
||||
struct ceph_vino *tgt_vino,
|
||||
struct ceph_vino *dir_vino)
|
||||
{
|
||||
struct ceph_dentry_info *di = ceph_dentry(dentry);
|
||||
long unsigned duration = le32_to_cpu(lease->duration_ms);
|
||||
@@ -1024,13 +1017,27 @@ static void update_dentry_lease(struct dentry *dentry,
|
||||
long unsigned half_ttl = from_time + (duration * HZ / 2) / 1000;
|
||||
struct inode *dir;
|
||||
|
||||
/*
|
||||
* Make sure dentry's inode matches tgt_vino. NULL tgt_vino means that
|
||||
* we expect a negative dentry.
|
||||
*/
|
||||
if (!tgt_vino && d_really_is_positive(dentry))
|
||||
return;
|
||||
|
||||
if (tgt_vino && (d_really_is_negative(dentry) ||
|
||||
!ceph_ino_compare(d_inode(dentry), tgt_vino)))
|
||||
return;
|
||||
|
||||
spin_lock(&dentry->d_lock);
|
||||
dout("update_dentry_lease %p duration %lu ms ttl %lu\n",
|
||||
dentry, duration, ttl);
|
||||
|
||||
/* make lease_rdcache_gen match directory */
|
||||
dir = d_inode(dentry->d_parent);
|
||||
|
||||
/* make sure parent matches dir_vino */
|
||||
if (!ceph_ino_compare(dir, dir_vino))
|
||||
goto out_unlock;
|
||||
|
||||
/* only track leases on regular dentries */
|
||||
if (ceph_snap(dir) != CEPH_NOSNAP)
|
||||
goto out_unlock;
|
||||
@@ -1108,61 +1115,27 @@ out:
|
||||
*
|
||||
* Called with snap_rwsem (read).
|
||||
*/
|
||||
int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
|
||||
struct ceph_mds_session *session)
|
||||
int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
|
||||
{
|
||||
struct ceph_mds_session *session = req->r_session;
|
||||
struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
|
||||
struct inode *in = NULL;
|
||||
struct ceph_vino vino;
|
||||
struct ceph_vino tvino, dvino;
|
||||
struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
|
||||
int err = 0;
|
||||
|
||||
dout("fill_trace %p is_dentry %d is_target %d\n", req,
|
||||
rinfo->head->is_dentry, rinfo->head->is_target);
|
||||
|
||||
#if 0
|
||||
/*
|
||||
* Debugging hook:
|
||||
*
|
||||
* If we resend completed ops to a recovering mds, we get no
|
||||
* trace. Since that is very rare, pretend this is the case
|
||||
* to ensure the 'no trace' handlers in the callers behave.
|
||||
*
|
||||
* Fill in inodes unconditionally to avoid breaking cap
|
||||
* invariants.
|
||||
*/
|
||||
if (rinfo->head->op & CEPH_MDS_OP_WRITE) {
|
||||
pr_info("fill_trace faking empty trace on %lld %s\n",
|
||||
req->r_tid, ceph_mds_op_name(rinfo->head->op));
|
||||
if (rinfo->head->is_dentry) {
|
||||
rinfo->head->is_dentry = 0;
|
||||
err = fill_inode(req->r_locked_dir,
|
||||
&rinfo->diri, rinfo->dirfrag,
|
||||
session, req->r_request_started, -1);
|
||||
}
|
||||
if (rinfo->head->is_target) {
|
||||
rinfo->head->is_target = 0;
|
||||
ininfo = rinfo->targeti.in;
|
||||
vino.ino = le64_to_cpu(ininfo->ino);
|
||||
vino.snap = le64_to_cpu(ininfo->snapid);
|
||||
in = ceph_get_inode(sb, vino);
|
||||
err = fill_inode(in, &rinfo->targeti, NULL,
|
||||
session, req->r_request_started,
|
||||
req->r_fmode);
|
||||
iput(in);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (!rinfo->head->is_target && !rinfo->head->is_dentry) {
|
||||
dout("fill_trace reply is empty!\n");
|
||||
if (rinfo->head->result == 0 && req->r_locked_dir)
|
||||
if (rinfo->head->result == 0 && req->r_parent)
|
||||
ceph_invalidate_dir_request(req);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (rinfo->head->is_dentry) {
|
||||
struct inode *dir = req->r_locked_dir;
|
||||
struct inode *dir = req->r_parent;
|
||||
|
||||
if (dir) {
|
||||
err = fill_inode(dir, NULL,
|
||||
@@ -1188,8 +1161,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
|
||||
dname.name = rinfo->dname;
|
||||
dname.len = rinfo->dname_len;
|
||||
dname.hash = full_name_hash(parent, dname.name, dname.len);
|
||||
vino.ino = le64_to_cpu(rinfo->targeti.in->ino);
|
||||
vino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
|
||||
tvino.ino = le64_to_cpu(rinfo->targeti.in->ino);
|
||||
tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
|
||||
retry_lookup:
|
||||
dn = d_lookup(parent, &dname);
|
||||
dout("d_lookup on parent=%p name=%.*s got %p\n",
|
||||
@@ -1206,8 +1179,8 @@ retry_lookup:
|
||||
}
|
||||
err = 0;
|
||||
} else if (d_really_is_positive(dn) &&
|
||||
(ceph_ino(d_inode(dn)) != vino.ino ||
|
||||
ceph_snap(d_inode(dn)) != vino.snap)) {
|
||||
(ceph_ino(d_inode(dn)) != tvino.ino ||
|
||||
ceph_snap(d_inode(dn)) != tvino.snap)) {
|
||||
dout(" dn %p points to wrong inode %p\n",
|
||||
dn, d_inode(dn));
|
||||
d_delete(dn);
|
||||
@@ -1221,10 +1194,10 @@ retry_lookup:
|
||||
}
|
||||
|
||||
if (rinfo->head->is_target) {
|
||||
vino.ino = le64_to_cpu(rinfo->targeti.in->ino);
|
||||
vino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
|
||||
tvino.ino = le64_to_cpu(rinfo->targeti.in->ino);
|
||||
tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
|
||||
|
||||
in = ceph_get_inode(sb, vino);
|
||||
in = ceph_get_inode(sb, tvino);
|
||||
if (IS_ERR(in)) {
|
||||
err = PTR_ERR(in);
|
||||
goto done;
|
||||
@@ -1233,8 +1206,8 @@ retry_lookup:
|
||||
|
||||
err = fill_inode(in, req->r_locked_page, &rinfo->targeti, NULL,
|
||||
session, req->r_request_started,
|
||||
(!req->r_aborted && rinfo->head->result == 0) ?
|
||||
req->r_fmode : -1,
|
||||
(!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags) &&
|
||||
rinfo->head->result == 0) ? req->r_fmode : -1,
|
||||
&req->r_caps_reservation);
|
||||
if (err < 0) {
|
||||
pr_err("fill_inode badness %p %llx.%llx\n",
|
||||
@@ -1247,8 +1220,9 @@ retry_lookup:
|
||||
* ignore null lease/binding on snapdir ENOENT, or else we
|
||||
* will have trouble splicing in the virtual snapdir later
|
||||
*/
|
||||
if (rinfo->head->is_dentry && !req->r_aborted &&
|
||||
req->r_locked_dir &&
|
||||
if (rinfo->head->is_dentry &&
|
||||
!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags) &&
|
||||
test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags) &&
|
||||
(rinfo->head->is_target || strncmp(req->r_dentry->d_name.name,
|
||||
fsc->mount_options->snapdir_name,
|
||||
req->r_dentry->d_name.len))) {
|
||||
@@ -1257,17 +1231,19 @@ retry_lookup:
|
||||
* mknod symlink mkdir : null -> new inode
|
||||
* unlink : linked -> null
|
||||
*/
|
||||
struct inode *dir = req->r_locked_dir;
|
||||
struct inode *dir = req->r_parent;
|
||||
struct dentry *dn = req->r_dentry;
|
||||
bool have_dir_cap, have_lease;
|
||||
|
||||
BUG_ON(!dn);
|
||||
BUG_ON(!dir);
|
||||
BUG_ON(d_inode(dn->d_parent) != dir);
|
||||
BUG_ON(ceph_ino(dir) !=
|
||||
le64_to_cpu(rinfo->diri.in->ino));
|
||||
BUG_ON(ceph_snap(dir) !=
|
||||
le64_to_cpu(rinfo->diri.in->snapid));
|
||||
|
||||
dvino.ino = le64_to_cpu(rinfo->diri.in->ino);
|
||||
dvino.snap = le64_to_cpu(rinfo->diri.in->snapid);
|
||||
|
||||
BUG_ON(ceph_ino(dir) != dvino.ino);
|
||||
BUG_ON(ceph_snap(dir) != dvino.snap);
|
||||
|
||||
/* do we have a lease on the whole dir? */
|
||||
have_dir_cap =
|
||||
@@ -1319,12 +1295,13 @@ retry_lookup:
|
||||
ceph_dir_clear_ordered(dir);
|
||||
dout("d_delete %p\n", dn);
|
||||
d_delete(dn);
|
||||
} else {
|
||||
if (have_lease && d_unhashed(dn))
|
||||
} else if (have_lease) {
|
||||
if (d_unhashed(dn))
|
||||
d_add(dn, NULL);
|
||||
update_dentry_lease(dn, rinfo->dlease,
|
||||
session,
|
||||
req->r_request_started);
|
||||
req->r_request_started,
|
||||
NULL, &dvino);
|
||||
}
|
||||
goto done;
|
||||
}
|
||||
@@ -1347,15 +1324,19 @@ retry_lookup:
|
||||
have_lease = false;
|
||||
}
|
||||
|
||||
if (have_lease)
|
||||
if (have_lease) {
|
||||
tvino.ino = le64_to_cpu(rinfo->targeti.in->ino);
|
||||
tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
|
||||
update_dentry_lease(dn, rinfo->dlease, session,
|
||||
req->r_request_started);
|
||||
req->r_request_started,
|
||||
&tvino, &dvino);
|
||||
}
|
||||
dout(" final dn %p\n", dn);
|
||||
} else if (!req->r_aborted &&
|
||||
(req->r_op == CEPH_MDS_OP_LOOKUPSNAP ||
|
||||
req->r_op == CEPH_MDS_OP_MKSNAP)) {
|
||||
} else if ((req->r_op == CEPH_MDS_OP_LOOKUPSNAP ||
|
||||
req->r_op == CEPH_MDS_OP_MKSNAP) &&
|
||||
!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
|
||||
struct dentry *dn = req->r_dentry;
|
||||
struct inode *dir = req->r_locked_dir;
|
||||
struct inode *dir = req->r_parent;
|
||||
|
||||
/* fill out a snapdir LOOKUPSNAP dentry */
|
||||
BUG_ON(!dn);
|
||||
@@ -1370,6 +1351,26 @@ retry_lookup:
|
||||
goto done;
|
||||
}
|
||||
req->r_dentry = dn; /* may have spliced */
|
||||
} else if (rinfo->head->is_dentry) {
|
||||
struct ceph_vino *ptvino = NULL;
|
||||
|
||||
if ((le32_to_cpu(rinfo->diri.in->cap.caps) & CEPH_CAP_FILE_SHARED) ||
|
||||
le32_to_cpu(rinfo->dlease->duration_ms)) {
|
||||
dvino.ino = le64_to_cpu(rinfo->diri.in->ino);
|
||||
dvino.snap = le64_to_cpu(rinfo->diri.in->snapid);
|
||||
|
||||
if (rinfo->head->is_target) {
|
||||
tvino.ino = le64_to_cpu(rinfo->targeti.in->ino);
|
||||
tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
|
||||
ptvino = &tvino;
|
||||
}
|
||||
|
||||
update_dentry_lease(req->r_dentry, rinfo->dlease,
|
||||
session, req->r_request_started, ptvino,
|
||||
&dvino);
|
||||
} else {
|
||||
dout("%s: no dentry lease or dir cap\n", __func__);
|
||||
}
|
||||
}
|
||||
done:
|
||||
dout("fill_trace done err=%d\n", err);
|
||||
@@ -1478,7 +1479,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
|
||||
u32 fpos_offset;
|
||||
struct ceph_readdir_cache_control cache_ctl = {};
|
||||
|
||||
if (req->r_aborted)
|
||||
if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags))
|
||||
return readdir_prepopulate_inodes_only(req, session);
|
||||
|
||||
if (rinfo->hash_order && req->r_path2) {
|
||||
@@ -1523,14 +1524,14 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
|
||||
/* FIXME: release caps/leases if error occurs */
|
||||
for (i = 0; i < rinfo->dir_nr; i++) {
|
||||
struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;
|
||||
struct ceph_vino vino;
|
||||
struct ceph_vino tvino, dvino;
|
||||
|
||||
dname.name = rde->name;
|
||||
dname.len = rde->name_len;
|
||||
dname.hash = full_name_hash(parent, dname.name, dname.len);
|
||||
|
||||
vino.ino = le64_to_cpu(rde->inode.in->ino);
|
||||
vino.snap = le64_to_cpu(rde->inode.in->snapid);
|
||||
tvino.ino = le64_to_cpu(rde->inode.in->ino);
|
||||
tvino.snap = le64_to_cpu(rde->inode.in->snapid);
|
||||
|
||||
if (rinfo->hash_order) {
|
||||
u32 hash = ceph_str_hash(ci->i_dir_layout.dl_dir_hash,
|
||||
@@ -1559,8 +1560,8 @@ retry_lookup:
|
||||
goto out;
|
||||
}
|
||||
} else if (d_really_is_positive(dn) &&
|
||||
(ceph_ino(d_inode(dn)) != vino.ino ||
|
||||
ceph_snap(d_inode(dn)) != vino.snap)) {
|
||||
(ceph_ino(d_inode(dn)) != tvino.ino ||
|
||||
ceph_snap(d_inode(dn)) != tvino.snap)) {
|
||||
dout(" dn %p points to wrong inode %p\n",
|
||||
dn, d_inode(dn));
|
||||
d_delete(dn);
|
||||
@@ -1572,7 +1573,7 @@ retry_lookup:
|
||||
if (d_really_is_positive(dn)) {
|
||||
in = d_inode(dn);
|
||||
} else {
|
||||
in = ceph_get_inode(parent->d_sb, vino);
|
||||
in = ceph_get_inode(parent->d_sb, tvino);
|
||||
if (IS_ERR(in)) {
|
||||
dout("new_inode badness\n");
|
||||
d_drop(dn);
|
||||
@@ -1617,8 +1618,9 @@ retry_lookup:
|
||||
|
||||
ceph_dentry(dn)->offset = rde->offset;
|
||||
|
||||
dvino = ceph_vino(d_inode(parent));
|
||||
update_dentry_lease(dn, rde->lease, req->r_session,
|
||||
req->r_request_started);
|
||||
req->r_request_started, &tvino, &dvino);
|
||||
|
||||
if (err == 0 && skipped == 0 && cache_ctl.index >= 0) {
|
||||
ret = fill_readdir_cache(d_inode(parent), dn,
|
||||
@@ -1632,7 +1634,7 @@ next_item:
|
||||
}
|
||||
out:
|
||||
if (err == 0 && skipped == 0) {
|
||||
req->r_did_prepopulate = true;
|
||||
set_bit(CEPH_MDS_R_DID_PREPOPULATE, &req->r_req_flags);
|
||||
req->r_readdir_cache_idx = cache_ctl.index;
|
||||
}
|
||||
ceph_readdir_cache_release(&cache_ctl);
|
||||
@@ -1720,7 +1722,7 @@ static void ceph_invalidate_work(struct work_struct *work)
|
||||
|
||||
mutex_lock(&ci->i_truncate_mutex);
|
||||
|
||||
if (ACCESS_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
|
||||
if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
|
||||
pr_warn_ratelimited("invalidate_pages %p %lld forced umount\n",
|
||||
inode, ceph_ino(inode));
|
||||
mapping_set_error(inode->i_mapping, -EIO);
|
||||
|
||||
@@ -25,7 +25,7 @@ static long ceph_ioctl_get_layout(struct file *file, void __user *arg)
|
||||
l.stripe_count = ci->i_layout.stripe_count;
|
||||
l.object_size = ci->i_layout.object_size;
|
||||
l.data_pool = ci->i_layout.pool_id;
|
||||
l.preferred_osd = (s32)-1;
|
||||
l.preferred_osd = -1;
|
||||
if (copy_to_user(arg, &l, sizeof(l)))
|
||||
return -EFAULT;
|
||||
}
|
||||
@@ -97,7 +97,7 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
|
||||
nl.data_pool = ci->i_layout.pool_id;
|
||||
|
||||
/* this is obsolete, and always -1 */
|
||||
nl.preferred_osd = le64_to_cpu(-1);
|
||||
nl.preferred_osd = -1;
|
||||
|
||||
err = __validate_layout(mdsc, &nl);
|
||||
if (err)
|
||||
|
||||
@@ -547,8 +547,8 @@ void ceph_mdsc_release_request(struct kref *kref)
|
||||
ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
|
||||
iput(req->r_inode);
|
||||
}
|
||||
if (req->r_locked_dir)
|
||||
ceph_put_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN);
|
||||
if (req->r_parent)
|
||||
ceph_put_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN);
|
||||
iput(req->r_target_inode);
|
||||
if (req->r_dentry)
|
||||
dput(req->r_dentry);
|
||||
@@ -628,6 +628,9 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
|
||||
{
|
||||
dout("__unregister_request %p tid %lld\n", req, req->r_tid);
|
||||
|
||||
/* Never leave an unregistered request on an unsafe list! */
|
||||
list_del_init(&req->r_unsafe_item);
|
||||
|
||||
if (req->r_tid == mdsc->oldest_tid) {
|
||||
struct rb_node *p = rb_next(&req->r_node);
|
||||
mdsc->oldest_tid = 0;
|
||||
@@ -644,13 +647,15 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
|
||||
|
||||
erase_request(&mdsc->request_tree, req);
|
||||
|
||||
if (req->r_unsafe_dir && req->r_got_unsafe) {
|
||||
if (req->r_unsafe_dir &&
|
||||
test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
|
||||
struct ceph_inode_info *ci = ceph_inode(req->r_unsafe_dir);
|
||||
spin_lock(&ci->i_unsafe_lock);
|
||||
list_del_init(&req->r_unsafe_dir_item);
|
||||
spin_unlock(&ci->i_unsafe_lock);
|
||||
}
|
||||
if (req->r_target_inode && req->r_got_unsafe) {
|
||||
if (req->r_target_inode &&
|
||||
test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
|
||||
struct ceph_inode_info *ci = ceph_inode(req->r_target_inode);
|
||||
spin_lock(&ci->i_unsafe_lock);
|
||||
list_del_init(&req->r_unsafe_target_item);
|
||||
@@ -667,6 +672,28 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
|
||||
ceph_mdsc_put_request(req);
|
||||
}
|
||||
|
||||
/*
|
||||
* Walk back up the dentry tree until we hit a dentry representing a
|
||||
* non-snapshot inode. We do this using the rcu_read_lock (which must be held
|
||||
* when calling this) to ensure that the objects won't disappear while we're
|
||||
* working with them. Once we hit a candidate dentry, we attempt to take a
|
||||
* reference to it, and return that as the result.
|
||||
*/
|
||||
static struct inode *get_nonsnap_parent(struct dentry *dentry)
|
||||
{
|
||||
struct inode *inode = NULL;
|
||||
|
||||
while (dentry && !IS_ROOT(dentry)) {
|
||||
inode = d_inode_rcu(dentry);
|
||||
if (!inode || ceph_snap(inode) == CEPH_NOSNAP)
|
||||
break;
|
||||
dentry = dentry->d_parent;
|
||||
}
|
||||
if (inode)
|
||||
inode = igrab(inode);
|
||||
return inode;
|
||||
}
|
||||
|
||||
/*
|
||||
* Choose mds to send request to next. If there is a hint set in the
|
||||
* request (e.g., due to a prior forward hint from the mds), use that.
|
||||
@@ -675,19 +702,6 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
|
||||
*
|
||||
* Called under mdsc->mutex.
|
||||
*/
|
||||
static struct dentry *get_nonsnap_parent(struct dentry *dentry)
|
||||
{
|
||||
/*
|
||||
* we don't need to worry about protecting the d_parent access
|
||||
* here because we never renaming inside the snapped namespace
|
||||
* except to resplice to another snapdir, and either the old or new
|
||||
* result is a valid result.
|
||||
*/
|
||||
while (!IS_ROOT(dentry) && ceph_snap(d_inode(dentry)) != CEPH_NOSNAP)
|
||||
dentry = dentry->d_parent;
|
||||
return dentry;
|
||||
}
|
||||
|
||||
static int __choose_mds(struct ceph_mds_client *mdsc,
|
||||
struct ceph_mds_request *req)
|
||||
{
|
||||
@@ -697,7 +711,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
|
||||
int mode = req->r_direct_mode;
|
||||
int mds = -1;
|
||||
u32 hash = req->r_direct_hash;
|
||||
bool is_hash = req->r_direct_is_hash;
|
||||
bool is_hash = test_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags);
|
||||
|
||||
/*
|
||||
* is there a specific mds we should try? ignore hint if we have
|
||||
@@ -717,30 +731,39 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
|
||||
inode = NULL;
|
||||
if (req->r_inode) {
|
||||
inode = req->r_inode;
|
||||
ihold(inode);
|
||||
} else if (req->r_dentry) {
|
||||
/* ignore race with rename; old or new d_parent is okay */
|
||||
struct dentry *parent = req->r_dentry->d_parent;
|
||||
struct inode *dir = d_inode(parent);
|
||||
struct dentry *parent;
|
||||
struct inode *dir;
|
||||
|
||||
if (dir->i_sb != mdsc->fsc->sb) {
|
||||
/* not this fs! */
|
||||
rcu_read_lock();
|
||||
parent = req->r_dentry->d_parent;
|
||||
dir = req->r_parent ? : d_inode_rcu(parent);
|
||||
|
||||
if (!dir || dir->i_sb != mdsc->fsc->sb) {
|
||||
/* not this fs or parent went negative */
|
||||
inode = d_inode(req->r_dentry);
|
||||
if (inode)
|
||||
ihold(inode);
|
||||
} else if (ceph_snap(dir) != CEPH_NOSNAP) {
|
||||
/* direct snapped/virtual snapdir requests
|
||||
* based on parent dir inode */
|
||||
struct dentry *dn = get_nonsnap_parent(parent);
|
||||
inode = d_inode(dn);
|
||||
inode = get_nonsnap_parent(parent);
|
||||
dout("__choose_mds using nonsnap parent %p\n", inode);
|
||||
} else {
|
||||
/* dentry target */
|
||||
inode = d_inode(req->r_dentry);
|
||||
if (!inode || mode == USE_AUTH_MDS) {
|
||||
/* dir + name */
|
||||
inode = dir;
|
||||
inode = igrab(dir);
|
||||
hash = ceph_dentry_hash(dir, req->r_dentry);
|
||||
is_hash = true;
|
||||
} else {
|
||||
ihold(inode);
|
||||
}
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
dout("__choose_mds %p is_hash=%d (%d) mode %d\n", inode, (int)is_hash,
|
||||
@@ -769,7 +792,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
|
||||
(int)r, frag.ndist);
|
||||
if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
|
||||
CEPH_MDS_STATE_ACTIVE)
|
||||
return mds;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* since this file/dir wasn't known to be
|
||||
@@ -784,7 +807,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
|
||||
inode, ceph_vinop(inode), frag.frag, mds);
|
||||
if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
|
||||
CEPH_MDS_STATE_ACTIVE)
|
||||
return mds;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -797,6 +820,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
|
||||
cap = rb_entry(rb_first(&ci->i_caps), struct ceph_cap, ci_node);
|
||||
if (!cap) {
|
||||
spin_unlock(&ci->i_ceph_lock);
|
||||
iput(inode);
|
||||
goto random;
|
||||
}
|
||||
mds = cap->session->s_mds;
|
||||
@@ -804,6 +828,8 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
|
||||
inode, ceph_vinop(inode), mds,
|
||||
cap == ci->i_auth_cap ? "auth " : "", cap);
|
||||
spin_unlock(&ci->i_ceph_lock);
|
||||
out:
|
||||
iput(inode);
|
||||
return mds;
|
||||
|
||||
random:
|
||||
@@ -1036,7 +1062,6 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc,
|
||||
while (!list_empty(&session->s_unsafe)) {
|
||||
req = list_first_entry(&session->s_unsafe,
|
||||
struct ceph_mds_request, r_unsafe_item);
|
||||
list_del_init(&req->r_unsafe_item);
|
||||
pr_warn_ratelimited(" dropping unsafe request %llu\n",
|
||||
req->r_tid);
|
||||
__unregister_request(mdsc, req);
|
||||
@@ -1146,7 +1171,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
|
||||
ci->i_ceph_flags |= CEPH_I_CAP_DROPPED;
|
||||
|
||||
if (ci->i_wrbuffer_ref > 0 &&
|
||||
ACCESS_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
|
||||
READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
|
||||
invalidate = true;
|
||||
|
||||
while (!list_empty(&ci->i_cap_flush_list)) {
|
||||
@@ -1775,18 +1800,23 @@ retry:
|
||||
return path;
|
||||
}
|
||||
|
||||
static int build_dentry_path(struct dentry *dentry,
|
||||
static int build_dentry_path(struct dentry *dentry, struct inode *dir,
|
||||
const char **ppath, int *ppathlen, u64 *pino,
|
||||
int *pfreepath)
|
||||
{
|
||||
char *path;
|
||||
|
||||
if (ceph_snap(d_inode(dentry->d_parent)) == CEPH_NOSNAP) {
|
||||
*pino = ceph_ino(d_inode(dentry->d_parent));
|
||||
rcu_read_lock();
|
||||
if (!dir)
|
||||
dir = d_inode_rcu(dentry->d_parent);
|
||||
if (dir && ceph_snap(dir) == CEPH_NOSNAP) {
|
||||
*pino = ceph_ino(dir);
|
||||
rcu_read_unlock();
|
||||
*ppath = dentry->d_name.name;
|
||||
*ppathlen = dentry->d_name.len;
|
||||
return 0;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
path = ceph_mdsc_build_path(dentry, ppathlen, pino, 1);
|
||||
if (IS_ERR(path))
|
||||
return PTR_ERR(path);
|
||||
@@ -1822,8 +1852,8 @@ static int build_inode_path(struct inode *inode,
|
||||
* an explicit ino+path.
|
||||
*/
|
||||
static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
|
||||
const char *rpath, u64 rino,
|
||||
const char **ppath, int *pathlen,
|
||||
struct inode *rdiri, const char *rpath,
|
||||
u64 rino, const char **ppath, int *pathlen,
|
||||
u64 *ino, int *freepath)
|
||||
{
|
||||
int r = 0;
|
||||
@@ -1833,7 +1863,8 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
|
||||
dout(" inode %p %llx.%llx\n", rinode, ceph_ino(rinode),
|
||||
ceph_snap(rinode));
|
||||
} else if (rdentry) {
|
||||
r = build_dentry_path(rdentry, ppath, pathlen, ino, freepath);
|
||||
r = build_dentry_path(rdentry, rdiri, ppath, pathlen, ino,
|
||||
freepath);
|
||||
dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen,
|
||||
*ppath);
|
||||
} else if (rpath || rino) {
|
||||
@@ -1866,7 +1897,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
|
||||
int ret;
|
||||
|
||||
ret = set_request_path_attr(req->r_inode, req->r_dentry,
|
||||
req->r_path1, req->r_ino1.ino,
|
||||
req->r_parent, req->r_path1, req->r_ino1.ino,
|
||||
&path1, &pathlen1, &ino1, &freepath1);
|
||||
if (ret < 0) {
|
||||
msg = ERR_PTR(ret);
|
||||
@@ -1874,6 +1905,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
|
||||
}
|
||||
|
||||
ret = set_request_path_attr(NULL, req->r_old_dentry,
|
||||
req->r_old_dentry_dir,
|
||||
req->r_path2, req->r_ino2.ino,
|
||||
&path2, &pathlen2, &ino2, &freepath2);
|
||||
if (ret < 0) {
|
||||
@@ -1927,10 +1959,13 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
|
||||
mds, req->r_inode_drop, req->r_inode_unless, 0);
|
||||
if (req->r_dentry_drop)
|
||||
releases += ceph_encode_dentry_release(&p, req->r_dentry,
|
||||
mds, req->r_dentry_drop, req->r_dentry_unless);
|
||||
req->r_parent, mds, req->r_dentry_drop,
|
||||
req->r_dentry_unless);
|
||||
if (req->r_old_dentry_drop)
|
||||
releases += ceph_encode_dentry_release(&p, req->r_old_dentry,
|
||||
mds, req->r_old_dentry_drop, req->r_old_dentry_unless);
|
||||
req->r_old_dentry_dir, mds,
|
||||
req->r_old_dentry_drop,
|
||||
req->r_old_dentry_unless);
|
||||
if (req->r_old_inode_drop)
|
||||
releases += ceph_encode_inode_release(&p,
|
||||
d_inode(req->r_old_dentry),
|
||||
@@ -2012,7 +2047,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
|
||||
dout("prepare_send_request %p tid %lld %s (attempt %d)\n", req,
|
||||
req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts);
|
||||
|
||||
if (req->r_got_unsafe) {
|
||||
if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
|
||||
void *p;
|
||||
/*
|
||||
* Replay. Do not regenerate message (and rebuild
|
||||
@@ -2061,16 +2096,16 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
|
||||
|
||||
rhead = msg->front.iov_base;
|
||||
rhead->oldest_client_tid = cpu_to_le64(__get_oldest_tid(mdsc));
|
||||
if (req->r_got_unsafe)
|
||||
if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags))
|
||||
flags |= CEPH_MDS_FLAG_REPLAY;
|
||||
if (req->r_locked_dir)
|
||||
if (req->r_parent)
|
||||
flags |= CEPH_MDS_FLAG_WANT_DENTRY;
|
||||
rhead->flags = cpu_to_le32(flags);
|
||||
rhead->num_fwd = req->r_num_fwd;
|
||||
rhead->num_retry = req->r_attempts - 1;
|
||||
rhead->ino = 0;
|
||||
|
||||
dout(" r_locked_dir = %p\n", req->r_locked_dir);
|
||||
dout(" r_parent = %p\n", req->r_parent);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -2084,8 +2119,8 @@ static int __do_request(struct ceph_mds_client *mdsc,
|
||||
int mds = -1;
|
||||
int err = 0;
|
||||
|
||||
if (req->r_err || req->r_got_result) {
|
||||
if (req->r_aborted)
|
||||
if (req->r_err || test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) {
|
||||
if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags))
|
||||
__unregister_request(mdsc, req);
|
||||
goto out;
|
||||
}
|
||||
@@ -2096,12 +2131,12 @@ static int __do_request(struct ceph_mds_client *mdsc,
|
||||
err = -EIO;
|
||||
goto finish;
|
||||
}
|
||||
if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
|
||||
if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
|
||||
dout("do_request forced umount\n");
|
||||
err = -EIO;
|
||||
goto finish;
|
||||
}
|
||||
if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_MOUNTING) {
|
||||
if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_MOUNTING) {
|
||||
if (mdsc->mdsmap_err) {
|
||||
err = mdsc->mdsmap_err;
|
||||
dout("do_request mdsmap err %d\n", err);
|
||||
@@ -2215,7 +2250,7 @@ static void kick_requests(struct ceph_mds_client *mdsc, int mds)
|
||||
while (p) {
|
||||
req = rb_entry(p, struct ceph_mds_request, r_node);
|
||||
p = rb_next(p);
|
||||
if (req->r_got_unsafe)
|
||||
if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags))
|
||||
continue;
|
||||
if (req->r_attempts > 0)
|
||||
continue; /* only new requests */
|
||||
@@ -2250,11 +2285,11 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
|
||||
|
||||
dout("do_request on %p\n", req);
|
||||
|
||||
/* take CAP_PIN refs for r_inode, r_locked_dir, r_old_dentry */
|
||||
/* take CAP_PIN refs for r_inode, r_parent, r_old_dentry */
|
||||
if (req->r_inode)
|
||||
ceph_get_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
|
||||
if (req->r_locked_dir)
|
||||
ceph_get_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN);
|
||||
if (req->r_parent)
|
||||
ceph_get_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN);
|
||||
if (req->r_old_dentry_dir)
|
||||
ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir),
|
||||
CEPH_CAP_PIN);
|
||||
@@ -2289,7 +2324,7 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
|
||||
mutex_lock(&mdsc->mutex);
|
||||
|
||||
/* only abort if we didn't race with a real reply */
|
||||
if (req->r_got_result) {
|
||||
if (test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) {
|
||||
err = le32_to_cpu(req->r_reply_info.head->result);
|
||||
} else if (err < 0) {
|
||||
dout("aborted request %lld with %d\n", req->r_tid, err);
|
||||
@@ -2301,10 +2336,10 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
|
||||
*/
|
||||
mutex_lock(&req->r_fill_mutex);
|
||||
req->r_err = err;
|
||||
req->r_aborted = true;
|
||||
set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags);
|
||||
mutex_unlock(&req->r_fill_mutex);
|
||||
|
||||
if (req->r_locked_dir &&
|
||||
if (req->r_parent &&
|
||||
(req->r_op & CEPH_MDS_OP_WRITE))
|
||||
ceph_invalidate_dir_request(req);
|
||||
} else {
|
||||
@@ -2323,7 +2358,7 @@ out:
|
||||
*/
|
||||
void ceph_invalidate_dir_request(struct ceph_mds_request *req)
|
||||
{
|
||||
struct inode *inode = req->r_locked_dir;
|
||||
struct inode *inode = req->r_parent;
|
||||
|
||||
dout("invalidate_dir_request %p (complete, lease(s))\n", inode);
|
||||
|
||||
@@ -2379,14 +2414,14 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
|
||||
}
|
||||
|
||||
/* dup? */
|
||||
if ((req->r_got_unsafe && !head->safe) ||
|
||||
(req->r_got_safe && head->safe)) {
|
||||
if ((test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags) && !head->safe) ||
|
||||
(test_bit(CEPH_MDS_R_GOT_SAFE, &req->r_req_flags) && head->safe)) {
|
||||
pr_warn("got a dup %s reply on %llu from mds%d\n",
|
||||
head->safe ? "safe" : "unsafe", tid, mds);
|
||||
mutex_unlock(&mdsc->mutex);
|
||||
goto out;
|
||||
}
|
||||
if (req->r_got_safe) {
|
||||
if (test_bit(CEPH_MDS_R_GOT_SAFE, &req->r_req_flags)) {
|
||||
pr_warn("got unsafe after safe on %llu from mds%d\n",
|
||||
tid, mds);
|
||||
mutex_unlock(&mdsc->mutex);
|
||||
@@ -2425,10 +2460,10 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
|
||||
|
||||
|
||||
if (head->safe) {
|
||||
req->r_got_safe = true;
|
||||
set_bit(CEPH_MDS_R_GOT_SAFE, &req->r_req_flags);
|
||||
__unregister_request(mdsc, req);
|
||||
|
||||
if (req->r_got_unsafe) {
|
||||
if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
|
||||
/*
|
||||
* We already handled the unsafe response, now do the
|
||||
* cleanup. No need to examine the response; the MDS
|
||||
@@ -2437,7 +2472,6 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
|
||||
* useful we could do with a revised return value.
|
||||
*/
|
||||
dout("got safe reply %llu, mds%d\n", tid, mds);
|
||||
list_del_init(&req->r_unsafe_item);
|
||||
|
||||
/* last unsafe request during umount? */
|
||||
if (mdsc->stopping && !__get_oldest_req(mdsc))
|
||||
@@ -2446,7 +2480,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
|
||||
goto out;
|
||||
}
|
||||
} else {
|
||||
req->r_got_unsafe = true;
|
||||
set_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags);
|
||||
list_add_tail(&req->r_unsafe_item, &req->r_session->s_unsafe);
|
||||
if (req->r_unsafe_dir) {
|
||||
struct ceph_inode_info *ci =
|
||||
@@ -2486,7 +2520,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
|
||||
/* insert trace into our cache */
|
||||
mutex_lock(&req->r_fill_mutex);
|
||||
current->journal_info = req;
|
||||
err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session);
|
||||
err = ceph_fill_trace(mdsc->fsc->sb, req);
|
||||
if (err == 0) {
|
||||
if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR ||
|
||||
req->r_op == CEPH_MDS_OP_LSSNAP))
|
||||
@@ -2500,7 +2534,8 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
|
||||
if (realm)
|
||||
ceph_put_snap_realm(mdsc, realm);
|
||||
|
||||
if (err == 0 && req->r_got_unsafe && req->r_target_inode) {
|
||||
if (err == 0 && req->r_target_inode &&
|
||||
test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
|
||||
struct ceph_inode_info *ci = ceph_inode(req->r_target_inode);
|
||||
spin_lock(&ci->i_unsafe_lock);
|
||||
list_add_tail(&req->r_unsafe_target_item, &ci->i_unsafe_iops);
|
||||
@@ -2508,12 +2543,12 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
|
||||
}
|
||||
out_err:
|
||||
mutex_lock(&mdsc->mutex);
|
||||
if (!req->r_aborted) {
|
||||
if (!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
|
||||
if (err) {
|
||||
req->r_err = err;
|
||||
} else {
|
||||
req->r_reply = ceph_msg_get(msg);
|
||||
req->r_got_result = true;
|
||||
set_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags);
|
||||
}
|
||||
} else {
|
||||
dout("reply arrived after request %lld was aborted\n", tid);
|
||||
@@ -2557,7 +2592,7 @@ static void handle_forward(struct ceph_mds_client *mdsc,
|
||||
goto out; /* dup reply? */
|
||||
}
|
||||
|
||||
if (req->r_aborted) {
|
||||
if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
|
||||
dout("forward tid %llu aborted, unregistering\n", tid);
|
||||
__unregister_request(mdsc, req);
|
||||
} else if (fwd_seq <= req->r_num_fwd) {
|
||||
@@ -2567,7 +2602,7 @@ static void handle_forward(struct ceph_mds_client *mdsc,
|
||||
/* resend. forward race not possible; mds would drop */
|
||||
dout("forward tid %llu to mds%d (we resend)\n", tid, next_mds);
|
||||
BUG_ON(req->r_err);
|
||||
BUG_ON(req->r_got_result);
|
||||
BUG_ON(test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags));
|
||||
req->r_attempts = 0;
|
||||
req->r_num_fwd = fwd_seq;
|
||||
req->r_resend_mds = next_mds;
|
||||
@@ -2732,7 +2767,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
|
||||
while (p) {
|
||||
req = rb_entry(p, struct ceph_mds_request, r_node);
|
||||
p = rb_next(p);
|
||||
if (req->r_got_unsafe)
|
||||
if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags))
|
||||
continue;
|
||||
if (req->r_attempts == 0)
|
||||
continue; /* only old requests */
|
||||
@@ -3556,7 +3591,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
|
||||
{
|
||||
u64 want_tid, want_flush;
|
||||
|
||||
if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
|
||||
if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
|
||||
return;
|
||||
|
||||
dout("sync\n");
|
||||
@@ -3587,7 +3622,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
|
||||
*/
|
||||
static bool done_closing_sessions(struct ceph_mds_client *mdsc, int skipped)
|
||||
{
|
||||
if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
|
||||
if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
|
||||
return true;
|
||||
return atomic_read(&mdsc->num_sessions) <= skipped;
|
||||
}
|
||||
|
||||
@@ -202,9 +202,18 @@ struct ceph_mds_request {
|
||||
char *r_path1, *r_path2;
|
||||
struct ceph_vino r_ino1, r_ino2;
|
||||
|
||||
struct inode *r_locked_dir; /* dir (if any) i_mutex locked by vfs */
|
||||
struct inode *r_parent; /* parent dir inode */
|
||||
struct inode *r_target_inode; /* resulting inode */
|
||||
|
||||
#define CEPH_MDS_R_DIRECT_IS_HASH (1) /* r_direct_hash is valid */
|
||||
#define CEPH_MDS_R_ABORTED (2) /* call was aborted */
|
||||
#define CEPH_MDS_R_GOT_UNSAFE (3) /* got an unsafe reply */
|
||||
#define CEPH_MDS_R_GOT_SAFE (4) /* got a safe reply */
|
||||
#define CEPH_MDS_R_GOT_RESULT (5) /* got a result */
|
||||
#define CEPH_MDS_R_DID_PREPOPULATE (6) /* prepopulated readdir */
|
||||
#define CEPH_MDS_R_PARENT_LOCKED (7) /* is r_parent->i_rwsem wlocked? */
|
||||
unsigned long r_req_flags;
|
||||
|
||||
struct mutex r_fill_mutex;
|
||||
|
||||
union ceph_mds_request_args r_args;
|
||||
@@ -216,7 +225,6 @@ struct ceph_mds_request {
|
||||
/* for choosing which mds to send this request to */
|
||||
int r_direct_mode;
|
||||
u32 r_direct_hash; /* choose dir frag based on this dentry hash */
|
||||
bool r_direct_is_hash; /* true if r_direct_hash is valid */
|
||||
|
||||
/* data payload is used for xattr ops */
|
||||
struct ceph_pagelist *r_pagelist;
|
||||
@@ -234,7 +242,6 @@ struct ceph_mds_request {
|
||||
struct ceph_mds_reply_info_parsed r_reply_info;
|
||||
struct page *r_locked_page;
|
||||
int r_err;
|
||||
bool r_aborted;
|
||||
|
||||
unsigned long r_timeout; /* optional. jiffies, 0 is "wait forever" */
|
||||
unsigned long r_started; /* start time to measure timeout against */
|
||||
@@ -262,9 +269,7 @@ struct ceph_mds_request {
|
||||
ceph_mds_request_callback_t r_callback;
|
||||
ceph_mds_request_wait_callback_t r_wait_for_completion;
|
||||
struct list_head r_unsafe_item; /* per-session unsafe list item */
|
||||
bool r_got_unsafe, r_got_safe, r_got_result;
|
||||
|
||||
bool r_did_prepopulate;
|
||||
long long r_dir_release_cnt;
|
||||
long long r_dir_ordered_cnt;
|
||||
int r_readdir_cache_idx;
|
||||
|
||||
@@ -757,7 +757,6 @@ static const struct super_operations ceph_super_ops = {
|
||||
.destroy_inode = ceph_destroy_inode,
|
||||
.write_inode = ceph_write_inode,
|
||||
.drop_inode = ceph_drop_inode,
|
||||
.evict_inode = ceph_evict_inode,
|
||||
.sync_fs = ceph_sync_fs,
|
||||
.put_super = ceph_put_super,
|
||||
.show_options = ceph_show_options,
|
||||
@@ -952,6 +951,14 @@ static int ceph_register_bdi(struct super_block *sb,
|
||||
fsc->backing_dev_info.ra_pages =
|
||||
VM_MAX_READAHEAD * 1024 / PAGE_SIZE;
|
||||
|
||||
if (fsc->mount_options->rsize > fsc->mount_options->rasize &&
|
||||
fsc->mount_options->rsize >= PAGE_SIZE)
|
||||
fsc->backing_dev_info.io_pages =
|
||||
(fsc->mount_options->rsize + PAGE_SIZE - 1)
|
||||
>> PAGE_SHIFT;
|
||||
else if (fsc->mount_options->rsize == 0)
|
||||
fsc->backing_dev_info.io_pages = ULONG_MAX;
|
||||
|
||||
err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%ld",
|
||||
atomic_long_inc_return(&bdi_seq));
|
||||
if (!err)
|
||||
|
||||
@@ -45,8 +45,8 @@
|
||||
#define ceph_test_mount_opt(fsc, opt) \
|
||||
(!!((fsc)->mount_options->flags & CEPH_MOUNT_OPT_##opt))
|
||||
|
||||
#define CEPH_RSIZE_DEFAULT 0 /* max read size */
|
||||
#define CEPH_RASIZE_DEFAULT (8192*1024) /* readahead */
|
||||
#define CEPH_RSIZE_DEFAULT (64*1024*1024) /* max read size */
|
||||
#define CEPH_RASIZE_DEFAULT (8192*1024) /* max readahead */
|
||||
#define CEPH_MAX_READDIR_DEFAULT 1024
|
||||
#define CEPH_MAX_READDIR_BYTES_DEFAULT (512*1024)
|
||||
#define CEPH_SNAPDIRNAME_DEFAULT ".snap"
|
||||
@@ -343,7 +343,6 @@ struct ceph_inode_info {
|
||||
u32 i_rdcache_gen; /* incremented each time we get FILE_CACHE. */
|
||||
u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */
|
||||
|
||||
struct list_head i_unsafe_writes; /* uncommitted sync writes */
|
||||
struct list_head i_unsafe_dirops; /* uncommitted mds dir ops */
|
||||
struct list_head i_unsafe_iops; /* uncommitted mds inode ops */
|
||||
spinlock_t i_unsafe_lock;
|
||||
@@ -602,7 +601,7 @@ static inline int __ceph_caps_wanted(struct ceph_inode_info *ci)
|
||||
}
|
||||
|
||||
/* what the mds thinks we want */
|
||||
extern int __ceph_caps_mds_wanted(struct ceph_inode_info *ci);
|
||||
extern int __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check);
|
||||
|
||||
extern void ceph_caps_init(struct ceph_mds_client *mdsc);
|
||||
extern void ceph_caps_finalize(struct ceph_mds_client *mdsc);
|
||||
@@ -753,7 +752,6 @@ extern const struct inode_operations ceph_file_iops;
|
||||
extern struct inode *ceph_alloc_inode(struct super_block *sb);
|
||||
extern void ceph_destroy_inode(struct inode *inode);
|
||||
extern int ceph_drop_inode(struct inode *inode);
|
||||
extern void ceph_evict_inode(struct inode *inode);
|
||||
|
||||
extern struct inode *ceph_get_inode(struct super_block *sb,
|
||||
struct ceph_vino vino);
|
||||
@@ -764,8 +762,7 @@ extern void ceph_fill_file_time(struct inode *inode, int issued,
|
||||
u64 time_warp_seq, struct timespec *ctime,
|
||||
struct timespec *mtime, struct timespec *atime);
|
||||
extern int ceph_fill_trace(struct super_block *sb,
|
||||
struct ceph_mds_request *req,
|
||||
struct ceph_mds_session *session);
|
||||
struct ceph_mds_request *req);
|
||||
extern int ceph_readdir_prepopulate(struct ceph_mds_request *req,
|
||||
struct ceph_mds_session *session);
|
||||
|
||||
@@ -904,6 +901,7 @@ extern void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc);
|
||||
extern int ceph_encode_inode_release(void **p, struct inode *inode,
|
||||
int mds, int drop, int unless, int force);
|
||||
extern int ceph_encode_dentry_release(void **p, struct dentry *dn,
|
||||
struct inode *dir,
|
||||
int mds, int drop, int unless);
|
||||
|
||||
extern int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
|
||||
@@ -933,7 +931,7 @@ extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
|
||||
extern int ceph_release(struct inode *inode, struct file *filp);
|
||||
extern void ceph_fill_inline_data(struct inode *inode, struct page *locked_page,
|
||||
char *data, size_t len);
|
||||
extern void ceph_sync_write_wait(struct inode *inode);
|
||||
|
||||
/* dir.c */
|
||||
extern const struct file_operations ceph_dir_fops;
|
||||
extern const struct file_operations ceph_snapdir_fops;
|
||||
|
||||
@@ -22,7 +22,6 @@ struct ceph_osd_client;
|
||||
* completion callback for async writepages
|
||||
*/
|
||||
typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *);
|
||||
typedef void (*ceph_osdc_unsafe_callback_t)(struct ceph_osd_request *, bool);
|
||||
|
||||
#define CEPH_HOMELESS_OSD -1
|
||||
|
||||
@@ -170,15 +169,12 @@ struct ceph_osd_request {
|
||||
unsigned int r_num_ops;
|
||||
|
||||
int r_result;
|
||||
bool r_got_reply;
|
||||
|
||||
struct ceph_osd_client *r_osdc;
|
||||
struct kref r_kref;
|
||||
bool r_mempool;
|
||||
struct completion r_completion;
|
||||
struct completion r_done_completion; /* fsync waiter */
|
||||
struct completion r_completion; /* private to osd_client.c */
|
||||
ceph_osdc_callback_t r_callback;
|
||||
ceph_osdc_unsafe_callback_t r_unsafe_callback;
|
||||
struct list_head r_unsafe_item;
|
||||
|
||||
struct inode *r_inode; /* for use by callbacks */
|
||||
|
||||
@@ -57,7 +57,7 @@ static inline bool ceph_can_shift_osds(struct ceph_pg_pool_info *pool)
|
||||
case CEPH_POOL_TYPE_EC:
|
||||
return false;
|
||||
default:
|
||||
BUG_ON(1);
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -81,13 +81,6 @@ void ceph_oloc_copy(struct ceph_object_locator *dest,
|
||||
const struct ceph_object_locator *src);
|
||||
void ceph_oloc_destroy(struct ceph_object_locator *oloc);
|
||||
|
||||
/*
|
||||
* Maximum object name length supported by the kernel client
|
||||
*
|
||||
* (probably outdated: must be >= RBD_MAX_MD_NAME_LEN -- currently 100)
|
||||
*/
|
||||
#define CEPH_MAX_OID_NAME_LEN 100
|
||||
|
||||
/*
|
||||
* 51-char inline_name is long enough for all cephfs and all but one
|
||||
* rbd requests: <imgname> in "<imgname>.rbd"/"rbd_id.<imgname>" can be
|
||||
@@ -173,8 +166,8 @@ struct ceph_osdmap {
|
||||
* the list of osds that store+replicate them. */
|
||||
struct crush_map *crush;
|
||||
|
||||
struct mutex crush_scratch_mutex;
|
||||
int crush_scratch_ary[CEPH_PG_MAX_SIZE * 3];
|
||||
struct mutex crush_workspace_mutex;
|
||||
void *crush_workspace;
|
||||
};
|
||||
|
||||
static inline bool ceph_osd_exists(struct ceph_osdmap *map, int osd)
|
||||
|
||||
@@ -50,7 +50,7 @@ struct ceph_timespec {
|
||||
#define CEPH_PG_LAYOUT_LINEAR 2
|
||||
#define CEPH_PG_LAYOUT_HYBRID 3
|
||||
|
||||
#define CEPH_PG_MAX_SIZE 16 /* max # osds in a single pg */
|
||||
#define CEPH_PG_MAX_SIZE 32 /* max # osds in a single pg */
|
||||
|
||||
/*
|
||||
* placement group.
|
||||
|
||||
@@ -135,13 +135,6 @@ struct crush_bucket {
|
||||
__u32 size; /* num items */
|
||||
__s32 *items;
|
||||
|
||||
/*
|
||||
* cached random permutation: used for uniform bucket and for
|
||||
* the linear search fallback for the other bucket types.
|
||||
*/
|
||||
__u32 perm_x; /* @x for which *perm is defined */
|
||||
__u32 perm_n; /* num elements of *perm that are permuted/defined */
|
||||
__u32 *perm;
|
||||
};
|
||||
|
||||
struct crush_bucket_uniform {
|
||||
@@ -211,6 +204,21 @@ struct crush_map {
|
||||
* device fails. */
|
||||
__u8 chooseleaf_stable;
|
||||
|
||||
/*
|
||||
* This value is calculated after decode or construction by
|
||||
* the builder. It is exposed here (rather than having a
|
||||
* 'build CRUSH working space' function) so that callers can
|
||||
* reserve a static buffer, allocate space on the stack, or
|
||||
* otherwise avoid calling into the heap allocator if they
|
||||
* want to. The size of the working space depends on the map,
|
||||
* while the size of the scratch vector passed to the mapper
|
||||
* depends on the size of the desired result set.
|
||||
*
|
||||
* Nothing stops the caller from allocating both in one swell
|
||||
* foop and passing in two pointers, though.
|
||||
*/
|
||||
size_t working_size;
|
||||
|
||||
#ifndef __KERNEL__
|
||||
/*
|
||||
* version 0 (original) of straw_calc has various flaws. version 1
|
||||
@@ -248,4 +256,23 @@ static inline int crush_calc_tree_node(int i)
|
||||
return ((i+1) << 1)-1;
|
||||
}
|
||||
|
||||
/*
|
||||
* These data structures are private to the CRUSH implementation. They
|
||||
* are exposed in this header file because builder needs their
|
||||
* definitions to calculate the total working size.
|
||||
*
|
||||
* Moving this out of the crush map allows us to treat the CRUSH map as
|
||||
* immutable within the mapper and removes the requirement for a CRUSH
|
||||
* map lock.
|
||||
*/
|
||||
/*
* Per-bucket working state: a permutation of the bucket's items plus
* the bookkeeping that says for which input and how far it is defined.
*
* NOTE(review): these appear to be the perm_x/perm_n/perm fields that
* struct crush_bucket used to carry as its "cached random permutation"
* (uniform buckets and the linear search fallback) -- confirm against
* the mapper before relying on this.
*/
struct crush_work_bucket {
|
||||
__u32 perm_x; /* @x for which *perm is defined */
|
||||
__u32 perm_n; /* num elements of *perm that are permuted/defined */
|
||||
__u32 *perm; /* Permutation of the bucket's items */
|
||||
};
|
||||
|
||||
/*
* Container for the per-bucket working state.
*
* NOTE(review): work is presumably an array holding one
* struct crush_work_bucket pointer per bucket in the map -- confirm
* against the code that sizes and initializes the working space.
*/
struct crush_work {
|
||||
struct crush_work_bucket **work; /* Per-bucket working store */
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user