Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client

Pull Ceph updates from Sage Weil:
 "There is quite a bit here, including some overdue refactoring and
  cleanup on the mon_client and osd_client code from Ilya, scattered
  writeback support for CephFS and a pile of bug fixes from Zheng, and a
  few random cleanups and fixes from others"

[ I already decided not to pull this because of it having been rebased
  recently, but ended up changing my mind after all.  Next time I'll
  really hold people to it.  Oh well.   - Linus ]

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (34 commits)
  libceph: use KMEM_CACHE macro
  ceph: use kmem_cache_zalloc
  rbd: use KMEM_CACHE macro
  ceph: use lookup request to revalidate dentry
  ceph: kill ceph_get_dentry_parent_inode()
  ceph: fix security xattr deadlock
  ceph: don't request vxattrs from MDS
  ceph: fix mounting same fs multiple times
  ceph: remove unnecessary NULL check
  ceph: avoid updating directory inode's i_size accidentally
  ceph: fix race during filling readdir cache
  libceph: use sizeof_footer() more
  ceph: kill ceph_empty_snapc
  ceph: fix a wrong comparison
  ceph: replace CURRENT_TIME by current_fs_time()
  ceph: scattered page writeback
  libceph: add helper that duplicates last extent operation
  libceph: enable large, variable-sized OSD requests
  libceph: osdc->req_mempool should be backed by a slab pool
  libceph: make r_request msg_size calculation clearer
  ...
This commit is contained in:
Linus Torvalds
2016-03-26 15:53:16 -07:00
22 changed files with 807 additions and 515 deletions
+3 -1
View File
@@ -361,7 +361,6 @@ ceph_parse_options(char *options, const char *dev_name,
opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT;
opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;
opt->monc_ping_timeout = CEPH_MONC_PING_TIMEOUT_DEFAULT;
/* get mon ip(s) */
/* ip1[:port1][,ip2[:port2]...] */
@@ -686,6 +685,9 @@ int __ceph_open_session(struct ceph_client *client, unsigned long started)
return client->auth_err;
}
pr_info("client%llu fsid %pU\n", ceph_client_id(client), &client->fsid);
ceph_debugfs_client_init(client);
return 0;
}
EXPORT_SYMBOL(__ceph_open_session);
+11 -6
View File
@@ -112,15 +112,20 @@ static int monc_show(struct seq_file *s, void *p)
struct ceph_mon_generic_request *req;
struct ceph_mon_client *monc = &client->monc;
struct rb_node *rp;
int i;
mutex_lock(&monc->mutex);
if (monc->have_mdsmap)
seq_printf(s, "have mdsmap %u\n", (unsigned int)monc->have_mdsmap);
if (monc->have_osdmap)
seq_printf(s, "have osdmap %u\n", (unsigned int)monc->have_osdmap);
if (monc->want_next_osdmap)
seq_printf(s, "want next osdmap\n");
for (i = 0; i < ARRAY_SIZE(monc->subs); i++) {
seq_printf(s, "have %s %u", ceph_sub_str[i],
monc->subs[i].have);
if (monc->subs[i].want)
seq_printf(s, " want %llu%s",
le64_to_cpu(monc->subs[i].item.start),
(monc->subs[i].item.flags &
CEPH_SUBSCRIBE_ONETIME ? "" : "+"));
seq_putc(s, '\n');
}
for (rp = rb_first(&monc->generic_request_tree); rp; rp = rb_next(rp)) {
__u16 op;
+5 -24
View File
@@ -235,18 +235,12 @@ static struct workqueue_struct *ceph_msgr_wq;
static int ceph_msgr_slab_init(void)
{
BUG_ON(ceph_msg_cache);
ceph_msg_cache = kmem_cache_create("ceph_msg",
sizeof (struct ceph_msg),
__alignof__(struct ceph_msg), 0, NULL);
ceph_msg_cache = KMEM_CACHE(ceph_msg, 0);
if (!ceph_msg_cache)
return -ENOMEM;
BUG_ON(ceph_msg_data_cache);
ceph_msg_data_cache = kmem_cache_create("ceph_msg_data",
sizeof (struct ceph_msg_data),
__alignof__(struct ceph_msg_data),
0, NULL);
ceph_msg_data_cache = KMEM_CACHE(ceph_msg_data, 0);
if (ceph_msg_data_cache)
return 0;
@@ -1221,25 +1215,19 @@ static void prepare_message_data(struct ceph_msg *msg, u32 data_len)
static void prepare_write_message_footer(struct ceph_connection *con)
{
struct ceph_msg *m = con->out_msg;
int v = con->out_kvec_left;
m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE;
dout("prepare_write_message_footer %p\n", con);
con->out_kvec[v].iov_base = &m->footer;
con_out_kvec_add(con, sizeof_footer(con), &m->footer);
if (con->peer_features & CEPH_FEATURE_MSG_AUTH) {
if (con->ops->sign_message)
con->ops->sign_message(m);
else
m->footer.sig = 0;
con->out_kvec[v].iov_len = sizeof(m->footer);
con->out_kvec_bytes += sizeof(m->footer);
} else {
m->old_footer.flags = m->footer.flags;
con->out_kvec[v].iov_len = sizeof(m->old_footer);
con->out_kvec_bytes += sizeof(m->old_footer);
}
con->out_kvec_left++;
con->out_more = m->more_to_follow;
con->out_msg_done = true;
}
@@ -2409,11 +2397,7 @@ static int read_partial_message(struct ceph_connection *con)
}
/* footer */
if (need_sign)
size = sizeof(m->footer);
else
size = sizeof(m->old_footer);
size = sizeof_footer(con);
end += size;
ret = read_partial(con, end, size, &m->footer);
if (ret <= 0)
@@ -3089,10 +3073,7 @@ void ceph_msg_revoke(struct ceph_msg *msg)
con->out_skip += con_out_kvec_skip(con);
} else {
BUG_ON(!msg->data_length);
if (con->peer_features & CEPH_FEATURE_MSG_AUTH)
con->out_skip += sizeof(msg->footer);
else
con->out_skip += sizeof(msg->old_footer);
con->out_skip += sizeof_footer(con);
}
/* data, middle, front */
if (msg->data_length)
+249 -202
View File
File diff suppressed because it is too large Load Diff
+73 -36
View File
@@ -338,9 +338,10 @@ static void ceph_osdc_release_request(struct kref *kref)
ceph_put_snap_context(req->r_snapc);
if (req->r_mempool)
mempool_free(req, req->r_osdc->req_mempool);
else
else if (req->r_num_ops <= CEPH_OSD_SLAB_OPS)
kmem_cache_free(ceph_osd_request_cache, req);
else
kfree(req);
}
void ceph_osdc_get_request(struct ceph_osd_request *req)
@@ -369,28 +370,22 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
struct ceph_msg *msg;
size_t msg_size;
BUILD_BUG_ON(CEPH_OSD_MAX_OP > U16_MAX);
BUG_ON(num_ops > CEPH_OSD_MAX_OP);
msg_size = 4 + 4 + 8 + 8 + 4+8;
msg_size += 2 + 4 + 8 + 4 + 4; /* oloc */
msg_size += 1 + 8 + 4 + 4; /* pg_t */
msg_size += 4 + CEPH_MAX_OID_NAME_LEN; /* oid */
msg_size += 2 + num_ops*sizeof(struct ceph_osd_op);
msg_size += 8; /* snapid */
msg_size += 8; /* snap_seq */
msg_size += 8 * (snapc ? snapc->num_snaps : 0); /* snaps */
msg_size += 4;
if (use_mempool) {
BUG_ON(num_ops > CEPH_OSD_SLAB_OPS);
req = mempool_alloc(osdc->req_mempool, gfp_flags);
memset(req, 0, sizeof(*req));
} else if (num_ops <= CEPH_OSD_SLAB_OPS) {
req = kmem_cache_alloc(ceph_osd_request_cache, gfp_flags);
} else {
req = kmem_cache_zalloc(ceph_osd_request_cache, gfp_flags);
BUG_ON(num_ops > CEPH_OSD_MAX_OPS);
req = kmalloc(sizeof(*req) + num_ops * sizeof(req->r_ops[0]),
gfp_flags);
}
if (req == NULL)
if (unlikely(!req))
return NULL;
/* req only, each op is zeroed in _osd_req_op_init() */
memset(req, 0, sizeof(*req));
req->r_osdc = osdc;
req->r_mempool = use_mempool;
req->r_num_ops = num_ops;
@@ -408,18 +403,36 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
req->r_base_oloc.pool = -1;
req->r_target_oloc.pool = -1;
msg_size = OSD_OPREPLY_FRONT_LEN;
if (num_ops > CEPH_OSD_SLAB_OPS) {
/* ceph_osd_op and rval */
msg_size += (num_ops - CEPH_OSD_SLAB_OPS) *
(sizeof(struct ceph_osd_op) + 4);
}
/* create reply message */
if (use_mempool)
msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0);
else
msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY,
OSD_OPREPLY_FRONT_LEN, gfp_flags, true);
msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, msg_size,
gfp_flags, true);
if (!msg) {
ceph_osdc_put_request(req);
return NULL;
}
req->r_reply = msg;
msg_size = 4 + 4 + 4; /* client_inc, osdmap_epoch, flags */
msg_size += 4 + 4 + 4 + 8; /* mtime, reassert_version */
msg_size += 2 + 4 + 8 + 4 + 4; /* oloc */
msg_size += 1 + 8 + 4 + 4; /* pgid */
msg_size += 4 + CEPH_MAX_OID_NAME_LEN; /* oid */
msg_size += 2 + num_ops * sizeof(struct ceph_osd_op);
msg_size += 8; /* snapid */
msg_size += 8; /* snap_seq */
msg_size += 4 + 8 * (snapc ? snapc->num_snaps : 0); /* snaps */
msg_size += 4; /* retry_attempt */
/* create request message; allow space for oid */
if (use_mempool)
msg = ceph_msgpool_get(&osdc->msgpool_op, 0);
@@ -498,7 +511,7 @@ void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
if (opcode == CEPH_OSD_OP_WRITE || opcode == CEPH_OSD_OP_WRITEFULL)
payload_len += length;
op->payload_len = payload_len;
op->indata_len = payload_len;
}
EXPORT_SYMBOL(osd_req_op_extent_init);
@@ -517,10 +530,32 @@ void osd_req_op_extent_update(struct ceph_osd_request *osd_req,
BUG_ON(length > previous);
op->extent.length = length;
op->payload_len -= previous - length;
op->indata_len -= previous - length;
}
EXPORT_SYMBOL(osd_req_op_extent_update);
void osd_req_op_extent_dup_last(struct ceph_osd_request *osd_req,
unsigned int which, u64 offset_inc)
{
struct ceph_osd_req_op *op, *prev_op;
BUG_ON(which + 1 >= osd_req->r_num_ops);
prev_op = &osd_req->r_ops[which];
op = _osd_req_op_init(osd_req, which + 1, prev_op->op, prev_op->flags);
/* dup previous one */
op->indata_len = prev_op->indata_len;
op->outdata_len = prev_op->outdata_len;
op->extent = prev_op->extent;
/* adjust offset */
op->extent.offset += offset_inc;
op->extent.length -= offset_inc;
if (op->op == CEPH_OSD_OP_WRITE || op->op == CEPH_OSD_OP_WRITEFULL)
op->indata_len -= offset_inc;
}
EXPORT_SYMBOL(osd_req_op_extent_dup_last);
void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
u16 opcode, const char *class, const char *method)
{
@@ -554,7 +589,7 @@ void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
op->cls.argc = 0; /* currently unused */
op->payload_len = payload_len;
op->indata_len = payload_len;
}
EXPORT_SYMBOL(osd_req_op_cls_init);
@@ -587,7 +622,7 @@ int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which,
op->xattr.cmp_mode = cmp_mode;
ceph_osd_data_pagelist_init(&op->xattr.osd_data, pagelist);
op->payload_len = payload_len;
op->indata_len = payload_len;
return 0;
}
EXPORT_SYMBOL(osd_req_op_xattr_init);
@@ -707,7 +742,7 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req,
BUG_ON(osd_data->type == CEPH_OSD_DATA_TYPE_NONE);
dst->cls.indata_len = cpu_to_le32(data_length);
ceph_osdc_msg_data_add(req->r_request, osd_data);
src->payload_len += data_length;
src->indata_len += data_length;
request_data_len += data_length;
}
osd_data = &src->cls.response_data;
@@ -750,7 +785,7 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req,
dst->op = cpu_to_le16(src->op);
dst->flags = cpu_to_le32(src->flags);
dst->payload_len = cpu_to_le32(src->payload_len);
dst->payload_len = cpu_to_le32(src->indata_len);
return request_data_len;
}
@@ -1810,7 +1845,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg)
ceph_decode_need(&p, end, 4, bad_put);
numops = ceph_decode_32(&p);
if (numops > CEPH_OSD_MAX_OP)
if (numops > CEPH_OSD_MAX_OPS)
goto bad_put;
if (numops != req->r_num_ops)
goto bad_put;
@@ -1821,7 +1856,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg)
int len;
len = le32_to_cpu(op->payload_len);
req->r_reply_op_len[i] = len;
req->r_ops[i].outdata_len = len;
dout(" op %d has %d bytes\n", i, len);
payload_len += len;
p += sizeof(*op);
@@ -1836,7 +1871,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg)
ceph_decode_need(&p, end, 4 + numops * 4, bad_put);
retry_attempt = ceph_decode_32(&p);
for (i = 0; i < numops; i++)
req->r_reply_op_result[i] = ceph_decode_32(&p);
req->r_ops[i].rval = ceph_decode_32(&p);
if (le16_to_cpu(msg->hdr.version) >= 6) {
p += 8 + 4; /* skip replay_version */
@@ -2187,7 +2222,8 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
goto bad;
done:
downgrade_write(&osdc->map_sem);
ceph_monc_got_osdmap(&osdc->client->monc, osdc->osdmap->epoch);
ceph_monc_got_map(&osdc->client->monc, CEPH_SUB_OSDMAP,
osdc->osdmap->epoch);
/*
* subscribe to subsequent osdmap updates if full to ensure
@@ -2646,8 +2682,8 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
round_jiffies_relative(osdc->client->options->osd_idle_ttl));
err = -ENOMEM;
osdc->req_mempool = mempool_create_kmalloc_pool(10,
sizeof(struct ceph_osd_request));
osdc->req_mempool = mempool_create_slab_pool(10,
ceph_osd_request_cache);
if (!osdc->req_mempool)
goto out;
@@ -2782,11 +2818,12 @@ EXPORT_SYMBOL(ceph_osdc_writepages);
int ceph_osdc_setup(void)
{
size_t size = sizeof(struct ceph_osd_request) +
CEPH_OSD_SLAB_OPS * sizeof(struct ceph_osd_req_op);
BUG_ON(ceph_osd_request_cache);
ceph_osd_request_cache = kmem_cache_create("ceph_osd_request",
sizeof (struct ceph_osd_request),
__alignof__(struct ceph_osd_request),
0, NULL);
ceph_osd_request_cache = kmem_cache_create("ceph_osd_request", size,
0, 0, NULL);
return ceph_osd_request_cache ? 0 : -ENOMEM;
}