Merge branch 'pnfs-submit' of git://git.open-osd.org/linux-open-osd

* 'pnfs-submit' of git://git.open-osd.org/linux-open-osd: (32 commits)
  pnfs-obj: pg_test check for max_io_size
  NFSv4.1: define nfs_generic_pg_test
  NFSv4.1: use pnfs_generic_pg_test directly by layout driver
  NFSv4.1: change pg_test return type to bool
  NFSv4.1: unify pnfs_pageio_init functions
  pnfs-obj: objlayout_encode_layoutcommit implementation
  pnfs: encode_layoutcommit
  pnfs-obj: report errors and .encode_layoutreturn Implementation.
  pnfs: encode_layoutreturn
  pnfs: layoutret_on_setattr
  pnfs: layoutreturn
  pnfs-obj: osd raid engine read/write implementation
  pnfs: support for non-rpc layout drivers
  pnfs-obj: define per-inode private structure
  pnfs: alloc and free layout_hdr layoutdriver methods
  pnfs-obj: objio_osd device information retrieval and caching
  pnfs-obj: decode layout, alloc/free lseg
  pnfs-obj: pnfs_osd XDR client implementation
  pnfs-obj: pnfs_osd XDR definitions
  pnfs-obj: objlayoutdriver module skeleton
  ...
This commit is contained in:
Linus Torvalds
2011-05-29 14:10:13 -07:00
32 changed files with 3910 additions and 282 deletions
+10
View File
@@ -87,6 +87,16 @@ config NFS_V4_1
config PNFS_FILE_LAYOUT
tristate
config PNFS_OBJLAYOUT
tristate "Provide support for the pNFS Objects Layout Driver for NFSv4.1 pNFS (EXPERIMENTAL)"
depends on NFS_FS && NFS_V4_1 && SCSI_OSD_ULD
help
Say M here if you want your pNFS client to support the Objects Layout Driver.
Requires the SCSI osd initiator library (SCSI_OSD_INITIATOR) and
upper level driver (SCSI_OSD_ULD).
If unsure, say N.
config ROOT_NFS
bool "Root file system on NFS"
depends on NFS_FS=y && IP_PNP
+3 -1
View File
@@ -15,9 +15,11 @@ nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
delegation.o idmap.o \
callback.o callback_xdr.o callback_proc.o \
nfs4namespace.o
nfs-$(CONFIG_NFS_V4_1) += pnfs.o
nfs-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o
nfs-$(CONFIG_SYSCTL) += sysctl.o
nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o
obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o
nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o
obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/
+17
View File
@@ -167,6 +167,23 @@ extern unsigned nfs4_callback_layoutrecall(
extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses);
extern void nfs4_cb_take_slot(struct nfs_client *clp);
/*
 * One decoded CB_NOTIFY_DEVICEID notification, as filled in by
 * decode_devicenotify_args().
 */
struct cb_devicenotifyitem {
/* Notification type; the decoder only accepts NOTIFY_DEVICEID4_CHANGE
 * and NOTIFY_DEVICEID4_DELETE. */
uint32_t cbd_notify_type;
/* Layout type from the wire; matched against pnfs_curr_ld->id. */
uint32_t cbd_layout_type;
/* Device id the notification refers to (copied verbatim from the wire). */
struct nfs4_deviceid cbd_dev_id;
/* "immediate" word of a change notification; 0 when none was decoded. */
uint32_t cbd_immediate;
};
/*
 * Decoded argument list of a CB_NOTIFY_DEVICEID operation.
 */
struct cb_devicenotifyargs {
/* Number of valid entries in devs[]. */
int ndevs;
/* Array allocated by decode_devicenotify_args() and released with
 * kfree() by nfs4_callback_devicenotify(). */
struct cb_devicenotifyitem *devs;
};
/*
 * Process a decoded CB_NOTIFY_DEVICEID request.  Frees args->devs before
 * returning; the second argument is unused by the implementation.
 */
extern __be32 nfs4_callback_devicenotify(
struct cb_devicenotifyargs *args,
void *dummy, struct cb_process_state *cps);
#endif /* CONFIG_NFS_V4_1 */
extern int check_gss_callback_principal(struct nfs_client *, struct svc_rqst *);
extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args,
+49 -2
View File
@@ -139,7 +139,7 @@ static u32 initiate_file_draining(struct nfs_client *clp,
spin_lock(&ino->i_lock);
if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
mark_matching_lsegs_invalid(lo, &free_me_list,
args->cbl_range.iomode))
&args->cbl_range))
rv = NFS4ERR_DELAY;
else
rv = NFS4ERR_NOMATCHING_LAYOUT;
@@ -184,7 +184,7 @@ static u32 initiate_bulk_draining(struct nfs_client *clp,
ino = lo->plh_inode;
spin_lock(&ino->i_lock);
set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
if (mark_matching_lsegs_invalid(lo, &free_me_list, range.iomode))
if (mark_matching_lsegs_invalid(lo, &free_me_list, &range))
rv = NFS4ERR_DELAY;
list_del_init(&lo->plh_bulk_recall);
spin_unlock(&ino->i_lock);
@@ -241,6 +241,53 @@ static void pnfs_recall_all_layouts(struct nfs_client *clp)
do_callback_layoutrecall(clp, &args);
}
__be32 nfs4_callback_devicenotify(struct cb_devicenotifyargs *args,
void *dummy, struct cb_process_state *cps)
{
int i;
__be32 res = 0;
struct nfs_client *clp = cps->clp;
struct nfs_server *server = NULL;
dprintk("%s: -->\n", __func__);
if (!clp) {
res = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
goto out;
}
for (i = 0; i < args->ndevs; i++) {
struct cb_devicenotifyitem *dev = &args->devs[i];
if (!server ||
server->pnfs_curr_ld->id != dev->cbd_layout_type) {
rcu_read_lock();
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
if (server->pnfs_curr_ld &&
server->pnfs_curr_ld->id == dev->cbd_layout_type) {
rcu_read_unlock();
goto found;
}
rcu_read_unlock();
dprintk("%s: layout type %u not found\n",
__func__, dev->cbd_layout_type);
continue;
}
found:
if (dev->cbd_notify_type == NOTIFY_DEVICEID4_CHANGE)
dprintk("%s: NOTIFY_DEVICEID4_CHANGE not supported, "
"deleting instead\n", __func__);
nfs4_delete_deviceid(server->pnfs_curr_ld, clp, &dev->cbd_dev_id);
}
out:
kfree(args->devs);
dprintk("%s: exit with status = %u\n",
__func__, be32_to_cpu(res));
return res;
}
int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid)
{
if (delegation == NULL)
+95 -1
View File
@@ -25,6 +25,7 @@
#if defined(CONFIG_NFS_V4_1)
#define CB_OP_LAYOUTRECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ)
#define CB_OP_DEVICENOTIFY_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ)
#define CB_OP_SEQUENCE_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \
4 + 1 + 3)
#define CB_OP_RECALLANY_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ)
@@ -284,6 +285,93 @@ out:
return status;
}
/*
 * Decode the arguments of a CB_NOTIFY_DEVICEID callback.
 *
 * @rqstp: the callback request (unused here).
 * @xdr:   stream positioned at the notification count.
 * @args:  output; on success args->devs is a kmalloc'ed array holding
 *         args->ndevs entries (NULL when the count is zero).  On any
 *         failure args->devs is NULL and args->ndevs is 0.
 *
 * Returns 0 on success, or a big-endian NFS4ERR_* status:
 * NFS4ERR_BADXDR for a short or oversized request, NFS4ERR_DELAY when the
 * array cannot be allocated, NFS4ERR_INVAL for an unexpected bitmap size,
 * notification type or opaque length.
 */
static
__be32 decode_devicenotify_args(struct svc_rqst *rqstp,
				struct xdr_stream *xdr,
				struct cb_devicenotifyargs *args)
{
	__be32 *p;
	__be32 status = 0;
	u32 tmp;
	int n, i;

	args->ndevs = 0;
	/*
	 * nfs4_callback_devicenotify() kfree()s args->devs unconditionally,
	 * so never leave it uninitialised, even for an empty list; this does
	 * not rely on the caller having zeroed *args.
	 */
	args->devs = NULL;

	/* Num of device notifications */
	p = read_buf(xdr, sizeof(uint32_t));
	if (unlikely(p == NULL)) {
		status = htonl(NFS4ERR_BADXDR);
		goto out;
	}
	n = ntohl(*p++);
	if (n <= 0)
		goto out;
	/* Guard the size computation below against overflow. */
	if (n > ULONG_MAX / sizeof(*args->devs)) {
		status = htonl(NFS4ERR_BADXDR);
		goto out;
	}

	args->devs = kmalloc(n * sizeof(*args->devs), GFP_KERNEL);
	if (!args->devs) {
		status = htonl(NFS4ERR_DELAY);
		goto out;
	}

	/* Decode each dev notification */
	for (i = 0; i < n; i++) {
		struct cb_devicenotifyitem *dev = &args->devs[i];

		/* bitmap size, notify type, opaque size, layout type, devid */
		p = read_buf(xdr, (4 * sizeof(uint32_t)) + NFS4_DEVICEID4_SIZE);
		if (unlikely(p == NULL)) {
			status = htonl(NFS4ERR_BADXDR);
			goto err;
		}

		tmp = ntohl(*p++);	/* bitmap size */
		if (tmp != 1) {
			status = htonl(NFS4ERR_INVAL);
			goto err;
		}
		dev->cbd_notify_type = ntohl(*p++);
		if (dev->cbd_notify_type != NOTIFY_DEVICEID4_CHANGE &&
		    dev->cbd_notify_type != NOTIFY_DEVICEID4_DELETE) {
			status = htonl(NFS4ERR_INVAL);
			goto err;
		}

		tmp = ntohl(*p++);	/* opaque size */
		if (((dev->cbd_notify_type == NOTIFY_DEVICEID4_CHANGE) &&
		     (tmp != NFS4_DEVICEID4_SIZE + 8)) ||
		    ((dev->cbd_notify_type == NOTIFY_DEVICEID4_DELETE) &&
		     (tmp != NFS4_DEVICEID4_SIZE + 4))) {
			status = htonl(NFS4ERR_INVAL);
			goto err;
		}
		dev->cbd_layout_type = ntohl(*p++);
		memcpy(dev->cbd_dev_id.data, p, NFS4_DEVICEID4_SIZE);
		p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);

		/*
		 * Only a change notification carries the trailing "immediate"
		 * word (hence the 4 extra opaque bytes checked above), so the
		 * test must be on the notification type, not the layout type.
		 */
		if (dev->cbd_notify_type == NOTIFY_DEVICEID4_CHANGE) {
			p = read_buf(xdr, sizeof(uint32_t));
			if (unlikely(p == NULL)) {
				status = htonl(NFS4ERR_BADXDR);
				goto err;
			}
			dev->cbd_immediate = ntohl(*p++);
		} else {
			dev->cbd_immediate = 0;
		}

		args->ndevs++;

		dprintk("%s: type %d layout 0x%x immediate %d\n",
			__func__, dev->cbd_notify_type, dev->cbd_layout_type,
			dev->cbd_immediate);
	}
out:
	dprintk("%s: status %d ndevs %d\n",
		__func__, ntohl(status), args->ndevs);
	return status;
err:
	kfree(args->devs);
	/* Do not hand a dangling pointer or a stale count back to the caller. */
	args->devs = NULL;
	args->ndevs = 0;
	goto out;
}
static __be32 decode_sessionid(struct xdr_stream *xdr,
struct nfs4_sessionid *sid)
{
@@ -639,10 +727,10 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op)
case OP_CB_RECALL_ANY:
case OP_CB_RECALL_SLOT:
case OP_CB_LAYOUTRECALL:
case OP_CB_NOTIFY_DEVICEID:
*op = &callback_ops[op_nr];
break;
case OP_CB_NOTIFY_DEVICEID:
case OP_CB_NOTIFY:
case OP_CB_PUSH_DELEG:
case OP_CB_RECALLABLE_OBJ_AVAIL:
@@ -849,6 +937,12 @@ static struct callback_op callback_ops[] = {
(callback_decode_arg_t)decode_layoutrecall_args,
.res_maxsize = CB_OP_LAYOUTRECALL_RES_MAXSZ,
},
[OP_CB_NOTIFY_DEVICEID] = {
.process_op = (callback_process_op_t)nfs4_callback_devicenotify,
.decode_args =
(callback_decode_arg_t)decode_devicenotify_args,
.res_maxsize = CB_OP_DEVICENOTIFY_RES_MAXSZ,
},
[OP_CB_SEQUENCE] = {
.process_op = (callback_process_op_t)nfs4_callback_sequence,
.decode_args = (callback_decode_arg_t)decode_cb_sequence_args,
+2
View File
@@ -290,6 +290,8 @@ static void nfs_free_client(struct nfs_client *clp)
if (clp->cl_machine_cred != NULL)
put_rpccred(clp->cl_machine_cred);
nfs4_deviceid_purge_client(clp);
kfree(clp->cl_hostname);
kfree(clp);
+2 -7
View File
@@ -512,12 +512,7 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
struct page **xdr_pages, struct page *page, unsigned int buflen)
{
struct xdr_stream stream;
struct xdr_buf buf = {
.pages = xdr_pages,
.page_len = buflen,
.buflen = buflen,
.len = buflen,
};
struct xdr_buf buf;
struct page *scratch;
struct nfs_cache_array *array;
unsigned int count = 0;
@@ -527,7 +522,7 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
if (scratch == NULL)
return -ENOMEM;
xdr_init_decode(&stream, &buf, NULL);
xdr_init_decode_pages(&stream, &buf, xdr_pages, buflen);
xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
do {
+2 -1
View File
@@ -1428,9 +1428,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
*/
void nfs4_evict_inode(struct inode *inode)
{
pnfs_destroy_layout(NFS_I(inode));
truncate_inode_pages(&inode->i_data, 0);
end_writeback(inode);
pnfs_return_layout(inode);
pnfs_destroy_layout(NFS_I(inode));
/* If we are holding a delegation, return it! */
nfs_inode_return_delegation_noreclaim(inode);
/* First call standard NFS clear_inode() code */
+1
View File
@@ -310,6 +310,7 @@ extern int nfs_migrate_page(struct address_space *,
#endif
/* nfs4proc.c */
extern void __nfs4_read_done_cb(struct nfs_read_data *);
extern void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data);
extern int nfs4_init_client(struct nfs_client *clp,
const struct rpc_timeout *timeparms,
+23 -15
View File
@@ -421,6 +421,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
struct nfs4_deviceid *id,
gfp_t gfp_flags)
{
struct nfs4_deviceid_node *d;
struct nfs4_file_layout_dsaddr *dsaddr;
int status = -EINVAL;
struct nfs_server *nfss = NFS_SERVER(lo->plh_inode);
@@ -428,7 +429,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
dprintk("--> %s\n", __func__);
if (fl->pattern_offset > lgr->range.offset) {
dprintk("%s pattern_offset %lld to large\n",
dprintk("%s pattern_offset %lld too large\n",
__func__, fl->pattern_offset);
goto out;
}
@@ -440,12 +441,14 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
}
/* find and reference the deviceid */
dsaddr = nfs4_fl_find_get_deviceid(id);
if (dsaddr == NULL) {
d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode)->pnfs_curr_ld,
NFS_SERVER(lo->plh_inode)->nfs_client, id);
if (d == NULL) {
dsaddr = get_device_info(lo->plh_inode, id, gfp_flags);
if (dsaddr == NULL)
goto out;
}
} else
dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
fl->dsaddr = dsaddr;
if (fl->first_stripe_index < 0 ||
@@ -507,12 +510,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
gfp_t gfp_flags)
{
struct xdr_stream stream;
struct xdr_buf buf = {
.pages = lgr->layoutp->pages,
.page_len = lgr->layoutp->len,
.buflen = lgr->layoutp->len,
.len = lgr->layoutp->len,
};
struct xdr_buf buf;
struct page *scratch;
__be32 *p;
uint32_t nfl_util;
@@ -524,7 +522,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
if (!scratch)
return -ENOMEM;
xdr_init_decode(&stream, &buf, NULL);
xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages, lgr->layoutp->len);
xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
/* 20 = ufl_util (4), first_stripe_index (4), pattern_offset (8),
@@ -535,7 +533,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
memcpy(id, p, sizeof(*id));
p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);
print_deviceid(id);
nfs4_print_deviceid(id);
nfl_util = be32_to_cpup(p++);
if (nfl_util & NFL4_UFLG_COMMIT_THRU_MDS)
@@ -653,16 +651,19 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
/*
* filelayout_pg_test(). Called by nfs_can_coalesce_requests()
*
* return 1 : coalesce page
* return 0 : don't coalesce page
* return true : coalesce page
* return false : don't coalesce page
*/
int
bool
filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
struct nfs_page *req)
{
u64 p_stripe, r_stripe;
u32 stripe_unit;
if (!pnfs_generic_pg_test(pgio, prev, req))
return 0;
if (!pgio->pg_lseg)
return 1;
p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT;
@@ -860,6 +861,12 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
return -ENOMEM;
}
/*
 * .free_deviceid_node hook of the files layout driver: map the generic
 * device-id node back to its enclosing nfs4_file_layout_dsaddr and free it.
 */
static void
filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d)
{
	struct nfs4_file_layout_dsaddr *dsaddr;

	dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
	nfs4_fl_free_deviceid(dsaddr);
}
static struct pnfs_layoutdriver_type filelayout_type = {
.id = LAYOUT_NFSV4_1_FILES,
.name = "LAYOUT_NFSV4_1_FILES",
@@ -872,6 +879,7 @@ static struct pnfs_layoutdriver_type filelayout_type = {
.commit_pagelist = filelayout_commit_pagelist,
.read_pagelist = filelayout_read_pagelist,
.write_pagelist = filelayout_write_pagelist,
.free_deviceid_node = filelayout_free_deveiceid_node,
};
static int __init nfs4filelayout_init(void)
+2 -6
View File
@@ -59,9 +59,7 @@ struct nfs4_pnfs_ds {
#define NFS4_DEVICE_ID_NEG_ENTRY 0x00000001
struct nfs4_file_layout_dsaddr {
struct hlist_node node;
struct nfs4_deviceid deviceid;
atomic_t ref;
struct nfs4_deviceid_node id_node;
unsigned long flags;
u32 stripe_count;
u8 *stripe_indices;
@@ -95,14 +93,12 @@ extern struct nfs_fh *
nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j);
extern void print_ds(struct nfs4_pnfs_ds *ds);
extern void print_deviceid(struct nfs4_deviceid *dev_id);
u32 nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset);
u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j);
struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg,
u32 ds_idx);
extern struct nfs4_file_layout_dsaddr *
nfs4_fl_find_get_deviceid(struct nfs4_deviceid *dev_id);
extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
struct nfs4_file_layout_dsaddr *
get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags);
+18 -101
View File
@@ -36,30 +36,6 @@
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
/*
* Device ID RCU cache. A device ID is unique per client ID and layout type.
*/
#define NFS4_FL_DEVICE_ID_HASH_BITS 5
#define NFS4_FL_DEVICE_ID_HASH_SIZE (1 << NFS4_FL_DEVICE_ID_HASH_BITS)
#define NFS4_FL_DEVICE_ID_HASH_MASK (NFS4_FL_DEVICE_ID_HASH_SIZE - 1)
static inline u32
nfs4_fl_deviceid_hash(struct nfs4_deviceid *id)
{
unsigned char *cptr = (unsigned char *)id->data;
unsigned int nbytes = NFS4_DEVICEID4_SIZE;
u32 x = 0;
while (nbytes--) {
x *= 37;
x += *cptr++;
}
return x & NFS4_FL_DEVICE_ID_HASH_MASK;
}
static struct hlist_head filelayout_deviceid_cache[NFS4_FL_DEVICE_ID_HASH_SIZE];
static DEFINE_SPINLOCK(filelayout_deviceid_lock);
/*
* Data server cache
*
@@ -89,27 +65,6 @@ print_ds(struct nfs4_pnfs_ds *ds)
ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0);
}
void
print_ds_list(struct nfs4_file_layout_dsaddr *dsaddr)
{
int i;
ifdebug(FACILITY) {
printk("%s dsaddr->ds_num %d\n", __func__,
dsaddr->ds_num);
for (i = 0; i < dsaddr->ds_num; i++)
print_ds(dsaddr->ds_list[i]);
}
}
void print_deviceid(struct nfs4_deviceid *id)
{
u32 *p = (u32 *)id;
dprintk("%s: device id= [%x%x%x%x]\n", __func__,
p[0], p[1], p[2], p[3]);
}
/* nfs4_ds_cache_lock is held */
static struct nfs4_pnfs_ds *
_data_server_lookup_locked(u32 ip_addr, u32 port)
@@ -201,13 +156,13 @@ destroy_ds(struct nfs4_pnfs_ds *ds)
kfree(ds);
}
static void
void
nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
{
struct nfs4_pnfs_ds *ds;
int i;
print_deviceid(&dsaddr->deviceid);
nfs4_print_deviceid(&dsaddr->id_node.deviceid);
for (i = 0; i < dsaddr->ds_num; i++) {
ds = dsaddr->ds_list[i];
@@ -353,12 +308,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
u8 max_stripe_index;
struct nfs4_file_layout_dsaddr *dsaddr = NULL;
struct xdr_stream stream;
struct xdr_buf buf = {
.pages = pdev->pages,
.page_len = pdev->pglen,
.buflen = pdev->pglen,
.len = pdev->pglen,
};
struct xdr_buf buf;
struct page *scratch;
/* set up xdr stream */
@@ -366,7 +316,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
if (!scratch)
goto out_err;
xdr_init_decode(&stream, &buf, NULL);
xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen);
xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
/* Get the stripe count (number of stripe index) */
@@ -431,8 +381,10 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
dsaddr->stripe_indices = stripe_indices;
stripe_indices = NULL;
dsaddr->ds_num = num;
memcpy(&dsaddr->deviceid, &pdev->dev_id, sizeof(pdev->dev_id));
nfs4_init_deviceid_node(&dsaddr->id_node,
NFS_SERVER(ino)->pnfs_curr_ld,
NFS_SERVER(ino)->nfs_client,
&pdev->dev_id);
for (i = 0; i < dsaddr->ds_num; i++) {
int j;
@@ -505,8 +457,8 @@ out_err:
static struct nfs4_file_layout_dsaddr *
decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_flags)
{
struct nfs4_file_layout_dsaddr *d, *new;
long hash;
struct nfs4_deviceid_node *d;
struct nfs4_file_layout_dsaddr *n, *new;
new = decode_device(inode, dev, gfp_flags);
if (!new) {
@@ -515,20 +467,13 @@ decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_fl
return NULL;
}
spin_lock(&filelayout_deviceid_lock);
d = nfs4_fl_find_get_deviceid(&new->deviceid);
if (d) {
spin_unlock(&filelayout_deviceid_lock);
d = nfs4_insert_deviceid_node(&new->id_node);
n = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
if (n != new) {
nfs4_fl_free_deviceid(new);
return d;
return n;
}
INIT_HLIST_NODE(&new->node);
atomic_set(&new->ref, 1);
hash = nfs4_fl_deviceid_hash(&new->deviceid);
hlist_add_head_rcu(&new->node, &filelayout_deviceid_cache[hash]);
spin_unlock(&filelayout_deviceid_lock);
return new;
}
@@ -600,35 +545,7 @@ out_free:
void
nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
{
if (atomic_dec_and_lock(&dsaddr->ref, &filelayout_deviceid_lock)) {
hlist_del_rcu(&dsaddr->node);
spin_unlock(&filelayout_deviceid_lock);
synchronize_rcu();
nfs4_fl_free_deviceid(dsaddr);
}
}
struct nfs4_file_layout_dsaddr *
nfs4_fl_find_get_deviceid(struct nfs4_deviceid *id)
{
struct nfs4_file_layout_dsaddr *d;
struct hlist_node *n;
long hash = nfs4_fl_deviceid_hash(id);
rcu_read_lock();
hlist_for_each_entry_rcu(d, n, &filelayout_deviceid_cache[hash], node) {
if (!memcmp(&d->deviceid, id, sizeof(*id))) {
if (!atomic_inc_not_zero(&d->ref))
goto fail;
rcu_read_unlock();
return d;
}
}
fail:
rcu_read_unlock();
return NULL;
nfs4_put_deviceid_node(&dsaddr->id_node);
}
/*
@@ -676,15 +593,15 @@ static void
filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr,
int err, u32 ds_addr)
{
u32 *p = (u32 *)&dsaddr->deviceid;
u32 *p = (u32 *)&dsaddr->id_node.deviceid;
printk(KERN_ERR "NFS: data server %x connection error %d."
" Deviceid [%x%x%x%x] marked out of use.\n",
ds_addr, err, p[0], p[1], p[2], p[3]);
spin_lock(&filelayout_deviceid_lock);
spin_lock(&nfs4_ds_cache_lock);
dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY;
spin_unlock(&filelayout_deviceid_lock);
spin_unlock(&nfs4_ds_cache_lock);
}
struct nfs4_pnfs_ds *
+95 -3
View File
@@ -2363,6 +2363,9 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
struct nfs4_state *state = NULL;
int status;
if (pnfs_ld_layoutret_on_setattr(inode))
pnfs_return_layout(inode);
nfs_fattr_init(fattr);
/* Search for an existing open(O_WRITE) file */
@@ -3177,6 +3180,11 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
return err;
}
/*
 * Read-completion step shared with nfs4_read_done_cb(): invalidate the
 * cached atime of the inode the read was issued against.
 */
void __nfs4_read_done_cb(struct nfs_read_data *data)
{
	struct inode *inode = data->inode;

	nfs_invalidate_atime(inode);
}
static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data)
{
struct nfs_server *server = NFS_SERVER(data->inode);
@@ -3186,7 +3194,7 @@ static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data)
return -EAGAIN;
}
nfs_invalidate_atime(data->inode);
__nfs4_read_done_cb(data);
if (task->tk_status > 0)
renew_lease(server, data->timestamp);
return 0;
@@ -3200,7 +3208,8 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
if (!nfs4_sequence_done(task, &data->res.seq_res))
return -EAGAIN;
return data->read_done_cb(task, data);
return data->read_done_cb ? data->read_done_cb(task, data) :
nfs4_read_done_cb(task, data);
}
static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg)
@@ -3245,7 +3254,8 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
{
if (!nfs4_sequence_done(task, &data->res.seq_res))
return -EAGAIN;
return data->write_done_cb(task, data);
return data->write_done_cb ? data->write_done_cb(task, data) :
nfs4_write_done_cb(task, data);
}
/* Reset the nfs_write_data to send the write to the MDS. */
@@ -5671,6 +5681,88 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
return status;
}
/*
 * rpc_call_prepare hook for LAYOUTRETURN: set up the session sequence for
 * the call and start the task only once nfs41_setup_sequence() returns 0.
 */
static void
nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata)
{
	struct nfs4_layoutreturn *ret = calldata;
	int pending;

	dprintk("--> %s\n", __func__);
	pending = nfs41_setup_sequence(ret->clp->cl_session,
				       &ret->args.seq_args,
				       &ret->res.seq_res, 0, task);
	if (!pending)
		rpc_call_start(task);
}
static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
{
struct nfs4_layoutreturn *lrp = calldata;
struct nfs_server *server;
dprintk("--> %s\n", __func__);
if (!nfs4_sequence_done(task, &lrp->res.seq_res))
return;
server = NFS_SERVER(lrp->args.inode);
if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) {
nfs_restart_rpc(task, lrp->clp);
return;
}
if (task->tk_status == 0) {
struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout;
if (lrp->res.lrs_present) {
spin_lock(&lo->plh_inode->i_lock);
pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
spin_unlock(&lo->plh_inode->i_lock);
} else
BUG_ON(!list_empty(&lo->plh_segs));
}
dprintk("<-- %s\n", __func__);
}
/*
 * rpc_release hook for LAYOUTRETURN: drop the layout header reference of
 * the inode named in the arguments and free the call data.
 */
static void nfs4_layoutreturn_release(void *calldata)
{
	struct nfs4_layoutreturn *ret = calldata;
	struct pnfs_layout_hdr *hdr;

	dprintk("--> %s\n", __func__);
	hdr = NFS_I(ret->args.inode)->layout;
	put_layout_hdr(hdr);
	kfree(ret);
	dprintk("<-- %s\n", __func__);
}
/*
 * RPC callback table used by nfs4_proc_layoutreturn(): prepare sets up the
 * session sequence, done handles the reply, release frees the call data.
 */
static const struct rpc_call_ops nfs4_layoutreturn_call_ops = {
.rpc_call_prepare = nfs4_layoutreturn_prepare,
.rpc_call_done = nfs4_layoutreturn_done,
.rpc_release = nfs4_layoutreturn_release,
};
int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp)
{
struct rpc_task *task;
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTRETURN],
.rpc_argp = &lrp->args,
.rpc_resp = &lrp->res,
};
struct rpc_task_setup task_setup_data = {
.rpc_client = lrp->clp->cl_rpcclient,
.rpc_message = &msg,
.callback_ops = &nfs4_layoutreturn_call_ops,
.callback_data = lrp,
};
int status;
dprintk("--> %s\n", __func__);
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
status = task->tk_status;
dprintk("<-- %s status=%d\n", __func__, status);
rpc_put_task(task);
return status;
}
static int
_nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev)
{
+124 -10
View File
@@ -338,7 +338,11 @@ static int nfs4_stat_to_errno(int);
1 /* layoutupdate4 layout type */ + \
1 /* NULL filelayout layoutupdate4 payload */)
#define decode_layoutcommit_maxsz (op_decode_hdr_maxsz + 3)
#define encode_layoutreturn_maxsz (8 + op_encode_hdr_maxsz + \
encode_stateid_maxsz + \
1 /* FIXME: opaque lrf_body always empty at the moment */)
#define decode_layoutreturn_maxsz (op_decode_hdr_maxsz + \
1 + decode_stateid_maxsz)
#else /* CONFIG_NFS_V4_1 */
#define encode_sequence_maxsz 0
#define decode_sequence_maxsz 0
@@ -760,7 +764,14 @@ static int nfs4_stat_to_errno(int);
decode_putfh_maxsz + \
decode_layoutcommit_maxsz + \
decode_getattr_maxsz)
#define NFS4_enc_layoutreturn_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
encode_layoutreturn_maxsz)
#define NFS4_dec_layoutreturn_sz (compound_decode_hdr_maxsz + \
decode_sequence_maxsz + \
decode_putfh_maxsz + \
decode_layoutreturn_maxsz)
const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
compound_encode_hdr_maxsz +
@@ -1864,6 +1875,7 @@ encode_layoutget(struct xdr_stream *xdr,
static int
encode_layoutcommit(struct xdr_stream *xdr,
struct inode *inode,
const struct nfs4_layoutcommit_args *args,
struct compound_hdr *hdr)
{
@@ -1872,7 +1884,7 @@ encode_layoutcommit(struct xdr_stream *xdr,
dprintk("%s: lbw: %llu type: %d\n", __func__, args->lastbytewritten,
NFS_SERVER(args->inode)->pnfs_curr_ld->id);
p = reserve_space(xdr, 48 + NFS4_STATEID_SIZE);
p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE);
*p++ = cpu_to_be32(OP_LAYOUTCOMMIT);
/* Only whole file layouts */
p = xdr_encode_hyper(p, 0); /* offset */
@@ -1883,12 +1895,49 @@ encode_layoutcommit(struct xdr_stream *xdr,
p = xdr_encode_hyper(p, args->lastbytewritten);
*p++ = cpu_to_be32(0); /* Never send time_modify_changed */
*p++ = cpu_to_be32(NFS_SERVER(args->inode)->pnfs_curr_ld->id);/* type */
*p++ = cpu_to_be32(0); /* no file layout payload */
if (NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit)
NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit(
NFS_I(inode)->layout, xdr, args);
else {
p = reserve_space(xdr, 4);
*p = cpu_to_be32(0); /* no layout-type payload */
}
hdr->nops++;
hdr->replen += decode_layoutcommit_maxsz;
return 0;
}
static void
encode_layoutreturn(struct xdr_stream *xdr,
const struct nfs4_layoutreturn_args *args,
struct compound_hdr *hdr)
{
__be32 *p;
p = reserve_space(xdr, 20);
*p++ = cpu_to_be32(OP_LAYOUTRETURN);
*p++ = cpu_to_be32(0); /* reclaim. always 0 for now */
*p++ = cpu_to_be32(args->layout_type);
*p++ = cpu_to_be32(IOMODE_ANY);
*p = cpu_to_be32(RETURN_FILE);
p = reserve_space(xdr, 16 + NFS4_STATEID_SIZE);
p = xdr_encode_hyper(p, 0);
p = xdr_encode_hyper(p, NFS4_MAX_UINT64);
spin_lock(&args->inode->i_lock);
xdr_encode_opaque_fixed(p, &args->stateid.data, NFS4_STATEID_SIZE);
spin_unlock(&args->inode->i_lock);
if (NFS_SERVER(args->inode)->pnfs_curr_ld->encode_layoutreturn) {
NFS_SERVER(args->inode)->pnfs_curr_ld->encode_layoutreturn(
NFS_I(args->inode)->layout, xdr, args);
} else {
p = reserve_space(xdr, 4);
*p = cpu_to_be32(0);
}
hdr->nops++;
hdr->replen += decode_layoutreturn_maxsz;
}
#endif /* CONFIG_NFS_V4_1 */
/*
@@ -2706,9 +2755,30 @@ static void nfs4_xdr_enc_layoutget(struct rpc_rqst *req,
/*
* Encode LAYOUTCOMMIT request
*/
static int nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req,
struct xdr_stream *xdr,
struct nfs4_layoutcommit_args *args)
static void nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req,
struct xdr_stream *xdr,
struct nfs4_layoutcommit_args *args)
{
struct nfs4_layoutcommit_data *data =
container_of(args, struct nfs4_layoutcommit_data, args);
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
encode_compound_hdr(xdr, req, &hdr);
encode_sequence(xdr, &args->seq_args, &hdr);
encode_putfh(xdr, NFS_FH(args->inode), &hdr);
encode_layoutcommit(xdr, data->args.inode, args, &hdr);
encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
}
/*
* Encode LAYOUTRETURN request
*/
static void nfs4_xdr_enc_layoutreturn(struct rpc_rqst *req,
struct xdr_stream *xdr,
struct nfs4_layoutreturn_args *args)
{
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
@@ -2717,10 +2787,8 @@ static int nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req,
encode_compound_hdr(xdr, req, &hdr);
encode_sequence(xdr, &args->seq_args, &hdr);
encode_putfh(xdr, NFS_FH(args->inode), &hdr);
encode_layoutcommit(xdr, args, &hdr);
encode_getfattr(xdr, args->bitmask, &hdr);
encode_layoutreturn(xdr, args, &hdr);
encode_nops(&hdr);
return 0;
}
#endif /* CONFIG_NFS_V4_1 */
@@ -5203,6 +5271,27 @@ out_overflow:
return -EIO;
}
/*
 * Decode a LAYOUTRETURN result: the op header, the "stateid present" word
 * and, when that word is non-zero, the stateid itself.
 *
 * Returns 0 on success, the op-header or stateid decode status on failure,
 * or -EIO when the reply is too short to hold the presence word.
 */
static int decode_layoutreturn(struct xdr_stream *xdr,
			       struct nfs4_layoutreturn_res *res)
{
	__be32 *p;
	int status;

	status = decode_op_hdr(xdr, OP_LAYOUTRETURN);
	if (status)
		return status;

	p = xdr_inline_decode(xdr, 4);
	if (unlikely(!p)) {
		print_overflow_msg(__func__, xdr);
		return -EIO;
	}

	res->lrs_present = be32_to_cpup(p);
	if (!res->lrs_present)
		return status;

	return decode_stateid(xdr, &res->stateid);
}
static int decode_layoutcommit(struct xdr_stream *xdr,
struct rpc_rqst *req,
struct nfs4_layoutcommit_res *res)
@@ -6319,6 +6408,30 @@ out:
return status;
}
/*
 * Decode LAYOUTRETURN response
 *
 * Walks the compound reply in order (header, SEQUENCE, PUTFH,
 * LAYOUTRETURN) and stops at the first step that reports an error.
 * Returns that step's status, or 0 when every step succeeded.
 */
static int nfs4_xdr_dec_layoutreturn(struct rpc_rqst *rqstp,
				     struct xdr_stream *xdr,
				     struct nfs4_layoutreturn_res *res)
{
	struct compound_hdr hdr;
	int status;

	status = decode_compound_hdr(xdr, &hdr);
	if (!status)
		status = decode_sequence(xdr, &res->seq_res, rqstp);
	if (!status)
		status = decode_putfh(xdr);
	if (!status)
		status = decode_layoutreturn(xdr, res);

	return status;
}
/*
* Decode LAYOUTCOMMIT response
*/
@@ -6547,6 +6660,7 @@ struct rpc_procinfo nfs4_procedures[] = {
PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo),
PROC(LAYOUTGET, enc_layoutget, dec_layoutget),
PROC(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit),
PROC(LAYOUTRETURN, enc_layoutreturn, dec_layoutreturn),
#endif /* CONFIG_NFS_V4_1 */
};
+5
View File
@@ -0,0 +1,5 @@
#
# Makefile for the pNFS Objects Layout Driver kernel module
#
objlayoutdriver-y := objio_osd.o pnfs_osd_xdr_cli.o objlayout.o
obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayoutdriver.o
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+187
View File
@@ -0,0 +1,187 @@
/*
* Data types and function declarations for interfacing with the
* pNFS standard object layout driver.
*
* Copyright (C) 2007 Panasas Inc. [year of first publication]
* All rights reserved.
*
* Benny Halevy <bhalevy@panasas.com>
* Boaz Harrosh <bharrosh@panasas.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2
* See the file COPYING included with this distribution for more details.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the Panasas company nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _OBJLAYOUT_H
#define _OBJLAYOUT_H
#include <linux/nfs_fs.h>
#include <linux/pnfs_osd_xdr.h>
#include "../pnfs.h"
/*
* per-inode layout
*/
/*
 * Objects-layout private wrapper around the generic pNFS layout header.
 * OBJLAYOUT() converts a pnfs_layout_hdr pointer back to this structure.
 */
struct objlayout {
/* Embedded generic layout header; must be the member OBJLAYOUT() uses. */
struct pnfs_layout_hdr pnfs_layout;
/* for layout_commit */
enum osd_delta_space_valid_enum {
OBJ_DSU_INIT = 0,
OBJ_DSU_VALID,
OBJ_DSU_INVALID,
} delta_space_valid;
s64 delta_space_used; /* consumed by write ops */
/* for layout_return */
/* NOTE(review): lock is taken by objlayout_add_delta_space_used();
 * whether it also guards err_list is not visible here - confirm. */
spinlock_t lock;
struct list_head err_list;
};
/* Map an embedded generic layout header back to its enclosing objlayout. */
static inline struct objlayout *
OBJLAYOUT(struct pnfs_layout_hdr *lo)
{
	struct objlayout *objlay;

	objlay = container_of(lo, struct objlayout, pnfs_layout);
	return objlay;
}
/*
* per-I/O operation state
* embedded in objects provider io_state data structure
*/
struct objlayout_io_state {
	/* Layout segment this I/O runs against */
	struct pnfs_layout_segment *lseg;

	/* Description of the I/O request */
	struct page **pages;
	unsigned pgbase;
	unsigned nr_pages;
	unsigned long count;
	loff_t offset;
	bool sync;

	void *rpcdata;

	/* Results ("res") */
	int status;
	int eof;
	int committed;

	/* Error reporting (layout_return) */
	struct list_head err_list;
	unsigned num_comps;
	/*
	 * Array of num_comps error descriptors: one entry per device of the
	 * osd_layout that participates in the I/O. The io_engine is
	 * responsible for allocating the array and for setting num_comps.
	 */
	struct pnfs_osd_ioerr *ioerrs;
};
/*
* Raid engine I/O API
*/
/* Prototypes only; the definitions live outside this header. */

/* Layout segment allocate/free pair. The new segment is presumably handed
 * back through @outp, decoded from @xdr -- TODO confirm against the
 * definition.
 */
extern int objio_alloc_lseg(struct pnfs_layout_segment **outp,
struct pnfs_layout_hdr *pnfslay,
struct pnfs_layout_range *range,
struct xdr_stream *xdr,
gfp_t gfp_flags);
extern void objio_free_lseg(struct pnfs_layout_segment *lseg);
/* Per-I/O state allocate/free pair for a given layout segment. */
extern int objio_alloc_io_state(
struct pnfs_layout_segment *lseg,
struct objlayout_io_state **outp,
gfp_t gfp_flags);
extern void objio_free_io_state(struct objlayout_io_state *state);
/* Read/write entry points operating on a prepared objlayout_io_state. */
extern ssize_t objio_read_pagelist(struct objlayout_io_state *ol_state);
extern ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state,
bool stable);
/*
* callback API
*/
/* Prototype only. Presumably records the outcome for component @index of
 * @state (see objlayout_io_state.ioerrs) -- TODO confirm against the
 * definition.
 */
extern void objlayout_io_set_result(struct objlayout_io_state *state,
unsigned index, struct pnfs_osd_objid *pooid,
int osd_error, u64 offset, u64 length, bool is_write);
/*
 * Accumulate @space_used into the per-inode delta-space-used counter of the
 * layout that @state's segment belongs to.
 *
 * Once the counter has been marked OBJ_DSU_INVALID nothing more is added:
 * the protocol mandates that the DSU is either accurate or not reported at
 * all, so one bad I/O invalidates the complete report.
 */
static inline void
objlayout_add_delta_space_used(struct objlayout_io_state *state, s64 space_used)
{
	struct objlayout *objlay = OBJLAYOUT(state->lseg->pls_layout);

	spin_lock(&objlay->lock);
	if (objlay->delta_space_valid == OBJ_DSU_INVALID)
		goto unlock;

	objlay->delta_space_valid = OBJ_DSU_VALID;
	objlay->delta_space_used += space_used;
unlock:
	spin_unlock(&objlay->lock);
}
/* Prototypes only; the definitions live outside this header. */

/* I/O completion notifications carrying the final @status of the operation. */
extern void objlayout_read_done(struct objlayout_io_state *state,
ssize_t status, bool sync);
extern void objlayout_write_done(struct objlayout_io_state *state,
ssize_t status, bool sync);
/* Device-information get/put pair. The result is presumably returned
 * through @deviceaddr and released with objlayout_put_deviceinfo() --
 * TODO confirm against the definitions.
 */
extern int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay,
struct nfs4_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr,
gfp_t gfp_flags);
extern void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr);
/*
* exported generic objects function vectors
*/
/* Prototypes only; the definitions live outside this header. */

/* Layout header allocate/free pair. */
extern struct pnfs_layout_hdr *objlayout_alloc_layout_hdr(struct inode *, gfp_t gfp_flags);
extern void objlayout_free_layout_hdr(struct pnfs_layout_hdr *);
/* Layout segment allocate/free pair. */
extern struct pnfs_layout_segment *objlayout_alloc_lseg(
struct pnfs_layout_hdr *,
struct nfs4_layoutget_res *,
gfp_t gfp_flags);
extern void objlayout_free_lseg(struct pnfs_layout_segment *);
/* Read/write entry points; each reports an enum pnfs_try_status. */
extern enum pnfs_try_status objlayout_read_pagelist(
struct nfs_read_data *);
extern enum pnfs_try_status objlayout_write_pagelist(
struct nfs_write_data *,
int how);
/* XDR encoders for the layoutcommit and layoutreturn arguments. */
extern void objlayout_encode_layoutcommit(
struct pnfs_layout_hdr *,
struct xdr_stream *,
const struct nfs4_layoutcommit_args *);
extern void objlayout_encode_layoutreturn(
struct pnfs_layout_hdr *,
struct xdr_stream *,
const struct nfs4_layoutreturn_args *);
#endif /* _OBJLAYOUT_H */
+412
View File
@@ -0,0 +1,412 @@
/*
* Object-Based pNFS Layout XDR layer
*
* Copyright (C) 2007 Panasas Inc. [year of first publication]
* All rights reserved.
*
* Benny Halevy <bhalevy@panasas.com>
* Boaz Harrosh <bharrosh@panasas.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2
* See the file COPYING included with this distribution for more details.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the Panasas company nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <linux/pnfs_osd_xdr.h>
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
/*
* The following implementation is based on RFC5664
*/
/*
* struct pnfs_osd_objid {
* struct nfs4_deviceid oid_device_id;
* u64 oid_partition_id;
* u64 oid_object_id;
* }; // xdr size 32 bytes
*/
/*
 * Unpack one object id (device id, partition id, object id) starting at @p
 * into @objid. Returns the position just past the decoded data.
 */
static __be32 *
_osd_xdr_decode_objid(__be32 *p, struct pnfs_osd_objid *objid)
{
	p = xdr_decode_opaque_fixed(p, objid->oid_device_id.data,
				    sizeof(objid->oid_device_id.data));
	p = xdr_decode_hyper(p, &objid->oid_partition_id);

	return xdr_decode_hyper(p, &objid->oid_object_id);
}
/*
* struct pnfs_osd_opaque_cred {
* u32 cred_len;
* void *cred;
* }; // xdr size [variable]
* The return pointers are from the xdr buffer
*/
/*
 * Decode one variable-length opaque credential from @xdr into @opaque_cred.
 *
 * Wire format: a 4-byte big-endian length followed by that many bytes of
 * data. Nothing is copied: on success opaque_cred->cred points into the xdr
 * buffer.
 *
 * Returns 0 on success, or -EINVAL when the stream cannot supply the length
 * word or the credential bytes.
 */
static int
_osd_xdr_decode_opaque_cred(struct pnfs_osd_opaque_cred *opaque_cred,
			    struct xdr_stream *xdr)
{
	/*
	 * The length is a full 32-bit XDR word, so request all 4 bytes of it
	 * rather than relying on the decoder rounding a 1-byte request up.
	 */
	__be32 *p = xdr_inline_decode(xdr, 4);

	if (!p)
		return -EINVAL;
	opaque_cred->cred_len = be32_to_cpu(*p);

	p = xdr_inline_decode(xdr, opaque_cred->cred_len);
	if (!p)
		return -EINVAL;

	opaque_cred->cred = p;
	return 0;
}
/*
* struct pnfs_osd_object_cred {
* struct pnfs_osd_objid oc_object_id;
* u32 oc_osd_version;
* u32 oc_cap_key_sec;
* struct pnfs_osd_opaque_cred oc_cap_key
* struct pnfs_osd_opaque_cred oc_cap;
* }; // xdr size 32 + 4 + 4 + [variable] + [variable]
*/
/*
 * Decode one object credential from @xdr into @comp: the fixed-size part
 * first, then the two variable-length opaque credentials.
 *
 * Returns 0 on success, -EIO when the fixed part is not available, or the
 * error reported by _osd_xdr_decode_opaque_cred().
 */
static int
_osd_xdr_decode_object_cred(struct pnfs_osd_object_cred *comp,
			    struct xdr_stream *xdr)
{
	int ret;
	__be32 *p;

	/* objid (32) + oc_osd_version (4) + oc_cap_key_sec (4) */
	p = xdr_inline_decode(xdr, 32 + 4 + 4);
	if (!p)
		return -EIO;

	p = _osd_xdr_decode_objid(p, &comp->oc_object_id);
	comp->oc_osd_version = be32_to_cpup(p);
	comp->oc_cap_key_sec = be32_to_cpup(p + 1);

	ret = _osd_xdr_decode_opaque_cred(&comp->oc_cap_key, xdr);
	if (unlikely(ret))
		return ret;

	return _osd_xdr_decode_opaque_cred(&comp->oc_cap, xdr);
}
/*
* struct pnfs_osd_data_map {
* u32 odm_num_comps;
* u64 odm_stripe_unit;
* u32 odm_group_width;
* u32 odm_group_depth;
* u32 odm_mirror_cnt;
* u32 odm_raid_algorithm;
* }; // xdr size 4 + 8 + 4 + 4 + 4 + 4
*/
/* XDR size in bytes of struct pnfs_osd_data_map: five u32 fields, one u64. */
static inline int
_osd_data_map_xdr_sz(void)
{
	enum {
		ODM_NUM_U32 = 5,	/* num_comps, group_width, group_depth,
					 * mirror_cnt, raid_algorithm */
		ODM_NUM_U64 = 1,	/* stripe_unit */
	};

	return ODM_NUM_U32 * 4 + ODM_NUM_U64 * 8;
}
/*
 * Unpack the data map found at @p into @data_map, field by field in wire
 * order, and log the decoded values. Returns the position just past it.
 */
static __be32 *
_osd_xdr_decode_data_map(__be32 *p, struct pnfs_osd_data_map *data_map)
{
	data_map->odm_num_comps = be32_to_cpup(p);
	p = xdr_decode_hyper(p + 1, &data_map->odm_stripe_unit);

	/* Four consecutive 32-bit words follow the stripe unit. */
	data_map->odm_group_width = be32_to_cpup(p);
	data_map->odm_group_depth = be32_to_cpup(p + 1);
	data_map->odm_mirror_cnt = be32_to_cpup(p + 2);
	data_map->odm_raid_algorithm = be32_to_cpup(p + 3);
	p += 4;

	dprintk("%s: odm_num_comps=%u odm_stripe_unit=%llu odm_group_width=%u "
		"odm_group_depth=%u odm_mirror_cnt=%u odm_raid_algorithm=%u\n",
		__func__,
		data_map->odm_num_comps,
		(unsigned long long)data_map->odm_stripe_unit,
		data_map->odm_group_width,
		data_map->odm_group_depth,
		data_map->odm_mirror_cnt,
		data_map->odm_raid_algorithm);

	return p;
}
/*
 * Decode the leading, fixed-size part of a layout from @xdr: the data map
 * followed by olo_comps_index and olo_num_comps. @iter is reset and primed
 * with the number of components that remain to be decoded.
 *
 * Returns 0 on success or -EINVAL when the stream is too short.
 */
int pnfs_osd_xdr_decode_layout_map(struct pnfs_osd_layout *layout,
	struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr)
{
	__be32 *p;

	memset(iter, 0, sizeof(*iter));

	p = xdr_inline_decode(xdr, _osd_data_map_xdr_sz() + 4 + 4);
	if (unlikely(!p))
		return -EINVAL;

	p = _osd_xdr_decode_data_map(p, &layout->olo_map);
	layout->olo_comps_index = be32_to_cpup(p);
	layout->olo_num_comps = be32_to_cpup(p + 1);

	iter->total_comps = layout->olo_num_comps;
	return 0;
}
/*
 * Decode the next layout component from @xdr into @comp.
 *
 * @iter must have been primed by pnfs_osd_xdr_decode_layout_map(); it counts
 * how many of the layout's components have been decoded so far.
 *
 * Returns true when a component was decoded, false when iteration must stop.
 * On every return *@err is defined: 0 after a successful decode or a clean
 * end of iteration, otherwise the decode error. This lets a caller test
 * *@err after its loop even when the layout has no components at all.
 */
bool pnfs_osd_xdr_decode_layout_comp(struct pnfs_osd_object_cred *comp,
	struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr,
	int *err)
{
	BUG_ON(iter->decoded_comps > iter->total_comps);
	if (iter->decoded_comps == iter->total_comps) {
		/* Clean end of iteration: do not leave *err indeterminate */
		*err = 0;
		return false;
	}

	*err = _osd_xdr_decode_object_cred(comp, xdr);
	if (unlikely(*err)) {
		dprintk("%s: _osd_xdr_decode_object_cred=>%d decoded_comps=%d "
			"total_comps=%d\n", __func__, *err,
			iter->decoded_comps, iter->total_comps);
		return false; /* stop the loop */
	}

	/* u64 values are cast explicitly to match the %llx conversions */
	dprintk("%s: dev(%llx:%llx) par=0x%llx obj=0x%llx "
		"key_len=%u cap_len=%u\n",
		__func__,
		_DEVID_LO(&comp->oc_object_id.oid_device_id),
		_DEVID_HI(&comp->oc_object_id.oid_device_id),
		(unsigned long long)comp->oc_object_id.oid_partition_id,
		(unsigned long long)comp->oc_object_id.oid_object_id,
		comp->oc_cap_key.cred_len, comp->oc_cap.cred_len);

	iter->decoded_comps++;
	return true;
}
/*
* Get Device Information Decoding
*
* Note: since Device Information is currently done synchronously, all
* variable-length string fields are left inside the rpc buffer and are only
* pointed to by the pnfs_osd_deviceaddr members. So the read buffer
* must not be freed while the returned information is in use.
*/
/*
*struct nfs4_string {
* unsigned int len;
* char *data;
*}; // size [variable]
* NOTE: Returned string points to inside the XDR buffer
*/
/*
 * Read a length-prefixed opaque at @p into @str without copying: str->data
 * is pointed at the bytes in place. Returns the position just past the data,
 * rounded up to whole XDR words.
 */
static __be32 *
__read_u8_opaque(__be32 *p, struct nfs4_string *str)
{
	str->len = be32_to_cpup(p);
	p++;
	str->data = (char *)p;

	return p + XDR_QUADLEN(str->len);
}
/*
* struct pnfs_osd_targetid {
* u32 oti_type;
* struct nfs4_string oti_scsi_device_id;
* };// size 4 + [variable]
*/
/*
 * Read a target id at @p into @targetid. Only the two SCSI target types
 * carry an opaque body; for any other type just the type word is consumed.
 * Returns the position just past the decoded data.
 */
static __be32 *
__read_targetid(__be32 *p, struct pnfs_osd_targetid *targetid)
{
	u32 oti_type = be32_to_cpup(p++);

	targetid->oti_type = oti_type;

	if (oti_type == OBJ_TARGET_SCSI_NAME ||
	    oti_type == OBJ_TARGET_SCSI_DEVICE_ID)
		p = __read_u8_opaque(p, &targetid->oti_scsi_device_id);

	return p;
}
/*
* struct pnfs_osd_net_addr {
* struct nfs4_string r_netid;
* struct nfs4_string r_addr;
* };
*/
/*
 * Read a network address at @p into @netaddr: r_netid first, then r_addr.
 * Returns the position just past the decoded data.
 */
static __be32 *
__read_net_addr(__be32 *p, struct pnfs_osd_net_addr *netaddr)
{
	__be32 *after_netid = __read_u8_opaque(p, &netaddr->r_netid);

	return __read_u8_opaque(after_netid, &netaddr->r_addr);
}
/*
* struct pnfs_osd_targetaddr {
* u32 ota_available;
* struct pnfs_osd_net_addr ota_netaddr;
* };
*/
/*
 * Read a target address at @p into @targetaddr. The network address is
 * decoded only when the ota_available word is non-zero.
 * Returns the position just past the decoded data.
 */
static __be32 *
__read_targetaddr(__be32 *p, struct pnfs_osd_targetaddr *targetaddr)
{
	u32 ota_available = be32_to_cpup(p++);

	targetaddr->ota_available = ota_available;
	if (!ota_available)
		return p;

	return __read_net_addr(p, &targetaddr->ota_netaddr);
}
/*
* struct pnfs_osd_deviceaddr {
* struct pnfs_osd_targetid oda_targetid;
* struct pnfs_osd_targetaddr oda_targetaddr;
* u8 oda_lun[8];
* struct nfs4_string oda_systemid;
* struct pnfs_osd_object_cred oda_root_obj_cred;
* struct nfs4_string oda_osdname;
* };
*/
/* We need this version for the pnfs_osd_xdr_decode_deviceaddr which does
* not have an xdr_stream
*/
/*
 * Raw-pointer variant of the opaque credential decoder: read the length
 * word at @p, point opaque_cred->cred at the bytes that follow, and return
 * the position just past them (rounded up to whole XDR words).
 */
static __be32 *
__read_opaque_cred(__be32 *p,
		   struct pnfs_osd_opaque_cred *opaque_cred)
{
	opaque_cred->cred_len = be32_to_cpu(*p);
	p++;
	opaque_cred->cred = p;
	p += XDR_QUADLEN(opaque_cred->cred_len);

	return p;
}
/*
 * Raw-pointer variant of the object credential decoder: object id, two
 * 32-bit words, then the two opaque credentials.
 * Returns the position just past the decoded data.
 */
static __be32 *
__read_object_cred(__be32 *p, struct pnfs_osd_object_cred *comp)
{
	p = _osd_xdr_decode_objid(p, &comp->oc_object_id);

	comp->oc_osd_version = be32_to_cpup(p);
	comp->oc_cap_key_sec = be32_to_cpup(p + 1);
	p += 2;

	p = __read_opaque_cred(p, &comp->oc_cap_key);
	return __read_opaque_cred(p, &comp->oc_cap);
}
/*
 * Decode a complete device address from the raw XDR words at @p into
 * @deviceaddr. Variable-length members are not copied; they point into the
 * buffer at @p.
 */
void pnfs_osd_xdr_decode_deviceaddr(
	struct pnfs_osd_deviceaddr *deviceaddr, __be32 *p)
{
	struct nfs4_string *osdname = &deviceaddr->oda_osdname;

	p = __read_targetid(p, &deviceaddr->oda_targetid);
	p = __read_targetaddr(p, &deviceaddr->oda_targetaddr);
	p = xdr_decode_opaque_fixed(p, deviceaddr->oda_lun,
				    sizeof(deviceaddr->oda_lun));
	p = __read_u8_opaque(p, &deviceaddr->oda_systemid);
	p = __read_object_cred(p, &deviceaddr->oda_root_obj_cred);
	__read_u8_opaque(p, osdname);

	/*
	 * libosd likes this terminated in dbg. It's last, so no problems.
	 * NOTE(review): this writes one byte at data[len]; it assumes the
	 * buffer extends at least that far -- confirm against the caller.
	 */
	osdname->data[osdname->len] = 0;
}
/*
* struct pnfs_osd_layoutupdate {
* u32 dsu_valid;
* s64 dsu_delta;
* u32 olu_ioerr_flag;
* }; xdr size 4 + 8 + 4
*/
/*
 * Encode a layoutupdate into @xdr.
 *
 * Wire format: dsu_valid (4 bytes), then dsu_delta (8 bytes) only when
 * dsu_valid is set, then olu_ioerr_flag (4 bytes). Exactly as many bytes as
 * are written get reserved, so no uninitialised padding ends up in the
 * stream when dsu_delta is omitted.
 *
 * Returns 0 on success or -E2BIG when @xdr has no room left.
 */
int
pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr,
				 struct pnfs_osd_layoutupdate *lou)
{
	size_t len = 4 + (lou->dsu_valid ? 8 : 0) + 4;
	__be32 *p = xdr_reserve_space(xdr, len);

	if (!p)
		return -E2BIG;

	*p++ = cpu_to_be32(lou->dsu_valid);
	if (lou->dsu_valid)
		p = xdr_encode_hyper(p, lou->dsu_delta);
	*p = cpu_to_be32(lou->olu_ioerr_flag);
	return 0;
}
/*
* struct pnfs_osd_objid {
* struct nfs4_deviceid oid_device_id;
* u64 oid_partition_id;
* u64 oid_object_id;
* }; // xdr size 32 bytes
*/
/*
 * Pack @object_id (device id, partition id, object id) at @p.
 * Returns the position just past the encoded data.
 */
static inline __be32 *
pnfs_osd_xdr_encode_objid(__be32 *p, struct pnfs_osd_objid *object_id)
{
	p = xdr_encode_opaque_fixed(p, object_id->oid_device_id.data,
				    sizeof(object_id->oid_device_id.data));
	p = xdr_encode_hyper(p, object_id->oid_partition_id);

	return xdr_encode_hyper(p, object_id->oid_object_id);
}
/*
* struct pnfs_osd_ioerr {
* struct pnfs_osd_objid oer_component;
* u64 oer_comp_offset;
* u64 oer_comp_length;
* u32 oer_iswrite;
* u32 oer_errno;
* }; // xdr size 32 + 24 bytes
*/
/*
 * Pack @ioerr at @p: component object id, offset, length, then the
 * is-write flag and the error number as two 32-bit words. The caller must
 * already have room at @p for the whole record.
 */
void pnfs_osd_xdr_encode_ioerr(__be32 *p, struct pnfs_osd_ioerr *ioerr)
{
	p = pnfs_osd_xdr_encode_objid(p, &ioerr->oer_component);
	p = xdr_encode_hyper(p, ioerr->oer_comp_offset);
	p = xdr_encode_hyper(p, ioerr->oer_comp_length);

	p[0] = cpu_to_be32(ioerr->oer_iswrite);
	p[1] = cpu_to_be32(ioerr->oer_errno);
}
__be32 *pnfs_osd_xdr_ioerr_reserve_space(struct xdr_stream *xdr)
{
__be32 *p;
p = xdr_reserve_space(xdr, 32 + 24);
if (unlikely(!p))
dprintk("%s: out of xdr space\n", __func__);
return p;
}
+30 -32
View File
@@ -204,6 +204,21 @@ nfs_wait_on_request(struct nfs_page *req)
TASK_UNINTERRUPTIBLE);
}
static bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req)
{
/*
* FIXME: ideally we should be able to coalesce all requests
* that are not block boundary aligned, but currently this
* is problematic for the case of bsize < PAGE_CACHE_SIZE,
* since nfs_flush_multi and nfs_pagein_multi assume you
* can have only one struct nfs_page.
*/
if (desc->pg_bsize < PAGE_SIZE)
return 0;
return desc->pg_count + req->wb_bytes <= desc->pg_bsize;
}
/**
* nfs_pageio_init - initialise a page io descriptor
* @desc: pointer to descriptor
@@ -229,6 +244,8 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
desc->pg_ioflags = io_flags;
desc->pg_error = 0;
desc->pg_lseg = NULL;
desc->pg_test = nfs_generic_pg_test;
pnfs_pageio_init(desc, inode);
}
/**
@@ -242,29 +259,23 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
*
* Return 'true' if this is the case, else return 'false'.
*/
static int nfs_can_coalesce_requests(struct nfs_page *prev,
struct nfs_page *req,
struct nfs_pageio_descriptor *pgio)
static bool nfs_can_coalesce_requests(struct nfs_page *prev,
struct nfs_page *req,
struct nfs_pageio_descriptor *pgio)
{
if (req->wb_context->cred != prev->wb_context->cred)
return 0;
return false;
if (req->wb_lock_context->lockowner != prev->wb_lock_context->lockowner)
return 0;
return false;
if (req->wb_context->state != prev->wb_context->state)
return 0;
return false;
if (req->wb_index != (prev->wb_index + 1))
return 0;
return false;
if (req->wb_pgbase != 0)
return 0;
return false;
if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
return 0;
/*
* Non-whole file layouts need to check that req is inside of
* pgio->pg_lseg.
*/
if (pgio->pg_test && !pgio->pg_test(pgio, prev, req))
return 0;
return 1;
return false;
return pgio->pg_test(pgio, prev, req);
}
/**
@@ -278,31 +289,18 @@ static int nfs_can_coalesce_requests(struct nfs_page *prev,
static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
struct nfs_page *req)
{
size_t newlen = req->wb_bytes;
if (desc->pg_count != 0) {
struct nfs_page *prev;
/*
* FIXME: ideally we should be able to coalesce all requests
* that are not block boundary aligned, but currently this
* is problematic for the case of bsize < PAGE_CACHE_SIZE,
* since nfs_flush_multi and nfs_pagein_multi assume you
* can have only one struct nfs_page.
*/
if (desc->pg_bsize < PAGE_SIZE)
return 0;
newlen += desc->pg_count;
if (newlen > desc->pg_bsize)
return 0;
prev = nfs_list_entry(desc->pg_list.prev);
if (!nfs_can_coalesce_requests(prev, req, desc))
return 0;
} else
} else {
desc->pg_base = req->wb_pgbase;
}
nfs_list_remove_request(req);
nfs_list_add_request(req, &desc->pg_list);
desc->pg_count = newlen;
desc->pg_count += req->wb_bytes;
return 1;
}

Some files were not shown because too many files have changed in this diff Show More