Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs

* 'for-linus' of git://oss.sgi.com/xfs/xfs: (69 commits)
  xfs: add AIL pushing tracepoints
  xfs: put in missed fix for merge problem
  xfs: do not flush data workqueues in xfs_flush_buftarg
  xfs: remove XFS_bflush
  xfs: remove xfs_buf_target_name
  xfs: use xfs_ioerror_alert in xfs_buf_iodone_callbacks
  xfs: clean up xfs_ioerror_alert
  xfs: clean up buffer allocation
  xfs: remove buffers from the delwri list in xfs_buf_stale
  xfs: remove XFS_BUF_STALE and XFS_BUF_SUPER_STALE
  xfs: remove XFS_BUF_SET_VTYPE and XFS_BUF_SET_VTYPE_REF
  xfs: remove XFS_BUF_FINISH_IOWAIT
  xfs: remove xfs_get_buftarg_list
  xfs: fix buffer flushing during unmount
  xfs: optimize fsync on directories
  xfs: reduce the number of log forces from tail pushing
  xfs: Don't allocate new buffers on every call to _xfs_buf_find
  xfs: simplify xfs_trans_ijoin* again
  xfs: unlock the inode before log force in xfs_change_file_space
  xfs: unlock the inode before log force in xfs_fs_nfs_commit_metadata
  ...
This commit is contained in:
Linus Torvalds
2011-10-28 10:31:42 -07:00
47 changed files with 2041 additions and 2332 deletions
+2 -2
View File
@@ -452,7 +452,7 @@ xfs_alloc_read_agfl(
if (error)
return error;
ASSERT(!xfs_buf_geterror(bp));
XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGFL, XFS_AGFL_REF);
xfs_buf_set_ref(bp, XFS_AGFL_REF);
*bpp = bp;
return 0;
}
@@ -2139,7 +2139,7 @@ xfs_read_agf(
xfs_trans_brelse(tp, *bpp);
return XFS_ERROR(EFSCORRUPTED);
}
XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_AGF, XFS_AGF_REF);
xfs_buf_set_ref(*bpp, XFS_AGF_REF);
return 0;
}
+42 -77
View File
@@ -38,40 +38,6 @@
#include <linux/pagevec.h>
#include <linux/writeback.h>
/*
* Prime number of hash buckets since address is used as the key.
*/
#define NVSYNC 37
#define to_ioend_wq(v) (&xfs_ioend_wq[((unsigned long)v) % NVSYNC])
static wait_queue_head_t xfs_ioend_wq[NVSYNC];
void __init
xfs_ioend_init(void)
{
int i;
for (i = 0; i < NVSYNC; i++)
init_waitqueue_head(&xfs_ioend_wq[i]);
}
void
xfs_ioend_wait(
xfs_inode_t *ip)
{
wait_queue_head_t *wq = to_ioend_wq(ip);
wait_event(*wq, (atomic_read(&ip->i_iocount) == 0));
}
STATIC void
xfs_ioend_wake(
xfs_inode_t *ip)
{
if (atomic_dec_and_test(&ip->i_iocount))
wake_up(to_ioend_wq(ip));
}
void
xfs_count_page_state(
struct page *page,
@@ -115,25 +81,20 @@ xfs_destroy_ioend(
xfs_ioend_t *ioend)
{
struct buffer_head *bh, *next;
struct xfs_inode *ip = XFS_I(ioend->io_inode);
for (bh = ioend->io_buffer_head; bh; bh = next) {
next = bh->b_private;
bh->b_end_io(bh, !ioend->io_error);
}
/*
* Volume managers supporting multiple paths can send back ENODEV
* when the final path disappears. In this case continuing to fill
* the page cache with dirty data which cannot be written out is
* evil, so prevent that.
*/
if (unlikely(ioend->io_error == -ENODEV)) {
xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ,
__FILE__, __LINE__);
if (ioend->io_iocb) {
if (ioend->io_isasync) {
aio_complete(ioend->io_iocb, ioend->io_error ?
ioend->io_error : ioend->io_result, 0);
}
inode_dio_done(ioend->io_inode);
}
xfs_ioend_wake(ip);
mempool_free(ioend, xfs_ioend_pool);
}
@@ -155,6 +116,15 @@ xfs_ioend_new_eof(
return isize > ip->i_d.di_size ? isize : 0;
}
/*
* Fast and loose check if this write could update the on-disk inode size.
*/
static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend)
{
return ioend->io_offset + ioend->io_size >
XFS_I(ioend->io_inode)->i_d.di_size;
}
/*
* Update on-disk file size now that data has been written to disk. The
* current in-memory file size is i_size. If a write is beyond eof i_new_size
@@ -173,9 +143,6 @@ xfs_setfilesize(
xfs_inode_t *ip = XFS_I(ioend->io_inode);
xfs_fsize_t isize;
if (unlikely(ioend->io_error))
return 0;
if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
return EAGAIN;
@@ -192,6 +159,9 @@ xfs_setfilesize(
/*
* Schedule IO completion handling on the final put of an ioend.
*
* If there is no work to do we might as well call it a day and free the
* ioend right now.
*/
STATIC void
xfs_finish_ioend(
@@ -200,8 +170,10 @@ xfs_finish_ioend(
if (atomic_dec_and_test(&ioend->io_remaining)) {
if (ioend->io_type == IO_UNWRITTEN)
queue_work(xfsconvertd_workqueue, &ioend->io_work);
else
else if (xfs_ioend_is_append(ioend))
queue_work(xfsdatad_workqueue, &ioend->io_work);
else
xfs_destroy_ioend(ioend);
}
}
@@ -216,17 +188,24 @@ xfs_end_io(
struct xfs_inode *ip = XFS_I(ioend->io_inode);
int error = 0;
if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
error = -EIO;
goto done;
}
if (ioend->io_error)
goto done;
/*
* For unwritten extents we need to issue transactions to convert a
* range to normal written extens after the data I/O has finished.
*/
if (ioend->io_type == IO_UNWRITTEN &&
likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) {
if (ioend->io_type == IO_UNWRITTEN) {
error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
ioend->io_size);
if (error)
ioend->io_error = error;
if (error) {
ioend->io_error = -error;
goto done;
}
}
/*
@@ -236,6 +215,7 @@ xfs_end_io(
error = xfs_setfilesize(ioend);
ASSERT(!error || error == EAGAIN);
done:
/*
* If we didn't complete processing of the ioend, requeue it to the
* tail of the workqueue for another attempt later. Otherwise destroy
@@ -247,8 +227,6 @@ xfs_end_io(
/* ensure we don't spin on blocked ioends */
delay(1);
} else {
if (ioend->io_iocb)
aio_complete(ioend->io_iocb, ioend->io_result, 0);
xfs_destroy_ioend(ioend);
}
}
@@ -285,13 +263,13 @@ xfs_alloc_ioend(
* all the I/O from calling the completion routine too early.
*/
atomic_set(&ioend->io_remaining, 1);
ioend->io_isasync = 0;
ioend->io_error = 0;
ioend->io_list = NULL;
ioend->io_type = type;
ioend->io_inode = inode;
ioend->io_buffer_head = NULL;
ioend->io_buffer_tail = NULL;
atomic_inc(&XFS_I(ioend->io_inode)->i_iocount);
ioend->io_offset = 0;
ioend->io_size = 0;
ioend->io_iocb = NULL;
@@ -337,8 +315,8 @@ xfs_map_blocks(
count = mp->m_maxioffset - offset;
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb,
bmapi_flags, NULL, 0, imap, &nimaps, NULL);
error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
imap, &nimaps, bmapi_flags);
xfs_iunlock(ip, XFS_ILOCK_SHARED);
if (error)
@@ -551,7 +529,6 @@ xfs_cancel_ioend(
unlock_buffer(bh);
} while ((bh = next_bh) != NULL);
xfs_ioend_wake(XFS_I(ioend->io_inode));
mempool_free(ioend, xfs_ioend_pool);
} while ((ioend = next) != NULL);
}
@@ -1161,8 +1138,8 @@ __xfs_get_blocks(
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb,
XFS_BMAPI_ENTIRE, NULL, 0, &imap, &nimaps, NULL);
error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
&imap, &nimaps, XFS_BMAPI_ENTIRE);
if (error)
goto out_unlock;
@@ -1300,7 +1277,6 @@ xfs_end_io_direct_write(
bool is_async)
{
struct xfs_ioend *ioend = iocb->private;
struct inode *inode = ioend->io_inode;
/*
* blockdev_direct_IO can return an error even after the I/O
@@ -1311,28 +1287,17 @@ xfs_end_io_direct_write(
ioend->io_offset = offset;
ioend->io_size = size;
ioend->io_iocb = iocb;
ioend->io_result = ret;
if (private && size > 0)
ioend->io_type = IO_UNWRITTEN;
if (is_async) {
/*
* If we are converting an unwritten extent we need to delay
* the AIO completion until after the unwrittent extent
* conversion has completed, otherwise do it ASAP.
*/
if (ioend->io_type == IO_UNWRITTEN) {
ioend->io_iocb = iocb;
ioend->io_result = ret;
} else {
aio_complete(iocb, ret, 0);
}
ioend->io_isasync = 1;
xfs_finish_ioend(ioend);
} else {
xfs_finish_ioend_sync(ioend);
}
/* XXX: probably should move into the real I/O completion handler */
inode_dio_done(inode);
}
STATIC ssize_t
+1 -3
View File
@@ -47,6 +47,7 @@ typedef struct xfs_ioend {
unsigned int io_type; /* delalloc / unwritten */
int io_error; /* I/O error code */
atomic_t io_remaining; /* hold count */
unsigned int io_isasync : 1; /* needs aio_complete */
struct inode *io_inode; /* file being written to */
struct buffer_head *io_buffer_head;/* buffer linked list head */
struct buffer_head *io_buffer_tail;/* buffer linked list tail */
@@ -60,9 +61,6 @@ typedef struct xfs_ioend {
extern const struct address_space_operations xfs_address_space_operations;
extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int);
extern void xfs_ioend_init(void);
extern void xfs_ioend_wait(struct xfs_inode *);
extern void xfs_count_page_state(struct page *, int *, int *);
#endif /* __XFS_AOPS_H__ */
+34 -55
View File
@@ -319,7 +319,7 @@ xfs_attr_set_int(
return (error);
}
xfs_trans_ijoin(args.trans, dp);
xfs_trans_ijoin(args.trans, dp, 0);
/*
* If the attribute list is non-existent or a shortform list,
@@ -389,7 +389,7 @@ xfs_attr_set_int(
* a new one. We need the inode to be in all transactions.
*/
if (committed)
xfs_trans_ijoin(args.trans, dp);
xfs_trans_ijoin(args.trans, dp, 0);
/*
* Commit the leaf transformation. We'll need another (linked)
@@ -537,7 +537,7 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
* No need to make quota reservations here. We expect to release some
* blocks not allocate in the common case.
*/
xfs_trans_ijoin(args.trans, dp);
xfs_trans_ijoin(args.trans, dp, 0);
/*
* Decide on what work routines to call based on the inode size.
@@ -809,7 +809,7 @@ xfs_attr_inactive(xfs_inode_t *dp)
* No need to make quota reservations here. We expect to release some
* blocks, not allocate, in the common case.
*/
xfs_trans_ijoin(trans, dp);
xfs_trans_ijoin(trans, dp, 0);
/*
* Decide on what work routines to call based on the inode size.
@@ -823,18 +823,6 @@ xfs_attr_inactive(xfs_inode_t *dp)
if (error)
goto out;
/*
* Signal synchronous inactive transactions unless this is a
* synchronous mount filesystem in which case we know that we're here
* because we've been called out of xfs_inactive which means that the
* last reference is gone and the unlink transaction has already hit
* the disk so async inactive transactions are safe.
*/
if (!(mp->m_flags & XFS_MOUNT_WSYNC)) {
if (dp->i_d.di_anextents > 0)
xfs_trans_set_sync(trans);
}
error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0);
if (error)
goto out;
@@ -973,7 +961,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
* a new one. We need the inode to be in all transactions.
*/
if (committed)
xfs_trans_ijoin(args->trans, dp);
xfs_trans_ijoin(args->trans, dp, 0);
/*
* Commit the current trans (including the inode) and start
@@ -1075,7 +1063,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
* in all transactions.
*/
if (committed)
xfs_trans_ijoin(args->trans, dp);
xfs_trans_ijoin(args->trans, dp, 0);
} else
xfs_da_buf_done(bp);
@@ -1149,7 +1137,7 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
* a new one. We need the inode to be in all transactions.
*/
if (committed)
xfs_trans_ijoin(args->trans, dp);
xfs_trans_ijoin(args->trans, dp, 0);
} else
xfs_da_buf_done(bp);
return(0);
@@ -1303,7 +1291,7 @@ restart:
* in all transactions.
*/
if (committed)
xfs_trans_ijoin(args->trans, dp);
xfs_trans_ijoin(args->trans, dp, 0);
/*
* Commit the node conversion and start the next
@@ -1340,7 +1328,7 @@ restart:
* a new one. We need the inode to be in all transactions.
*/
if (committed)
xfs_trans_ijoin(args->trans, dp);
xfs_trans_ijoin(args->trans, dp, 0);
} else {
/*
* Addition succeeded, update Btree hashvals.
@@ -1452,7 +1440,7 @@ restart:
* in all transactions.
*/
if (committed)
xfs_trans_ijoin(args->trans, dp);
xfs_trans_ijoin(args->trans, dp, 0);
}
/*
@@ -1584,7 +1572,7 @@ xfs_attr_node_removename(xfs_da_args_t *args)
* a new one. We need the inode to be in all transactions.
*/
if (committed)
xfs_trans_ijoin(args->trans, dp);
xfs_trans_ijoin(args->trans, dp, 0);
/*
* Commit the Btree join operation and start a new trans.
@@ -1635,7 +1623,7 @@ xfs_attr_node_removename(xfs_da_args_t *args)
* in all transactions.
*/
if (committed)
xfs_trans_ijoin(args->trans, dp);
xfs_trans_ijoin(args->trans, dp, 0);
} else
xfs_da_brelse(args->trans, bp);
}
@@ -1975,10 +1963,9 @@ xfs_attr_rmtval_get(xfs_da_args_t *args)
lblkno = args->rmtblkno;
while (valuelen > 0) {
nmap = ATTR_RMTVALUE_MAPSIZE;
error = xfs_bmapi(args->trans, args->dp, (xfs_fileoff_t)lblkno,
args->rmtblkcnt,
XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
NULL, 0, map, &nmap, NULL);
error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
args->rmtblkcnt, map, &nmap,
XFS_BMAPI_ATTRFORK);
if (error)
return(error);
ASSERT(nmap >= 1);
@@ -2052,10 +2039,9 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
*/
xfs_bmap_init(args->flist, args->firstblock);
nmap = 1;
error = xfs_bmapi(args->trans, dp, (xfs_fileoff_t)lblkno,
error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno,
blkcnt,
XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA |
XFS_BMAPI_WRITE,
XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
args->firstblock, args->total, &map, &nmap,
args->flist);
if (!error) {
@@ -2074,7 +2060,7 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
* a new one. We need the inode to be in all transactions.
*/
if (committed)
xfs_trans_ijoin(args->trans, dp);
xfs_trans_ijoin(args->trans, dp, 0);
ASSERT(nmap == 1);
ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
@@ -2104,14 +2090,11 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
*/
xfs_bmap_init(args->flist, args->firstblock);
nmap = 1;
error = xfs_bmapi(NULL, dp, (xfs_fileoff_t)lblkno,
args->rmtblkcnt,
XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
args->firstblock, 0, &map, &nmap,
NULL);
if (error) {
error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno,
args->rmtblkcnt, &map, &nmap,
XFS_BMAPI_ATTRFORK);
if (error)
return(error);
}
ASSERT(nmap == 1);
ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
(map.br_startblock != HOLESTARTBLOCK));
@@ -2121,16 +2104,17 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
bp = xfs_buf_get(mp->m_ddev_targp, dblkno, blkcnt,
XBF_LOCK | XBF_DONT_BLOCK);
ASSERT(!xfs_buf_geterror(bp));
if (!bp)
return ENOMEM;
tmp = (valuelen < XFS_BUF_SIZE(bp)) ? valuelen :
XFS_BUF_SIZE(bp);
xfs_buf_iomove(bp, 0, tmp, src, XBRW_WRITE);
if (tmp < XFS_BUF_SIZE(bp))
xfs_buf_zero(bp, tmp, XFS_BUF_SIZE(bp) - tmp);
if ((error = xfs_bwrite(mp, bp))) {/* GROT: NOTE: synchronous write */
return (error);
}
error = xfs_bwrite(bp); /* GROT: NOTE: synchronous write */
xfs_buf_relse(bp);
if (error)
return error;
src += tmp;
valuelen -= tmp;
@@ -2166,16 +2150,12 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
/*
* Try to remember where we decided to put the value.
*/
xfs_bmap_init(args->flist, args->firstblock);
nmap = 1;
error = xfs_bmapi(NULL, args->dp, (xfs_fileoff_t)lblkno,
args->rmtblkcnt,
XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
args->firstblock, 0, &map, &nmap,
args->flist);
if (error) {
error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
args->rmtblkcnt, &map, &nmap,
XFS_BMAPI_ATTRFORK);
if (error)
return(error);
}
ASSERT(nmap == 1);
ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
(map.br_startblock != HOLESTARTBLOCK));
@@ -2188,8 +2168,7 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
*/
bp = xfs_incore(mp->m_ddev_targp, dblkno, blkcnt, XBF_TRYLOCK);
if (bp) {
XFS_BUF_STALE(bp);
XFS_BUF_UNDELAYWRITE(bp);
xfs_buf_stale(bp);
xfs_buf_relse(bp);
bp = NULL;
}
@@ -2227,7 +2206,7 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
* a new one. We need the inode to be in all transactions.
*/
if (committed)
xfs_trans_ijoin(args->trans, args->dp);
xfs_trans_ijoin(args->trans, args->dp, 0);
/*
* Close out trans and start the next one in the chain.
+4 -3
View File
@@ -2926,9 +2926,8 @@ xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp,
* Try to remember where we decided to put the value.
*/
nmap = 1;
error = xfs_bmapi(*trans, dp, (xfs_fileoff_t)tblkno, tblkcnt,
XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
NULL, 0, &map, &nmap, NULL);
error = xfs_bmapi_read(dp, (xfs_fileoff_t)tblkno, tblkcnt,
&map, &nmap, XFS_BMAPI_ATTRFORK);
if (error) {
return(error);
}
@@ -2948,6 +2947,8 @@ xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp,
bp = xfs_trans_get_buf(*trans,
dp->i_mount->m_ddev_targp,
dblkno, dblkcnt, XBF_LOCK);
if (!bp)
return ENOMEM;
xfs_trans_binval(*trans, bp);
/*
* Roll to next transaction.
+1270 -1279
View File
File diff suppressed because it is too large Load Diff
+66 -252
View File
@@ -62,27 +62,23 @@ typedef struct xfs_bmap_free
#define XFS_BMAP_MAX_NMAP 4
/*
* Flags for xfs_bmapi
* Flags for xfs_bmapi_*
*/
#define XFS_BMAPI_WRITE 0x001 /* write operation: allocate space */
#define XFS_BMAPI_DELAY 0x002 /* delayed write operation */
#define XFS_BMAPI_ENTIRE 0x004 /* return entire extent, not trimmed */
#define XFS_BMAPI_METADATA 0x008 /* mapping metadata not user data */
#define XFS_BMAPI_ATTRFORK 0x010 /* use attribute fork not data */
#define XFS_BMAPI_PREALLOC 0x040 /* preallocation op: unwritten space */
#define XFS_BMAPI_IGSTATE 0x080 /* Ignore state - */
#define XFS_BMAPI_ENTIRE 0x001 /* return entire extent, not trimmed */
#define XFS_BMAPI_METADATA 0x002 /* mapping metadata not user data */
#define XFS_BMAPI_ATTRFORK 0x004 /* use attribute fork not data */
#define XFS_BMAPI_PREALLOC 0x008 /* preallocation op: unwritten space */
#define XFS_BMAPI_IGSTATE 0x010 /* Ignore state - */
/* combine contig. space */
#define XFS_BMAPI_CONTIG 0x100 /* must allocate only one extent */
#define XFS_BMAPI_CONTIG 0x020 /* must allocate only one extent */
/*
* unwritten extent conversion - this needs write cache flushing and no additional
* allocation alignments. When specified with XFS_BMAPI_PREALLOC it converts
* from written to unwritten, otherwise convert from unwritten to written.
*/
#define XFS_BMAPI_CONVERT 0x200
#define XFS_BMAPI_CONVERT 0x040
#define XFS_BMAPI_FLAGS \
{ XFS_BMAPI_WRITE, "WRITE" }, \
{ XFS_BMAPI_DELAY, "DELAY" }, \
{ XFS_BMAPI_ENTIRE, "ENTIRE" }, \
{ XFS_BMAPI_METADATA, "METADATA" }, \
{ XFS_BMAPI_ATTRFORK, "ATTRFORK" }, \
@@ -113,21 +109,28 @@ static inline void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp)
* Argument structure for xfs_bmap_alloc.
*/
typedef struct xfs_bmalloca {
xfs_fsblock_t firstblock; /* i/o first block allocated */
xfs_fsblock_t rval; /* starting block of new extent */
xfs_fileoff_t off; /* offset in file filling in */
xfs_fsblock_t *firstblock; /* i/o first block allocated */
struct xfs_bmap_free *flist; /* bmap freelist */
struct xfs_trans *tp; /* transaction pointer */
struct xfs_inode *ip; /* incore inode pointer */
struct xfs_bmbt_irec *prevp; /* extent before the new one */
struct xfs_bmbt_irec *gotp; /* extent after, or delayed */
xfs_extlen_t alen; /* i/o length asked/allocated */
struct xfs_bmbt_irec prev; /* extent before the new one */
struct xfs_bmbt_irec got; /* extent after, or delayed */
xfs_fileoff_t offset; /* offset in file filling in */
xfs_extlen_t length; /* i/o length asked/allocated */
xfs_fsblock_t blkno; /* starting block of new extent */
struct xfs_btree_cur *cur; /* btree cursor */
xfs_extnum_t idx; /* current extent index */
int nallocs;/* number of extents alloc'd */
int logflags;/* flags for transaction logging */
xfs_extlen_t total; /* total blocks needed for xaction */
xfs_extlen_t minlen; /* minimum allocation size (blocks) */
xfs_extlen_t minleft; /* amount must be left after alloc */
char eof; /* set if allocating past last extent */
char wasdel; /* replacing a delayed allocation */
char userdata;/* set if is user data */
char low; /* low on space, using seq'l ags */
char aeof; /* allocated space at eof */
char conv; /* overwriting unwritten extents */
} xfs_bmalloca_t;
@@ -152,251 +155,62 @@ typedef struct xfs_bmalloca {
{ BMAP_RIGHT_FILLING, "RF" }, \
{ BMAP_ATTRFORK, "ATTR" }
/*
* Add bmap trace insert entries for all the contents of the extent list.
*
* Quite excessive tracing. Only do this for debug builds.
*/
#if defined(__KERNEL) && defined(DEBUG)
void
xfs_bmap_trace_exlist(
struct xfs_inode *ip, /* incore inode pointer */
xfs_extnum_t cnt, /* count of entries in list */
int whichfork,
unsigned long caller_ip); /* data or attr fork */
void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
int whichfork, unsigned long caller_ip);
#define XFS_BMAP_TRACE_EXLIST(ip,c,w) \
xfs_bmap_trace_exlist(ip,c,w, _THIS_IP_)
#else
#define XFS_BMAP_TRACE_EXLIST(ip,c,w)
#endif
/*
* Convert inode from non-attributed to attributed.
* Must not be in a transaction, ip must not be locked.
*/
int /* error code */
xfs_bmap_add_attrfork(
struct xfs_inode *ip, /* incore inode pointer */
int size, /* space needed for new attribute */
int rsvd); /* flag for reserved block allocation */
/*
* Add the extent to the list of extents to be free at transaction end.
* The list is maintained sorted (by block number).
*/
void
xfs_bmap_add_free(
xfs_fsblock_t bno, /* fs block number of extent */
xfs_filblks_t len, /* length of extent */
xfs_bmap_free_t *flist, /* list of extents */
struct xfs_mount *mp); /* mount point structure */
/*
* Routine to clean up the free list data structure when
* an error occurs during a transaction.
*/
void
xfs_bmap_cancel(
xfs_bmap_free_t *flist); /* free list to clean up */
/*
* Compute and fill in the value of the maximum depth of a bmap btree
* in this filesystem. Done once, during mount.
*/
void
xfs_bmap_compute_maxlevels(
struct xfs_mount *mp, /* file system mount structure */
int whichfork); /* data or attr fork */
/*
* Returns the file-relative block number of the first unused block in the file.
* This is the lowest-address hole if the file has holes, else the first block
* past the end of file.
*/
int /* error */
xfs_bmap_first_unused(
struct xfs_trans *tp, /* transaction pointer */
struct xfs_inode *ip, /* incore inode */
xfs_extlen_t len, /* size of hole to find */
xfs_fileoff_t *unused, /* unused block num */
int whichfork); /* data or attr fork */
/*
* Returns the file-relative block number of the last block + 1 before
* last_block (input value) in the file.
* This is not based on i_size, it is based on the extent list.
* Returns 0 for local files, as they do not have an extent list.
*/
int /* error */
xfs_bmap_last_before(
struct xfs_trans *tp, /* transaction pointer */
struct xfs_inode *ip, /* incore inode */
xfs_fileoff_t *last_block, /* last block */
int whichfork); /* data or attr fork */
/*
* Returns the file-relative block number of the first block past eof in
* the file. This is not based on i_size, it is based on the extent list.
* Returns 0 for local files, as they do not have an extent list.
*/
int /* error */
xfs_bmap_last_offset(
struct xfs_trans *tp, /* transaction pointer */
struct xfs_inode *ip, /* incore inode */
xfs_fileoff_t *unused, /* last block num */
int whichfork); /* data or attr fork */
/*
* Returns whether the selected fork of the inode has exactly one
* block or not. For the data fork we check this matches di_size,
* implying the file's range is 0..bsize-1.
*/
int
xfs_bmap_one_block(
struct xfs_inode *ip, /* incore inode */
int whichfork); /* data or attr fork */
/*
* Read in the extents to iu_extents.
* All inode fields are set up by caller, we just traverse the btree
* and copy the records in.
*/
int /* error */
xfs_bmap_read_extents(
struct xfs_trans *tp, /* transaction pointer */
struct xfs_inode *ip, /* incore inode */
int whichfork); /* data or attr fork */
/*
* Map file blocks to filesystem blocks.
* File range is given by the bno/len pair.
* Adds blocks to file if a write ("flags & XFS_BMAPI_WRITE" set)
* into a hole or past eof.
* Only allocates blocks from a single allocation group,
* to avoid locking problems.
* The returned value in "firstblock" from the first call in a transaction
* must be remembered and presented to subsequent calls in "firstblock".
* An upper bound for the number of blocks to be allocated is supplied to
* the first call in "total"; if no allocation group has that many free
* blocks then the call will fail (return NULLFSBLOCK in "firstblock").
*/
int /* error */
xfs_bmapi(
struct xfs_trans *tp, /* transaction pointer */
struct xfs_inode *ip, /* incore inode */
xfs_fileoff_t bno, /* starting file offs. mapped */
xfs_filblks_t len, /* length to map in file */
int flags, /* XFS_BMAPI_... */
xfs_fsblock_t *firstblock, /* first allocated block
controls a.g. for allocs */
xfs_extlen_t total, /* total blocks needed */
struct xfs_bmbt_irec *mval, /* output: map values */
int *nmap, /* i/o: mval size/count */
xfs_bmap_free_t *flist); /* i/o: list extents to free */
/*
* Map file blocks to filesystem blocks, simple version.
* One block only, read-only.
* For flags, only the XFS_BMAPI_ATTRFORK flag is examined.
* For the other flag values, the effect is as if XFS_BMAPI_METADATA
* was set and all the others were clear.
*/
int /* error */
xfs_bmapi_single(
struct xfs_trans *tp, /* transaction pointer */
struct xfs_inode *ip, /* incore inode */
int whichfork, /* data or attr fork */
xfs_fsblock_t *fsb, /* output: mapped block */
xfs_fileoff_t bno); /* starting file offs. mapped */
/*
* Unmap (remove) blocks from a file.
* If nexts is nonzero then the number of extents to remove is limited to
* that value. If not all extents in the block range can be removed then
* *done is set.
*/
int /* error */
xfs_bunmapi(
struct xfs_trans *tp, /* transaction pointer */
struct xfs_inode *ip, /* incore inode */
xfs_fileoff_t bno, /* starting offset to unmap */
xfs_filblks_t len, /* length to unmap in file */
int flags, /* XFS_BMAPI_... */
xfs_extnum_t nexts, /* number of extents max */
xfs_fsblock_t *firstblock, /* first allocated block
controls a.g. for allocs */
xfs_bmap_free_t *flist, /* i/o: list extents to free */
int *done); /* set if not done yet */
/*
* Check an extent list, which has just been read, for
* any bit in the extent flag field.
*/
int
xfs_check_nostate_extents(
struct xfs_ifork *ifp,
xfs_extnum_t idx,
xfs_extnum_t num);
uint
xfs_default_attroffset(
struct xfs_inode *ip);
int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd);
void xfs_bmap_add_free(xfs_fsblock_t bno, xfs_filblks_t len,
struct xfs_bmap_free *flist, struct xfs_mount *mp);
void xfs_bmap_cancel(struct xfs_bmap_free *flist);
void xfs_bmap_compute_maxlevels(struct xfs_mount *mp, int whichfork);
int xfs_bmap_first_unused(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork);
int xfs_bmap_last_before(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_fileoff_t *last_block, int whichfork);
int xfs_bmap_last_offset(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_fileoff_t *unused, int whichfork);
int xfs_bmap_one_block(struct xfs_inode *ip, int whichfork);
int xfs_bmap_read_extents(struct xfs_trans *tp, struct xfs_inode *ip,
int whichfork);
int xfs_bmapi_read(struct xfs_inode *ip, xfs_fileoff_t bno,
xfs_filblks_t len, struct xfs_bmbt_irec *mval,
int *nmap, int flags);
int xfs_bmapi_delay(struct xfs_inode *ip, xfs_fileoff_t bno,
xfs_filblks_t len, struct xfs_bmbt_irec *mval,
int *nmap, int flags);
int xfs_bmapi_write(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_fileoff_t bno, xfs_filblks_t len, int flags,
xfs_fsblock_t *firstblock, xfs_extlen_t total,
struct xfs_bmbt_irec *mval, int *nmap,
struct xfs_bmap_free *flist);
int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_fileoff_t bno, xfs_filblks_t len, int flags,
xfs_extnum_t nexts, xfs_fsblock_t *firstblock,
struct xfs_bmap_free *flist, int *done);
int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx,
xfs_extnum_t num);
uint xfs_default_attroffset(struct xfs_inode *ip);
#ifdef __KERNEL__
/*
* Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi
* caller. Frees all the extents that need freeing, which must be done
* last due to locking considerations.
*
* Return 1 if the given transaction was committed and a new one allocated,
* and 0 otherwise.
*/
int /* error */
xfs_bmap_finish(
struct xfs_trans **tp, /* transaction pointer addr */
xfs_bmap_free_t *flist, /* i/o: list extents to free */
int *committed); /* xact committed or not */
/* bmap to userspace formatter - copy to user & advance pointer */
typedef int (*xfs_bmap_format_t)(void **, struct getbmapx *, int *);
/*
* Get inode's extents as described in bmv, and format for output.
*/
int /* error code */
xfs_getbmap(
xfs_inode_t *ip,
struct getbmapx *bmv, /* user bmap structure */
xfs_bmap_format_t formatter, /* format to user */
void *arg); /* formatter arg */
/*
* Check if the endoff is outside the last extent. If so the caller will grow
* the allocation to a stripe unit boundary
*/
int
xfs_bmap_eof(
struct xfs_inode *ip,
xfs_fileoff_t endoff,
int whichfork,
int *eof);
/*
* Count fsblocks of the given fork.
*/
int
xfs_bmap_count_blocks(
xfs_trans_t *tp,
struct xfs_inode *ip,
int whichfork,
int *count);
int
xfs_bmap_punch_delalloc_range(
struct xfs_inode *ip,
xfs_fileoff_t start_fsb,
xfs_fileoff_t length);
int xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist,
int *committed);
int xfs_getbmap(struct xfs_inode *ip, struct getbmapx *bmv,
xfs_bmap_format_t formatter, void *arg);
int xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff,
int whichfork, int *eof);
int xfs_bmap_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip,
int whichfork, int *count);
int xfs_bmap_punch_delalloc_range(struct xfs_inode *ip,
xfs_fileoff_t start_fsb, xfs_fileoff_t length);
#endif /* __KERNEL__ */
#endif /* __XFS_BMAP_H__ */
+6 -5
View File
@@ -631,7 +631,7 @@ xfs_btree_read_bufl(
}
ASSERT(!xfs_buf_geterror(bp));
if (bp)
XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
xfs_buf_set_ref(bp, refval);
*bpp = bp;
return 0;
}
@@ -939,13 +939,13 @@ xfs_btree_set_refs(
switch (cur->bc_btnum) {
case XFS_BTNUM_BNO:
case XFS_BTNUM_CNT:
XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, XFS_ALLOC_BTREE_REF);
xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF);
break;
case XFS_BTNUM_INO:
XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, XFS_INO_BTREE_REF);
xfs_buf_set_ref(bp, XFS_INO_BTREE_REF);
break;
case XFS_BTNUM_BMAP:
XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, XFS_BMAP_BTREE_REF);
xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF);
break;
default:
ASSERT(0);
@@ -970,7 +970,8 @@ xfs_btree_get_buf_block(
*bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
mp->m_bsize, flags);
ASSERT(!xfs_buf_geterror(*bpp));
if (!*bpp)
return ENOMEM;
*block = XFS_BUF_TO_BLOCK(*bpp);
return 0;
+98 -146
View File
@@ -43,7 +43,6 @@
static kmem_zone_t *xfs_buf_zone;
STATIC int xfsbufd(void *);
STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
static struct workqueue_struct *xfslogd_workqueue;
struct workqueue_struct *xfsdatad_workqueue;
@@ -66,10 +65,6 @@ struct workqueue_struct *xfsconvertd_workqueue;
#define xb_to_km(flags) \
(((flags) & XBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP)
#define xfs_buf_allocate(flags) \
kmem_zone_alloc(xfs_buf_zone, xb_to_km(flags))
#define xfs_buf_deallocate(bp) \
kmem_zone_free(xfs_buf_zone, (bp));
static inline int
xfs_buf_is_vmapped(
@@ -152,6 +147,7 @@ xfs_buf_stale(
struct xfs_buf *bp)
{
bp->b_flags |= XBF_STALE;
xfs_buf_delwri_dequeue(bp);
atomic_set(&(bp)->b_lru_ref, 0);
if (!list_empty(&bp->b_lru)) {
struct xfs_buftarg *btp = bp->b_target;
@@ -167,14 +163,19 @@ xfs_buf_stale(
ASSERT(atomic_read(&bp->b_hold) >= 1);
}
STATIC void
_xfs_buf_initialize(
xfs_buf_t *bp,
xfs_buftarg_t *target,
struct xfs_buf *
xfs_buf_alloc(
struct xfs_buftarg *target,
xfs_off_t range_base,
size_t range_length,
xfs_buf_flags_t flags)
{
struct xfs_buf *bp;
bp = kmem_zone_alloc(xfs_buf_zone, xb_to_km(flags));
if (unlikely(!bp))
return NULL;
/*
* We don't want certain flags to appear in b_flags.
*/
@@ -203,8 +204,9 @@ _xfs_buf_initialize(
init_waitqueue_head(&bp->b_waiters);
XFS_STATS_INC(xb_create);
trace_xfs_buf_init(bp, _RET_IP_);
return bp;
}
/*
@@ -277,7 +279,7 @@ xfs_buf_free(
} else if (bp->b_flags & _XBF_KMEM)
kmem_free(bp->b_addr);
_xfs_buf_free_pages(bp);
xfs_buf_deallocate(bp);
kmem_zone_free(xfs_buf_zone, bp);
}
/*
@@ -416,10 +418,7 @@ _xfs_buf_map_pages(
/*
* Look up, and creates if absent, a lockable buffer for
* a given range of an inode. The buffer is returned
* locked. If other overlapping buffers exist, they are
* released before the new buffer is created and locked,
* which may imply that this call will block until those buffers
* are unlocked. No I/O is implied by this call.
* locked. No I/O is implied by this call.
*/
xfs_buf_t *
_xfs_buf_find(
@@ -481,8 +480,6 @@ _xfs_buf_find(
/* No match found */
if (new_bp) {
_xfs_buf_initialize(new_bp, btp, range_base,
range_length, flags);
rb_link_node(&new_bp->b_rbnode, parent, rbp);
rb_insert_color(&new_bp->b_rbnode, &pag->pag_buf_tree);
/* the buffer keeps the perag reference until it is freed */
@@ -525,35 +522,51 @@ found:
}
/*
* Assembles a buffer covering the specified range.
* Storage in memory for all portions of the buffer will be allocated,
* although backing storage may not be.
* Assembles a buffer covering the specified range. The code is optimised for
* cache hits, as metadata intensive workloads will see 3 orders of magnitude
* more hits than misses.
*/
xfs_buf_t *
struct xfs_buf *
xfs_buf_get(
xfs_buftarg_t *target,/* target for buffer */
xfs_off_t ioff, /* starting offset of range */
size_t isize, /* length of range */
xfs_buf_flags_t flags)
{
xfs_buf_t *bp, *new_bp;
struct xfs_buf *bp;
struct xfs_buf *new_bp;
int error = 0;
new_bp = xfs_buf_allocate(flags);
bp = _xfs_buf_find(target, ioff, isize, flags, NULL);
if (likely(bp))
goto found;
new_bp = xfs_buf_alloc(target, ioff << BBSHIFT, isize << BBSHIFT,
flags);
if (unlikely(!new_bp))
return NULL;
bp = _xfs_buf_find(target, ioff, isize, flags, new_bp);
if (!bp) {
kmem_zone_free(xfs_buf_zone, new_bp);
return NULL;
}
if (bp == new_bp) {
error = xfs_buf_allocate_memory(bp, flags);
if (error)
goto no_buffer;
} else {
xfs_buf_deallocate(new_bp);
if (unlikely(bp == NULL))
return NULL;
}
} else
kmem_zone_free(xfs_buf_zone, new_bp);
/*
* Now we have a workable buffer, fill in the block number so
* that we can do IO on it.
*/
bp->b_bn = ioff;
bp->b_count_desired = bp->b_buffer_length;
found:
if (!(bp->b_flags & XBF_MAPPED)) {
error = _xfs_buf_map_pages(bp, flags);
if (unlikely(error)) {
@@ -564,18 +577,10 @@ xfs_buf_get(
}
XFS_STATS_INC(xb_get);
/*
* Always fill in the block number now, the mapped cases can do
* their own overlay of this later.
*/
bp->b_bn = ioff;
bp->b_count_desired = bp->b_buffer_length;
trace_xfs_buf_get(bp, flags, _RET_IP_);
return bp;
no_buffer:
no_buffer:
if (flags & (XBF_LOCK | XBF_TRYLOCK))
xfs_buf_unlock(bp);
xfs_buf_rele(bp);
@@ -689,19 +694,6 @@ xfs_buf_read_uncached(
return bp;
}
xfs_buf_t *
xfs_buf_get_empty(
size_t len,
xfs_buftarg_t *target)
{
xfs_buf_t *bp;
bp = xfs_buf_allocate(0);
if (bp)
_xfs_buf_initialize(bp, target, 0, len, 0);
return bp;
}
/*
* Return a buffer allocated as an empty buffer and associated to external
* memory via xfs_buf_associate_memory() back to it's empty state.
@@ -787,10 +779,9 @@ xfs_buf_get_uncached(
int error, i;
xfs_buf_t *bp;
bp = xfs_buf_allocate(0);
bp = xfs_buf_alloc(target, 0, len, 0);
if (unlikely(bp == NULL))
goto fail;
_xfs_buf_initialize(bp, target, 0, len, 0);
error = _xfs_buf_get_pages(bp, page_count, 0);
if (error)
@@ -818,7 +809,7 @@ xfs_buf_get_uncached(
__free_page(bp->b_pages[i]);
_xfs_buf_free_pages(bp);
fail_free_buf:
xfs_buf_deallocate(bp);
kmem_zone_free(xfs_buf_zone, bp);
fail:
return NULL;
}
@@ -937,12 +928,6 @@ void
xfs_buf_unlock(
struct xfs_buf *bp)
{
if ((bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)) == XBF_DELWRI) {
atomic_inc(&bp->b_hold);
bp->b_flags |= XBF_ASYNC;
xfs_buf_delwri_queue(bp, 0);
}
XB_CLEAR_OWNER(bp);
up(&bp->b_sema);
@@ -1019,9 +1004,19 @@ xfs_buf_ioerror(
trace_xfs_buf_ioerror(bp, error, _RET_IP_);
}
void
xfs_buf_ioerror_alert(
struct xfs_buf *bp,
const char *func)
{
xfs_alert(bp->b_target->bt_mount,
"metadata I/O error: block 0x%llx (\"%s\") error %d buf count %zd",
(__uint64_t)XFS_BUF_ADDR(bp), func,
bp->b_error, XFS_BUF_COUNT(bp));
}
int
xfs_bwrite(
struct xfs_mount *mp,
struct xfs_buf *bp)
{
int error;
@@ -1033,25 +1028,13 @@ xfs_bwrite(
xfs_bdstrat_cb(bp);
error = xfs_buf_iowait(bp);
if (error)
xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
xfs_buf_relse(bp);
if (error) {
xfs_force_shutdown(bp->b_target->bt_mount,
SHUTDOWN_META_IO_ERROR);
}
return error;
}
void
xfs_bdwrite(
void *mp,
struct xfs_buf *bp)
{
trace_xfs_buf_bdwrite(bp, _RET_IP_);
bp->b_flags &= ~XBF_READ;
bp->b_flags |= (XBF_DELWRI | XBF_ASYNC);
xfs_buf_delwri_queue(bp, 1);
}
/*
* Called when we want to stop a buffer from getting written or read.
* We attach the EIO error, muck with its flags, and call xfs_buf_ioend
@@ -1074,9 +1057,8 @@ xfs_bioerror(
* We're calling xfs_buf_ioend, so delete XBF_DONE flag.
*/
XFS_BUF_UNREAD(bp);
XFS_BUF_UNDELAYWRITE(bp);
XFS_BUF_UNDONE(bp);
XFS_BUF_STALE(bp);
xfs_buf_stale(bp);
xfs_buf_ioend(bp, 0);
@@ -1103,9 +1085,8 @@ xfs_bioerror_relse(
* change that interface.
*/
XFS_BUF_UNREAD(bp);
XFS_BUF_UNDELAYWRITE(bp);
XFS_BUF_DONE(bp);
XFS_BUF_STALE(bp);
xfs_buf_stale(bp);
bp->b_iodone = NULL;
if (!(fl & XBF_ASYNC)) {
/*
@@ -1115,7 +1096,7 @@ xfs_bioerror_relse(
* ASYNC buffers.
*/
xfs_buf_ioerror(bp, EIO);
XFS_BUF_FINISH_IOWAIT(bp);
complete(&bp->b_iowait);
} else {
xfs_buf_relse(bp);
}
@@ -1275,15 +1256,10 @@ xfs_buf_iorequest(
{
trace_xfs_buf_iorequest(bp, _RET_IP_);
if (bp->b_flags & XBF_DELWRI) {
xfs_buf_delwri_queue(bp, 1);
return 0;
}
ASSERT(!(bp->b_flags & XBF_DELWRI));
if (bp->b_flags & XBF_WRITE) {
if (bp->b_flags & XBF_WRITE)
xfs_buf_wait_unpin(bp);
}
xfs_buf_hold(bp);
/* Set the count to 1 initially, this will stop an I/O
@@ -1481,9 +1457,13 @@ xfs_setsize_buftarg_flags(
btp->bt_smask = sectorsize - 1;
if (set_blocksize(btp->bt_bdev, sectorsize)) {
char name[BDEVNAME_SIZE];
bdevname(btp->bt_bdev, name);
xfs_warn(btp->bt_mount,
"Cannot set_blocksize to %u on device %s\n",
sectorsize, xfs_buf_target_name(btp));
sectorsize, name);
return EINVAL;
}
@@ -1514,12 +1494,12 @@ xfs_setsize_buftarg(
}
STATIC int
xfs_alloc_delwrite_queue(
xfs_alloc_delwri_queue(
xfs_buftarg_t *btp,
const char *fsname)
{
INIT_LIST_HEAD(&btp->bt_delwrite_queue);
spin_lock_init(&btp->bt_delwrite_lock);
INIT_LIST_HEAD(&btp->bt_delwri_queue);
spin_lock_init(&btp->bt_delwri_lock);
btp->bt_flags = 0;
btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname);
if (IS_ERR(btp->bt_task))
@@ -1549,7 +1529,7 @@ xfs_alloc_buftarg(
spin_lock_init(&btp->bt_lru_lock);
if (xfs_setsize_buftarg_early(btp, bdev))
goto error;
if (xfs_alloc_delwrite_queue(btp, fsname))
if (xfs_alloc_delwri_queue(btp, fsname))
goto error;
btp->bt_shrinker.shrink = xfs_buftarg_shrink;
btp->bt_shrinker.seeks = DEFAULT_SEEKS;
@@ -1565,56 +1545,48 @@ error:
/*
* Delayed write buffer handling
*/
STATIC void
void
xfs_buf_delwri_queue(
xfs_buf_t *bp,
int unlock)
xfs_buf_t *bp)
{
struct list_head *dwq = &bp->b_target->bt_delwrite_queue;
spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock;
struct xfs_buftarg *btp = bp->b_target;
trace_xfs_buf_delwri_queue(bp, _RET_IP_);
ASSERT((bp->b_flags&(XBF_DELWRI|XBF_ASYNC)) == (XBF_DELWRI|XBF_ASYNC));
ASSERT(!(bp->b_flags & XBF_READ));
spin_lock(dwlk);
/* If already in the queue, dequeue and place at tail */
spin_lock(&btp->bt_delwri_lock);
if (!list_empty(&bp->b_list)) {
/* if already in the queue, move it to the tail */
ASSERT(bp->b_flags & _XBF_DELWRI_Q);
if (unlock)
atomic_dec(&bp->b_hold);
list_del(&bp->b_list);
}
if (list_empty(dwq)) {
list_move_tail(&bp->b_list, &btp->bt_delwri_queue);
} else {
/* start xfsbufd as it is about to have something to do */
wake_up_process(bp->b_target->bt_task);
if (list_empty(&btp->bt_delwri_queue))
wake_up_process(bp->b_target->bt_task);
atomic_inc(&bp->b_hold);
bp->b_flags |= XBF_DELWRI | _XBF_DELWRI_Q | XBF_ASYNC;
list_add_tail(&bp->b_list, &btp->bt_delwri_queue);
}
bp->b_flags |= _XBF_DELWRI_Q;
list_add_tail(&bp->b_list, dwq);
bp->b_queuetime = jiffies;
spin_unlock(dwlk);
if (unlock)
xfs_buf_unlock(bp);
spin_unlock(&btp->bt_delwri_lock);
}
void
xfs_buf_delwri_dequeue(
xfs_buf_t *bp)
{
spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock;
int dequeued = 0;
spin_lock(dwlk);
spin_lock(&bp->b_target->bt_delwri_lock);
if ((bp->b_flags & XBF_DELWRI) && !list_empty(&bp->b_list)) {
ASSERT(bp->b_flags & _XBF_DELWRI_Q);
list_del_init(&bp->b_list);
dequeued = 1;
}
bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);
spin_unlock(dwlk);
spin_unlock(&bp->b_target->bt_delwri_lock);
if (dequeued)
xfs_buf_rele(bp);
@@ -1646,16 +1618,9 @@ xfs_buf_delwri_promote(
if (bp->b_queuetime < jiffies - age)
return;
bp->b_queuetime = jiffies - age;
spin_lock(&btp->bt_delwrite_lock);
list_move(&bp->b_list, &btp->bt_delwrite_queue);
spin_unlock(&btp->bt_delwrite_lock);
}
STATIC void
xfs_buf_runall_queues(
struct workqueue_struct *queue)
{
flush_workqueue(queue);
spin_lock(&btp->bt_delwri_lock);
list_move(&bp->b_list, &btp->bt_delwri_queue);
spin_unlock(&btp->bt_delwri_lock);
}
/*
@@ -1669,15 +1634,13 @@ xfs_buf_delwri_split(
unsigned long age)
{
xfs_buf_t *bp, *n;
struct list_head *dwq = &target->bt_delwrite_queue;
spinlock_t *dwlk = &target->bt_delwrite_lock;
int skipped = 0;
int force;
force = test_and_clear_bit(XBT_FORCE_FLUSH, &target->bt_flags);
INIT_LIST_HEAD(list);
spin_lock(dwlk);
list_for_each_entry_safe(bp, n, dwq, b_list) {
spin_lock(&target->bt_delwri_lock);
list_for_each_entry_safe(bp, n, &target->bt_delwri_queue, b_list) {
ASSERT(bp->b_flags & XBF_DELWRI);
if (!xfs_buf_ispinned(bp) && xfs_buf_trylock(bp)) {
@@ -1694,10 +1657,9 @@ xfs_buf_delwri_split(
} else
skipped++;
}
spin_unlock(dwlk);
spin_unlock(&target->bt_delwri_lock);
return skipped;
}
/*
@@ -1747,7 +1709,7 @@ xfsbufd(
}
/* sleep for a long time if there is nothing to do. */
if (list_empty(&target->bt_delwrite_queue))
if (list_empty(&target->bt_delwri_queue))
tout = MAX_SCHEDULE_TIMEOUT;
schedule_timeout_interruptible(tout);
@@ -1783,9 +1745,7 @@ xfs_flush_buftarg(
LIST_HEAD(wait_list);
struct blk_plug plug;
xfs_buf_runall_queues(xfsconvertd_workqueue);
xfs_buf_runall_queues(xfsdatad_workqueue);
xfs_buf_runall_queues(xfslogd_workqueue);
flush_workqueue(xfslogd_workqueue);
set_bit(XBT_FORCE_FLUSH, &target->bt_flags);
pincount = xfs_buf_delwri_split(target, &tmp_list, 0);
@@ -1866,11 +1826,3 @@ xfs_buf_terminate(void)
destroy_workqueue(xfslogd_workqueue);
kmem_zone_destroy(xfs_buf_zone);
}
#ifdef CONFIG_KDB_MODULES
struct list_head *
xfs_get_buftarg_list(void)
{
return &xfs_buftarg_list;
}
#endif
+10 -39
View File
@@ -105,8 +105,8 @@ typedef struct xfs_buftarg {
/* per device delwri queue */
struct task_struct *bt_task;
struct list_head bt_delwrite_queue;
spinlock_t bt_delwrite_lock;
struct list_head bt_delwri_queue;
spinlock_t bt_delwri_lock;
unsigned long bt_flags;
/* LRU control structures */
@@ -175,7 +175,8 @@ extern xfs_buf_t *xfs_buf_get(xfs_buftarg_t *, xfs_off_t, size_t,
extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t,
xfs_buf_flags_t);
extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *);
struct xfs_buf *xfs_buf_alloc(struct xfs_buftarg *, xfs_off_t, size_t,
xfs_buf_flags_t);
extern void xfs_buf_set_empty(struct xfs_buf *bp, size_t len);
extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int);
extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t);
@@ -197,14 +198,14 @@ extern void xfs_buf_unlock(xfs_buf_t *);
((bp)->b_sema.count <= 0)
/* Buffer Read and Write Routines */
extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp);
extern void xfs_bdwrite(void *mp, xfs_buf_t *bp);
extern int xfs_bwrite(struct xfs_buf *bp);
extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
extern int xfs_bdstrat_cb(struct xfs_buf *);
extern void xfs_buf_ioend(xfs_buf_t *, int);
extern void xfs_buf_ioerror(xfs_buf_t *, int);
extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func);
extern int xfs_buf_iorequest(xfs_buf_t *);
extern int xfs_buf_iowait(xfs_buf_t *);
extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
@@ -221,38 +222,22 @@ static inline int xfs_buf_geterror(xfs_buf_t *bp)
extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t);
/* Delayed Write Buffer Routines */
extern void xfs_buf_delwri_dequeue(xfs_buf_t *);
extern void xfs_buf_delwri_promote(xfs_buf_t *);
extern void xfs_buf_delwri_queue(struct xfs_buf *);
extern void xfs_buf_delwri_dequeue(struct xfs_buf *);
extern void xfs_buf_delwri_promote(struct xfs_buf *);
/* Buffer Daemon Setup Routines */
extern int xfs_buf_init(void);
extern void xfs_buf_terminate(void);
static inline const char *
xfs_buf_target_name(struct xfs_buftarg *target)
{
static char __b[BDEVNAME_SIZE];
return bdevname(target->bt_bdev, __b);
}
#define XFS_BUF_ZEROFLAGS(bp) \
((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI| \
XBF_SYNCIO|XBF_FUA|XBF_FLUSH))
void xfs_buf_stale(struct xfs_buf *bp);
#define XFS_BUF_STALE(bp) xfs_buf_stale(bp);
#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE)
#define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XBF_STALE)
#define XFS_BUF_SUPER_STALE(bp) do { \
XFS_BUF_STALE(bp); \
xfs_buf_delwri_dequeue(bp); \
XFS_BUF_DONE(bp); \
} while (0)
#define XFS_BUF_DELAYWRITE(bp) ((bp)->b_flags |= XBF_DELWRI)
#define XFS_BUF_UNDELAYWRITE(bp) xfs_buf_delwri_dequeue(bp)
#define XFS_BUF_ISDELAYWRITE(bp) ((bp)->b_flags & XBF_DELWRI)
#define XFS_BUF_DONE(bp) ((bp)->b_flags |= XBF_DONE)
@@ -280,23 +265,16 @@ void xfs_buf_stale(struct xfs_buf *bp);
#define XFS_BUF_SIZE(bp) ((bp)->b_buffer_length)
#define XFS_BUF_SET_SIZE(bp, cnt) ((bp)->b_buffer_length = (cnt))
static inline void
xfs_buf_set_ref(
struct xfs_buf *bp,
int lru_ref)
static inline void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref)
{
atomic_set(&bp->b_lru_ref, lru_ref);
}
#define XFS_BUF_SET_VTYPE_REF(bp, type, ref) xfs_buf_set_ref(bp, ref)
#define XFS_BUF_SET_VTYPE(bp, type) do { } while (0)
static inline int xfs_buf_ispinned(struct xfs_buf *bp)
{
return atomic_read(&bp->b_pin_count);
}
#define XFS_BUF_FINISH_IOWAIT(bp) complete(&bp->b_iowait);
static inline void xfs_buf_relse(xfs_buf_t *bp)
{
xfs_buf_unlock(bp);
@@ -313,14 +291,7 @@ extern void xfs_wait_buftarg(xfs_buftarg_t *);
extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
extern int xfs_flush_buftarg(xfs_buftarg_t *, int);
#ifdef CONFIG_KDB_MODULES
extern struct list_head *xfs_get_buftarg_list(void);
#endif
#define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev)
#define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev)
#define xfs_binval(buftarg) xfs_flush_buftarg(buftarg, 1)
#define XFS_bflush(buftarg) xfs_flush_buftarg(buftarg, 1)
#endif /* __XFS_BUF_H__ */
+5 -7
View File
@@ -967,7 +967,8 @@ xfs_buf_iodone_callbacks(
* I/O errors, there's no point in giving this a retry.
*/
if (XFS_FORCED_SHUTDOWN(mp)) {
XFS_BUF_SUPER_STALE(bp);
xfs_buf_stale(bp);
XFS_BUF_DONE(bp);
trace_xfs_buf_item_iodone(bp, _RET_IP_);
goto do_callbacks;
}
@@ -975,9 +976,7 @@ xfs_buf_iodone_callbacks(
if (bp->b_target != lasttarg ||
time_after(jiffies, (lasttime + 5*HZ))) {
lasttime = jiffies;
xfs_alert(mp, "Device %s: metadata write error block 0x%llx",
xfs_buf_target_name(bp->b_target),
(__uint64_t)XFS_BUF_ADDR(bp));
xfs_buf_ioerror_alert(bp, __func__);
}
lasttarg = bp->b_target;
@@ -993,7 +992,7 @@ xfs_buf_iodone_callbacks(
xfs_buf_ioerror(bp, 0); /* errno of 0 unsets the flag */
if (!XFS_BUF_ISSTALE(bp)) {
XFS_BUF_DELAYWRITE(bp);
xfs_buf_delwri_queue(bp);
XFS_BUF_DONE(bp);
}
ASSERT(bp->b_iodone != NULL);
@@ -1006,9 +1005,8 @@ xfs_buf_iodone_callbacks(
* If the write of the buffer was synchronous, we want to make
* sure to return the error to the caller of xfs_bwrite().
*/
XFS_BUF_STALE(bp);
xfs_buf_stale(bp);
XFS_BUF_DONE(bp);
XFS_BUF_UNDELAYWRITE(bp);
trace_xfs_buf_error_relse(bp, _RET_IP_);
+16 -38
View File
@@ -1578,9 +1578,8 @@ xfs_da_grow_inode_int(
*/
nmap = 1;
ASSERT(args->firstblock != NULL);
error = xfs_bmapi(tp, dp, *bno, count,
xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|
XFS_BMAPI_CONTIG,
error = xfs_bmapi_write(tp, dp, *bno, count,
xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG,
args->firstblock, args->total, &map, &nmap,
args->flist);
if (error)
@@ -1602,9 +1601,8 @@ xfs_da_grow_inode_int(
for (b = *bno, mapi = 0; b < *bno + count; ) {
nmap = MIN(XFS_BMAP_MAX_NMAP, count);
c = (int)(*bno + count - b);
error = xfs_bmapi(tp, dp, b, c,
xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE|
XFS_BMAPI_METADATA,
error = xfs_bmapi_write(tp, dp, b, c,
xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
args->firstblock, args->total,
&mapp[mapi], &nmap, args->flist);
if (error)
@@ -1975,33 +1973,16 @@ xfs_da_do_buf(
/*
* Optimize the one-block case.
*/
if (nfsb == 1) {
xfs_fsblock_t fsb;
if ((error =
xfs_bmapi_single(trans, dp, whichfork, &fsb,
(xfs_fileoff_t)bno))) {
return error;
}
if (nfsb == 1)
mapp = &map;
if (fsb == NULLFSBLOCK) {
nmap = 0;
} else {
map.br_startblock = fsb;
map.br_startoff = (xfs_fileoff_t)bno;
map.br_blockcount = 1;
nmap = 1;
}
} else {
else
mapp = kmem_alloc(sizeof(*mapp) * nfsb, KM_SLEEP);
nmap = nfsb;
if ((error = xfs_bmapi(trans, dp, (xfs_fileoff_t)bno,
nfsb,
XFS_BMAPI_METADATA |
xfs_bmapi_aflag(whichfork),
NULL, 0, mapp, &nmap, NULL)))
goto exit0;
}
nmap = nfsb;
error = xfs_bmapi_read(dp, (xfs_fileoff_t)bno, nfsb, mapp,
&nmap, xfs_bmapi_aflag(whichfork));
if (error)
goto exit0;
} else {
map.br_startblock = XFS_DADDR_TO_FSB(mp, mappedbno);
map.br_startoff = (xfs_fileoff_t)bno;
@@ -2072,13 +2053,10 @@ xfs_da_do_buf(
if (!bp)
continue;
if (caller == 1) {
if (whichfork == XFS_ATTR_FORK) {
XFS_BUF_SET_VTYPE_REF(bp, B_FS_ATTR_BTREE,
XFS_ATTR_BTREE_REF);
} else {
XFS_BUF_SET_VTYPE_REF(bp, B_FS_DIR_BTREE,
XFS_DIR_BTREE_REF);
}
if (whichfork == XFS_ATTR_FORK)
xfs_buf_set_ref(bp, XFS_ATTR_BTREE_REF);
else
xfs_buf_set_ref(bp, XFS_DIR_BTREE_REF);
}
if (bplist) {
bplist[nbplist++] = bp;
+3 -3
View File
@@ -425,8 +425,8 @@ xfs_swap_extents(
}
xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
xfs_trans_ijoin_ref(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
xfs_trans_log_inode(tp, ip, ilf_fields);
xfs_trans_log_inode(tp, tip, tilf_fields);
@@ -438,7 +438,7 @@ xfs_swap_extents(
if (mp->m_flags & XFS_MOUNT_WSYNC)
xfs_trans_set_sync(tp);
error = xfs_trans_commit(tp, XFS_TRANS_SWAPEXT);
error = xfs_trans_commit(tp, 0);
trace_xfs_swap_extent_after(ip, 0);
trace_xfs_swap_extent_after(tip, 1);
+2 -4
View File
@@ -888,12 +888,10 @@ xfs_dir2_leaf_getdents(
* we already have in the table.
*/
nmap = map_size - map_valid;
error = xfs_bmapi(NULL, dp,
map_off,
error = xfs_bmapi_read(dp, map_off,
xfs_dir2_byte_to_da(mp,
XFS_DIR2_LEAF_OFFSET) - map_off,
XFS_BMAPI_METADATA, NULL, 0,
&map[map_valid], &nmap, NULL);
&map[map_valid], &nmap, 0);
/*
* Don't know if we should ignore this or
* try to return an error.
+10 -10
View File
@@ -38,7 +38,7 @@ xfs_trim_extents(
struct xfs_mount *mp,
xfs_agnumber_t agno,
xfs_fsblock_t start,
xfs_fsblock_t len,
xfs_fsblock_t end,
xfs_fsblock_t minlen,
__uint64_t *blocks_trimmed)
{
@@ -100,7 +100,7 @@ xfs_trim_extents(
* down partially overlapping ranges for now.
*/
if (XFS_AGB_TO_FSB(mp, agno, fbno) + flen < start ||
XFS_AGB_TO_FSB(mp, agno, fbno) >= start + len) {
XFS_AGB_TO_FSB(mp, agno, fbno) > end) {
trace_xfs_discard_exclude(mp, agno, fbno, flen);
goto next_extent;
}
@@ -145,7 +145,7 @@ xfs_ioc_trim(
struct request_queue *q = mp->m_ddev_targp->bt_bdev->bd_disk->queue;
unsigned int granularity = q->limits.discard_granularity;
struct fstrim_range range;
xfs_fsblock_t start, len, minlen;
xfs_fsblock_t start, end, minlen;
xfs_agnumber_t start_agno, end_agno, agno;
__uint64_t blocks_trimmed = 0;
int error, last_error = 0;
@@ -165,19 +165,19 @@ xfs_ioc_trim(
* matter as trimming blocks is an advisory interface.
*/
start = XFS_B_TO_FSBT(mp, range.start);
len = XFS_B_TO_FSBT(mp, range.len);
end = start + XFS_B_TO_FSBT(mp, range.len) - 1;
minlen = XFS_B_TO_FSB(mp, max_t(u64, granularity, range.minlen));
start_agno = XFS_FSB_TO_AGNO(mp, start);
if (start_agno >= mp->m_sb.sb_agcount)
if (start >= mp->m_sb.sb_dblocks)
return -XFS_ERROR(EINVAL);
if (end > mp->m_sb.sb_dblocks - 1)
end = mp->m_sb.sb_dblocks - 1;
end_agno = XFS_FSB_TO_AGNO(mp, start + len);
if (end_agno >= mp->m_sb.sb_agcount)
end_agno = mp->m_sb.sb_agcount - 1;
start_agno = XFS_FSB_TO_AGNO(mp, start);
end_agno = XFS_FSB_TO_AGNO(mp, end);
for (agno = start_agno; agno <= end_agno; agno++) {
error = -xfs_trim_extents(mp, agno, start, len, minlen,
error = -xfs_trim_extents(mp, agno, start, end, minlen,
&blocks_trimmed);
if (error)
last_error = error;
+17 -15
View File
@@ -377,16 +377,14 @@ xfs_qm_dqalloc(
return (ESRCH);
}
xfs_trans_ijoin_ref(tp, quotip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL);
nmaps = 1;
if ((error = xfs_bmapi(tp, quotip,
offset_fsb, XFS_DQUOT_CLUSTER_SIZE_FSB,
XFS_BMAPI_METADATA | XFS_BMAPI_WRITE,
&firstblock,
XFS_QM_DQALLOC_SPACE_RES(mp),
&map, &nmaps, &flist))) {
error = xfs_bmapi_write(tp, quotip, offset_fsb,
XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA,
&firstblock, XFS_QM_DQALLOC_SPACE_RES(mp),
&map, &nmaps, &flist);
if (error)
goto error0;
}
ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
ASSERT(nmaps == 1);
ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
@@ -402,8 +400,11 @@ xfs_qm_dqalloc(
dqp->q_blkno,
mp->m_quotainfo->qi_dqchunklen,
0);
if (!bp || (error = xfs_buf_geterror(bp)))
error = xfs_buf_geterror(bp);
if (error)
goto error1;
/*
* Make a chunk of dquots out of this buffer and log
* the entire thing.
@@ -485,9 +486,8 @@ xfs_qm_dqtobp(
/*
* Find the block map; no allocations yet
*/
error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset,
XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA,
NULL, 0, &map, &nmaps, NULL);
error = xfs_bmapi_read(quotip, dqp->q_fileoffset,
XFS_DQUOT_CLUSTER_SIZE_FSB, &map, &nmaps, 0);
xfs_iunlock(quotip, XFS_ILOCK_SHARED);
if (error)
@@ -605,7 +605,7 @@ xfs_qm_dqread(
dqp->q_res_rtbcount = be64_to_cpu(ddqp->d_rtbcount);
/* Mark the buf so that this will stay incore a little longer */
XFS_BUF_SET_VTYPE_REF(bp, B_FS_DQUOT, XFS_DQUOT_REF);
xfs_buf_set_ref(bp, XFS_DQUOT_REF);
/*
* We got the buffer with a xfs_trans_read_buf() (in dqtobp())
@@ -1242,9 +1242,11 @@ xfs_qm_dqflush(
}
if (flags & SYNC_WAIT)
error = xfs_bwrite(mp, bp);
error = xfs_bwrite(bp);
else
xfs_bdwrite(mp, bp);
xfs_buf_delwri_queue(bp);
xfs_buf_relse(bp);
trace_xfs_dqflush_done(dqp);
+6 -6
View File
@@ -229,16 +229,16 @@ xfs_fs_nfs_commit_metadata(
{
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
int error = 0;
xfs_lsn_t lsn = 0;
xfs_ilock(ip, XFS_ILOCK_SHARED);
if (xfs_ipincount(ip)) {
error = _xfs_log_force_lsn(mp, ip->i_itemp->ili_last_lsn,
XFS_LOG_SYNC, NULL);
}
if (xfs_ipincount(ip))
lsn = ip->i_itemp->ili_last_lsn;
xfs_iunlock(ip, XFS_ILOCK_SHARED);
return error;
if (!lsn)
return 0;
return _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
}
const struct export_operations xfs_export_operations = {
+125 -43
View File
@@ -124,6 +124,35 @@ xfs_iozero(
return (-status);
}
/*
* Fsync operations on directories are much simpler than on regular files,
* as there is no file data to flush, and thus also no need for explicit
* cache flush operations, and there are no non-transaction metadata updates
* on directories either.
*/
STATIC int
xfs_dir_fsync(
struct file *file,
loff_t start,
loff_t end,
int datasync)
{
struct xfs_inode *ip = XFS_I(file->f_mapping->host);
struct xfs_mount *mp = ip->i_mount;
xfs_lsn_t lsn = 0;
trace_xfs_dir_fsync(ip);
xfs_ilock(ip, XFS_ILOCK_SHARED);
if (xfs_ipincount(ip))
lsn = ip->i_itemp->ili_last_lsn;
xfs_iunlock(ip, XFS_ILOCK_SHARED);
if (!lsn)
return 0;
return _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
}
STATIC int
xfs_file_fsync(
struct file *file,
@@ -137,6 +166,7 @@ xfs_file_fsync(
struct xfs_trans *tp;
int error = 0;
int log_flushed = 0;
xfs_lsn_t lsn = 0;
trace_xfs_file_fsync(ip);
@@ -149,10 +179,6 @@ xfs_file_fsync(
xfs_iflags_clear(ip, XFS_ITRUNCATED);
xfs_ilock(ip, XFS_IOLOCK_SHARED);
xfs_ioend_wait(ip);
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
if (mp->m_flags & XFS_MOUNT_BARRIER) {
/*
* If we have an RT and/or log subvolume we need to make sure
@@ -216,11 +242,11 @@ xfs_file_fsync(
* transaction. So we play it safe and fire off the
* transaction anyway.
*/
xfs_trans_ijoin(tp, ip);
xfs_trans_ijoin(tp, ip, 0);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
xfs_trans_set_sync(tp);
error = _xfs_trans_commit(tp, 0, &log_flushed);
error = xfs_trans_commit(tp, 0);
lsn = ip->i_itemp->ili_last_lsn;
xfs_iunlock(ip, XFS_ILOCK_EXCL);
} else {
/*
@@ -231,14 +257,14 @@ xfs_file_fsync(
* disk yet, the inode will be still be pinned. If it is,
* force the log.
*/
if (xfs_ipincount(ip)) {
error = _xfs_log_force_lsn(mp,
ip->i_itemp->ili_last_lsn,
XFS_LOG_SYNC, &log_flushed);
}
if (xfs_ipincount(ip))
lsn = ip->i_itemp->ili_last_lsn;
xfs_iunlock(ip, XFS_ILOCK_SHARED);
}
if (!error && lsn)
error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);
/*
* If we only have a single device, and the log force about was
* a no-op we might have to flush the data device cache here.
@@ -317,7 +343,19 @@ xfs_file_aio_read(
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
if (unlikely(ioflags & IO_ISDIRECT)) {
/*
* Locking is a bit tricky here. If we take an exclusive lock
* for direct IO, we effectively serialise all new concurrent
* read IO to this file and block it behind IO that is currently in
* progress because IO in progress holds the IO lock shared. We only
* need to hold the lock exclusive to blow away the page cache, so
* only take lock exclusively if the page cache needs invalidation.
* This allows the normal direct IO case of no page cache pages to
* proceeed concurrently without serialisation.
*/
xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
if ((ioflags & IO_ISDIRECT) && inode->i_mapping->nrpages) {
xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
if (inode->i_mapping->nrpages) {
@@ -330,8 +368,7 @@ xfs_file_aio_read(
}
}
xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
} else
xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
}
trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags);
@@ -407,11 +444,13 @@ xfs_aio_write_isize_update(
*/
STATIC void
xfs_aio_write_newsize_update(
struct xfs_inode *ip)
struct xfs_inode *ip,
xfs_fsize_t new_size)
{
if (ip->i_new_size) {
if (new_size == ip->i_new_size) {
xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
ip->i_new_size = 0;
if (new_size == ip->i_new_size)
ip->i_new_size = 0;
if (ip->i_d.di_size > ip->i_size)
ip->i_d.di_size = ip->i_size;
xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
@@ -462,7 +501,7 @@ xfs_file_splice_write(
ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
xfs_aio_write_isize_update(inode, ppos, ret);
xfs_aio_write_newsize_update(ip);
xfs_aio_write_newsize_update(ip, new_size);
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
return ret;
}
@@ -500,11 +539,9 @@ xfs_zero_last_block(
last_fsb = XFS_B_TO_FSBT(mp, isize);
nimaps = 1;
error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap,
&nimaps, NULL);
if (error) {
error = xfs_bmapi_read(ip, last_fsb, 1, &imap, &nimaps, 0);
if (error)
return error;
}
ASSERT(nimaps > 0);
/*
* If the block underlying isize is just a hole, then there
@@ -595,8 +632,8 @@ xfs_zero_eof(
while (start_zero_fsb <= end_zero_fsb) {
nimaps = 1;
zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb,
0, NULL, 0, &imap, &nimaps, NULL);
error = xfs_bmapi_read(ip, start_zero_fsb, zero_count_fsb,
&imap, &nimaps, 0);
if (error) {
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
return error;
@@ -659,6 +696,7 @@ xfs_file_aio_write_checks(
struct file *file,
loff_t *pos,
size_t *count,
xfs_fsize_t *new_sizep,
int *iolock)
{
struct inode *inode = file->f_mapping->host;
@@ -666,6 +704,9 @@ xfs_file_aio_write_checks(
xfs_fsize_t new_size;
int error = 0;
xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
*new_sizep = 0;
restart:
error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode));
if (error) {
xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock);
@@ -673,20 +714,41 @@ xfs_file_aio_write_checks(
return error;
}
new_size = *pos + *count;
if (new_size > ip->i_size)
ip->i_new_size = new_size;
if (likely(!(file->f_mode & FMODE_NOCMTIME)))
file_update_time(file);
/*
* If the offset is beyond the size of the file, we need to zero any
* blocks that fall between the existing EOF and the start of this
* write.
* write. There is no need to issue zeroing if another in-flght IO ends
* at or before this one If zeronig is needed and we are currently
* holding the iolock shared, we need to update it to exclusive which
* involves dropping all locks and relocking to maintain correct locking
* order. If we do this, restart the function to ensure all checks and
* values are still valid.
*/
if (*pos > ip->i_size)
if ((ip->i_new_size && *pos > ip->i_new_size) ||
(!ip->i_new_size && *pos > ip->i_size)) {
if (*iolock == XFS_IOLOCK_SHARED) {
xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock);
*iolock = XFS_IOLOCK_EXCL;
xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock);
goto restart;
}
error = -xfs_zero_eof(ip, *pos, ip->i_size);
}
/*
* If this IO extends beyond EOF, we may need to update ip->i_new_size.
* We have already zeroed space beyond EOF (if necessary). Only update
* ip->i_new_size if this IO ends beyond any other in-flight writes.
*/
new_size = *pos + *count;
if (new_size > ip->i_size) {
if (new_size > ip->i_new_size)
ip->i_new_size = new_size;
*new_sizep = new_size;
}
xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
if (error)
@@ -721,7 +783,7 @@ xfs_file_aio_write_checks(
* the dio layer. To avoid the problem with aio, we also need to wait for
* outstanding IOs to complete so that unwritten extent conversion is completed
* before we try to map the overlapping block. This is currently implemented by
* hitting it with a big hammer (i.e. xfs_ioend_wait()).
* hitting it with a big hammer (i.e. inode_dio_wait()).
*
* Returns with locks held indicated by @iolock and errors indicated by
* negative return values.
@@ -733,6 +795,7 @@ xfs_file_dio_aio_write(
unsigned long nr_segs,
loff_t pos,
size_t ocount,
xfs_fsize_t *new_size,
int *iolock)
{
struct file *file = iocb->ki_filp;
@@ -753,18 +816,35 @@ xfs_file_dio_aio_write(
if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask))
unaligned_io = 1;
if (unaligned_io || mapping->nrpages || pos > ip->i_size)
/*
* We don't need to take an exclusive lock unless there page cache needs
* to be invalidated or unaligned IO is being executed. We don't need to
* consider the EOF extension case here because
* xfs_file_aio_write_checks() will relock the inode as necessary for
* EOF zeroing cases and fill out the new inode size as appropriate.
*/
if (unaligned_io || mapping->nrpages)
*iolock = XFS_IOLOCK_EXCL;
else
*iolock = XFS_IOLOCK_SHARED;
xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock);
xfs_rw_ilock(ip, *iolock);
ret = xfs_file_aio_write_checks(file, &pos, &count, iolock);
/*
* Recheck if there are cached pages that need invalidate after we got
* the iolock to protect against other threads adding new pages while
* we were waiting for the iolock.
*/
if (mapping->nrpages && *iolock == XFS_IOLOCK_SHARED) {
xfs_rw_iunlock(ip, *iolock);
*iolock = XFS_IOLOCK_EXCL;
xfs_rw_ilock(ip, *iolock);
}
ret = xfs_file_aio_write_checks(file, &pos, &count, new_size, iolock);
if (ret)
return ret;
if (mapping->nrpages) {
WARN_ON(*iolock != XFS_IOLOCK_EXCL);
ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1,
FI_REMAPF_LOCKED);
if (ret)
@@ -776,7 +856,7 @@ xfs_file_dio_aio_write(
* otherwise demote the lock if we had to flush cached pages
*/
if (unaligned_io)
xfs_ioend_wait(ip);
inode_dio_wait(inode);
else if (*iolock == XFS_IOLOCK_EXCL) {
xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
*iolock = XFS_IOLOCK_SHARED;
@@ -798,6 +878,7 @@ xfs_file_buffered_aio_write(
unsigned long nr_segs,
loff_t pos,
size_t ocount,
xfs_fsize_t *new_size,
int *iolock)
{
struct file *file = iocb->ki_filp;
@@ -809,9 +890,9 @@ xfs_file_buffered_aio_write(
size_t count = ocount;
*iolock = XFS_IOLOCK_EXCL;
xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock);
xfs_rw_ilock(ip, *iolock);
ret = xfs_file_aio_write_checks(file, &pos, &count, iolock);
ret = xfs_file_aio_write_checks(file, &pos, &count, new_size, iolock);
if (ret)
return ret;
@@ -851,6 +932,7 @@ xfs_file_aio_write(
ssize_t ret;
int iolock;
size_t ocount = 0;
xfs_fsize_t new_size = 0;
XFS_STATS_INC(xs_write_calls);
@@ -870,10 +952,10 @@ xfs_file_aio_write(
if (unlikely(file->f_flags & O_DIRECT))
ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos,
ocount, &iolock);
ocount, &new_size, &iolock);
else
ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos,
ocount, &iolock);
ocount, &new_size, &iolock);
xfs_aio_write_isize_update(inode, &iocb->ki_pos, ret);
@@ -894,7 +976,7 @@ xfs_file_aio_write(
}
out_unlock:
xfs_aio_write_newsize_update(ip);
xfs_aio_write_newsize_update(ip, new_size);
xfs_rw_iunlock(ip, iolock);
return ret;
}
@@ -1087,7 +1169,7 @@ const struct file_operations xfs_dir_file_operations = {
#ifdef CONFIG_COMPAT
.compat_ioctl = xfs_file_compat_ioctl,
#endif
.fsync = xfs_file_fsync,
.fsync = xfs_dir_fsync,
};
static const struct vm_operations_struct xfs_file_vm_ops = {
+2 -2
View File
@@ -682,7 +682,7 @@ xfs_filestream_new_ag(
ip = ap->ip;
mp = ip->i_mount;
cache = mp->m_filestream;
minlen = ap->alen;
minlen = ap->length;
*agp = NULLAGNUMBER;
/*
@@ -761,7 +761,7 @@ xfs_filestream_new_ag(
*/
ag = (ag == NULLAGNUMBER) ? 0 : (ag + 1) % mp->m_sb.sb_agcount;
flags = (ap->userdata ? XFS_PICK_USERDATA : 0) |
(ap->low ? XFS_PICK_LOWSPACE : 0);
(ap->flist->xbf_low ? XFS_PICK_LOWSPACE : 0);
err = _xfs_filestream_pick_ag(mp, ag, agp, flags, minlen);
if (err || *agp == NULLAGNUMBER)
+42 -18
View File
@@ -194,6 +194,10 @@ xfs_growfs_data_private(
bp = xfs_buf_get(mp->m_ddev_targp,
XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
XFS_FSS_TO_BB(mp, 1), XBF_LOCK | XBF_MAPPED);
if (!bp) {
error = ENOMEM;
goto error0;
}
agf = XFS_BUF_TO_AGF(bp);
memset(agf, 0, mp->m_sb.sb_sectsize);
agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC);
@@ -216,16 +220,21 @@ xfs_growfs_data_private(
tmpsize = agsize - XFS_PREALLOC_BLOCKS(mp);
agf->agf_freeblks = cpu_to_be32(tmpsize);
agf->agf_longest = cpu_to_be32(tmpsize);
error = xfs_bwrite(mp, bp);
if (error) {
error = xfs_bwrite(bp);
xfs_buf_relse(bp);
if (error)
goto error0;
}
/*
* AG inode header block
*/
bp = xfs_buf_get(mp->m_ddev_targp,
XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
XFS_FSS_TO_BB(mp, 1), XBF_LOCK | XBF_MAPPED);
if (!bp) {
error = ENOMEM;
goto error0;
}
agi = XFS_BUF_TO_AGI(bp);
memset(agi, 0, mp->m_sb.sb_sectsize);
agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC);
@@ -240,10 +249,11 @@ xfs_growfs_data_private(
agi->agi_dirino = cpu_to_be32(NULLAGINO);
for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++)
agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
error = xfs_bwrite(mp, bp);
if (error) {
error = xfs_bwrite(bp);
xfs_buf_relse(bp);
if (error)
goto error0;
}
/*
* BNO btree root block
*/
@@ -251,6 +261,10 @@ xfs_growfs_data_private(
XFS_AGB_TO_DADDR(mp, agno, XFS_BNO_BLOCK(mp)),
BTOBB(mp->m_sb.sb_blocksize),
XBF_LOCK | XBF_MAPPED);
if (!bp) {
error = ENOMEM;
goto error0;
}
block = XFS_BUF_TO_BLOCK(bp);
memset(block, 0, mp->m_sb.sb_blocksize);
block->bb_magic = cpu_to_be32(XFS_ABTB_MAGIC);
@@ -262,10 +276,11 @@ xfs_growfs_data_private(
arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp));
arec->ar_blockcount = cpu_to_be32(
agsize - be32_to_cpu(arec->ar_startblock));
error = xfs_bwrite(mp, bp);
if (error) {
error = xfs_bwrite(bp);
xfs_buf_relse(bp);
if (error)
goto error0;
}
/*
* CNT btree root block
*/
@@ -273,6 +288,10 @@ xfs_growfs_data_private(
XFS_AGB_TO_DADDR(mp, agno, XFS_CNT_BLOCK(mp)),
BTOBB(mp->m_sb.sb_blocksize),
XBF_LOCK | XBF_MAPPED);
if (!bp) {
error = ENOMEM;
goto error0;
}
block = XFS_BUF_TO_BLOCK(bp);
memset(block, 0, mp->m_sb.sb_blocksize);
block->bb_magic = cpu_to_be32(XFS_ABTC_MAGIC);
@@ -285,10 +304,11 @@ xfs_growfs_data_private(
arec->ar_blockcount = cpu_to_be32(
agsize - be32_to_cpu(arec->ar_startblock));
nfree += be32_to_cpu(arec->ar_blockcount);
error = xfs_bwrite(mp, bp);
if (error) {
error = xfs_bwrite(bp);
xfs_buf_relse(bp);
if (error)
goto error0;
}
/*
* INO btree root block
*/
@@ -296,6 +316,10 @@ xfs_growfs_data_private(
XFS_AGB_TO_DADDR(mp, agno, XFS_IBT_BLOCK(mp)),
BTOBB(mp->m_sb.sb_blocksize),
XBF_LOCK | XBF_MAPPED);
if (!bp) {
error = ENOMEM;
goto error0;
}
block = XFS_BUF_TO_BLOCK(bp);
memset(block, 0, mp->m_sb.sb_blocksize);
block->bb_magic = cpu_to_be32(XFS_IBT_MAGIC);
@@ -303,10 +327,10 @@ xfs_growfs_data_private(
block->bb_numrecs = 0;
block->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
block->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
error = xfs_bwrite(mp, bp);
if (error) {
error = xfs_bwrite(bp);
xfs_buf_relse(bp);
if (error)
goto error0;
}
}
xfs_trans_agblocks_delta(tp, nfree);
/*
@@ -396,9 +420,9 @@ xfs_growfs_data_private(
* just issue a warning and continue. The real work is
* already done and committed.
*/
if (!(error = xfs_bwrite(mp, bp))) {
continue;
} else {
error = xfs_bwrite(bp);
xfs_buf_relse(bp);
if (error) {
xfs_warn(mp,
"write error %d updating secondary superblock for ag %d",
error, agno);

Some files were not shown because too many files have changed in this diff Show More