Merge tag 'for-linus-v3.11-rc1' of git://oss.sgi.com/xfs/xfs

Pull xfs update from Ben Myers:
 "This includes several bugfixes, part of the work for project quotas
  and group quotas to be used together, performance improvements for
  inode creation/deletion, buffer readahead, and bulkstat,
  implementation of the inode change count, an inode create transaction,
  and the removal of a bunch of dead code.

  There are also some duplicate commits that you already have from the
  3.10-rc series.

   - part of the work to allow project quotas and group quotas to be
     used together
   - inode change count
   - inode create transaction
   - block queue plugging in buffer readahead and bulkstat
   - ordered log vector support
   - removal of dead code in and around xfs_sync_inode_grab,
     xfs_ialloc_get_rec, XFS_MOUNT_RETERR, XFS_ALLOCFREE_LOG_RES,
     XFS_DIROP_LOG_RES, xfs_chash, ctl_table, and
     xfs_growfs_data_private
   - don't keep silent if sunit/swidth can not be changed via mount
   - fix a leak of remote symlink blocks into the filesystem when xattrs
     are used on symlinks
   - fix for fiemap to return FIEMAP_EXTENT_UNKOWN flag on delay extents
   - part of a fix for xfs_fsr
   - disable speculative preallocation with small files
   - performance improvements for inode creates and deletes"

* tag 'for-linus-v3.11-rc1' of git://oss.sgi.com/xfs/xfs: (61 commits)
  xfs: Remove incore use of XFS_OQUOTA_ENFD and XFS_OQUOTA_CHKD
  xfs: Change xfs_dquot_acct to be a 2-dimensional array
  xfs: Code cleanup and removal of some typedef usage
  xfs: Replace macro XFS_DQ_TO_QIP with a function
  xfs: Replace macro XFS_DQUOT_TREE with a function
  xfs: Define a new function xfs_is_quota_inode()
  xfs: implement inode change count
  xfs: Use inode create transaction
  xfs: Inode create item recovery
  xfs: Inode create transaction reservations
  xfs: Inode create log items
  xfs: Introduce an ordered buffer item
  xfs: Introduce ordered log vector support
  xfs: xfs_ifree doesn't need to modify the inode buffer
  xfs: don't do IO when creating an new inode
  xfs: don't use speculative prealloc for small files
  xfs: plug directory buffer readahead
  xfs: add pluging for bulkstat readahead
  xfs: Remove dead function prototype xfs_sync_inode_grab()
  xfs: Remove the left function variable from xfs_ialloc_get_rec()
  ...
This commit is contained in:
Linus Torvalds
2013-07-09 12:29:12 -07:00
43 changed files with 1184 additions and 481 deletions

View File

@@ -71,6 +71,7 @@ xfs-y += xfs_alloc.o \
xfs_dir2_sf.o \
xfs_ialloc.o \
xfs_ialloc_btree.o \
xfs_icreate_item.o \
xfs_inode.o \
xfs_log_recover.o \
xfs_mount.o \

View File

@@ -175,6 +175,7 @@ xfs_alloc_compute_diff(
xfs_agblock_t wantbno, /* target starting block */
xfs_extlen_t wantlen, /* target length */
xfs_extlen_t alignment, /* target alignment */
char userdata, /* are we allocating data? */
xfs_agblock_t freebno, /* freespace's starting block */
xfs_extlen_t freelen, /* freespace's length */
xfs_agblock_t *newbnop) /* result: best start block from free */
@@ -189,7 +190,14 @@ xfs_alloc_compute_diff(
ASSERT(freelen >= wantlen);
freeend = freebno + freelen;
wantend = wantbno + wantlen;
if (freebno >= wantbno) {
/*
* We want to allocate from the start of a free extent if it is past
* the desired block or if we are allocating user data and the free
* extent is before desired block. The second case is there to allow
* for contiguous allocation from the remaining free space if the file
* grows in the short term.
*/
if (freebno >= wantbno || (userdata && freeend < wantend)) {
if ((newbno1 = roundup(freebno, alignment)) >= freeend)
newbno1 = NULLAGBLOCK;
} else if (freeend >= wantend && alignment > 1) {
@@ -805,7 +813,8 @@ xfs_alloc_find_best_extent(
xfs_alloc_fix_len(args);
sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
args->alignment, *sbnoa,
args->alignment,
args->userdata, *sbnoa,
*slena, &new);
/*
@@ -976,7 +985,8 @@ restart:
if (args->len < blen)
continue;
ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
args->alignment, ltbnoa, ltlena, &ltnew);
args->alignment, args->userdata, ltbnoa,
ltlena, &ltnew);
if (ltnew != NULLAGBLOCK &&
(args->len > blen || ltdiff < bdiff)) {
bdiff = ltdiff;
@@ -1128,7 +1138,8 @@ restart:
args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
xfs_alloc_fix_len(args);
ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
args->alignment, ltbnoa, ltlena, &ltnew);
args->alignment, args->userdata, ltbnoa,
ltlena, &ltnew);
error = xfs_alloc_find_best_extent(args,
&bno_cur_lt, &bno_cur_gt,
@@ -1144,7 +1155,8 @@ restart:
args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
xfs_alloc_fix_len(args);
gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
args->alignment, gtbnoa, gtlena, &gtnew);
args->alignment, args->userdata, gtbnoa,
gtlena, &gtnew);
error = xfs_alloc_find_best_extent(args,
&bno_cur_gt, &bno_cur_lt,
@@ -1203,7 +1215,7 @@ restart:
}
rlen = args->len;
(void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
ltbnoa, ltlena, &ltnew);
args->userdata, ltbnoa, ltlena, &ltnew);
ASSERT(ltnew >= ltbno);
ASSERT(ltnew + rlen <= ltbnoa + ltlena);
ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));

View File

@@ -196,6 +196,8 @@ typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t;
#define XFS_BMDR_SPACE_CALC(nrecs) \
(int)(sizeof(xfs_bmdr_block_t) + \
((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t))))
#define XFS_BMAP_BMDR_SPACE(bb) \
(XFS_BMDR_SPACE_CALC(be16_to_cpu((bb)->bb_numrecs)))
/*
* Maximum number of bmap btree levels.

View File

@@ -140,6 +140,16 @@ xfs_buf_item_size(
ASSERT(bip->bli_flags & XFS_BLI_LOGGED);
if (bip->bli_flags & XFS_BLI_ORDERED) {
/*
* The buffer has been logged just to order it.
* It is not being included in the transaction
* commit, so no vectors are used at all.
*/
trace_xfs_buf_item_size_ordered(bip);
return XFS_LOG_VEC_ORDERED;
}
/*
* the vector count is based on the number of buffer vectors we have
* dirty bits in. This will only be greater than one when we have a
@@ -212,6 +222,7 @@ xfs_buf_item_format_segment(
goto out;
}
/*
* Fill in an iovec for each set of contiguous chunks.
*/
@@ -299,18 +310,36 @@ xfs_buf_item_format(
/*
* If it is an inode buffer, transfer the in-memory state to the
* format flags and clear the in-memory state. We do not transfer
* format flags and clear the in-memory state.
*
* For buffer based inode allocation, we do not transfer
* this state if the inode buffer allocation has not yet been committed
* to the log as setting the XFS_BLI_INODE_BUF flag will prevent
* correct replay of the inode allocation.
*
* For icreate item based inode allocation, the buffers aren't written
* to the journal during allocation, and hence we should always tag the
* buffer as an inode buffer so that the correct unlinked list replay
* occurs during recovery.
*/
if (bip->bli_flags & XFS_BLI_INODE_BUF) {
if (!((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) &&
if (xfs_sb_version_hascrc(&lip->li_mountp->m_sb) ||
!((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) &&
xfs_log_item_in_current_chkpt(lip)))
bip->__bli_format.blf_flags |= XFS_BLF_INODE_BUF;
bip->bli_flags &= ~XFS_BLI_INODE_BUF;
}
if ((bip->bli_flags & (XFS_BLI_ORDERED|XFS_BLI_STALE)) ==
XFS_BLI_ORDERED) {
/*
* The buffer has been logged just to order it. It is not being
* included in the transaction commit, so don't format it.
*/
trace_xfs_buf_item_format_ordered(bip);
return;
}
for (i = 0; i < bip->bli_format_count; i++) {
vecp = xfs_buf_item_format_segment(bip, vecp, offset,
&bip->bli_formats[i]);
@@ -340,6 +369,7 @@ xfs_buf_item_pin(
ASSERT(atomic_read(&bip->bli_refcount) > 0);
ASSERT((bip->bli_flags & XFS_BLI_LOGGED) ||
(bip->bli_flags & XFS_BLI_ORDERED) ||
(bip->bli_flags & XFS_BLI_STALE));
trace_xfs_buf_item_pin(bip);
@@ -512,8 +542,9 @@ xfs_buf_item_unlock(
{
struct xfs_buf_log_item *bip = BUF_ITEM(lip);
struct xfs_buf *bp = bip->bli_buf;
int aborted, clean, i;
uint hold;
bool clean;
bool aborted;
int flags;
/* Clear the buffer's association with this transaction. */
bp->b_transp = NULL;
@@ -524,23 +555,21 @@ xfs_buf_item_unlock(
* (cancelled) buffers at unpin time, but we'll never go through the
* pin/unpin cycle if we abort inside commit.
*/
aborted = (lip->li_flags & XFS_LI_ABORTED) != 0;
aborted = (lip->li_flags & XFS_LI_ABORTED) ? true : false;
/*
* Before possibly freeing the buf item, determine if we should
* release the buffer at the end of this routine.
* Before possibly freeing the buf item, copy the per-transaction state
* so we can reference it safely later after clearing it from the
* buffer log item.
*/
hold = bip->bli_flags & XFS_BLI_HOLD;
/* Clear the per transaction state. */
bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD);
flags = bip->bli_flags;
bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD | XFS_BLI_ORDERED);
/*
* If the buf item is marked stale, then don't do anything. We'll
* unlock the buffer and free the buf item when the buffer is unpinned
* for the last time.
*/
if (bip->bli_flags & XFS_BLI_STALE) {
if (flags & XFS_BLI_STALE) {
trace_xfs_buf_item_unlock_stale(bip);
ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
if (!aborted) {
@@ -557,13 +586,19 @@ xfs_buf_item_unlock(
* be the only reference to the buf item, so we free it anyway
* regardless of whether it is dirty or not. A dirty abort implies a
* shutdown, anyway.
*
* Ordered buffers are dirty but may have no recorded changes, so ensure
* we only release clean items here.
*/
clean = 1;
for (i = 0; i < bip->bli_format_count; i++) {
if (!xfs_bitmap_empty(bip->bli_formats[i].blf_data_map,
bip->bli_formats[i].blf_map_size)) {
clean = 0;
break;
clean = (flags & XFS_BLI_DIRTY) ? false : true;
if (clean) {
int i;
for (i = 0; i < bip->bli_format_count; i++) {
if (!xfs_bitmap_empty(bip->bli_formats[i].blf_data_map,
bip->bli_formats[i].blf_map_size)) {
clean = false;
break;
}
}
}
if (clean)
@@ -576,7 +611,7 @@ xfs_buf_item_unlock(
} else
atomic_dec(&bip->bli_refcount);
if (!hold)
if (!(flags & XFS_BLI_HOLD))
xfs_buf_relse(bp);
}
@@ -841,12 +876,6 @@ xfs_buf_item_log(
uint end;
struct xfs_buf *bp = bip->bli_buf;
/*
* Mark the item as having some dirty data for
* quick reference in xfs_buf_item_dirty.
*/
bip->bli_flags |= XFS_BLI_DIRTY;
/*
* walk each buffer segment and mark them dirty appropriately.
*/
@@ -873,7 +902,7 @@ xfs_buf_item_log(
/*
* Return 1 if the buffer has some data that has been logged (at any
* Return 1 if the buffer has been logged or ordered in a transaction (at any
* point, not just the current transaction) and 0 if not.
*/
uint
@@ -907,11 +936,11 @@ void
xfs_buf_item_relse(
xfs_buf_t *bp)
{
xfs_buf_log_item_t *bip;
xfs_buf_log_item_t *bip = bp->b_fspriv;
trace_xfs_buf_item_relse(bp, _RET_IP_);
ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL));
bip = bp->b_fspriv;
bp->b_fspriv = bip->bli_item.li_bio_list;
if (bp->b_fspriv == NULL)
bp->b_iodone = NULL;

View File

@@ -120,6 +120,7 @@ xfs_blft_from_flags(struct xfs_buf_log_format *blf)
#define XFS_BLI_INODE_ALLOC_BUF 0x10
#define XFS_BLI_STALE_INODE 0x20
#define XFS_BLI_INODE_BUF 0x40
#define XFS_BLI_ORDERED 0x80
#define XFS_BLI_FLAGS \
{ XFS_BLI_HOLD, "HOLD" }, \
@@ -128,7 +129,8 @@ xfs_blft_from_flags(struct xfs_buf_log_format *blf)
{ XFS_BLI_LOGGED, "LOGGED" }, \
{ XFS_BLI_INODE_ALLOC_BUF, "INODE_ALLOC" }, \
{ XFS_BLI_STALE_INODE, "STALE_INODE" }, \
{ XFS_BLI_INODE_BUF, "INODE_BUF" }
{ XFS_BLI_INODE_BUF, "INODE_BUF" }, \
{ XFS_BLI_ORDERED, "ORDERED" }
#ifdef __KERNEL__

View File

@@ -24,6 +24,9 @@
#include "xfs_ag.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
#include "xfs_btree.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
@@ -182,7 +185,7 @@ xfs_swap_extents_check_format(
*/
if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
if (XFS_IFORK_BOFF(ip) &&
tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip))
XFS_BMAP_BMDR_SPACE(tip->i_df.if_broot) > XFS_IFORK_BOFF(ip))
return EINVAL;
if (XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <=
XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
@@ -192,9 +195,8 @@ xfs_swap_extents_check_format(
/* Reciprocal target->temp btree format checks */
if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
if (XFS_IFORK_BOFF(tip) &&
ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip))
XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > XFS_IFORK_BOFF(tip))
return EINVAL;
if (XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <=
XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
return EINVAL;

View File

@@ -1108,6 +1108,7 @@ xfs_dir2_leaf_readbuf(
struct xfs_mount *mp = dp->i_mount;
struct xfs_buf *bp = *bpp;
struct xfs_bmbt_irec *map = mip->map;
struct blk_plug plug;
int error = 0;
int length;
int i;
@@ -1236,6 +1237,7 @@ xfs_dir2_leaf_readbuf(
/*
* Do we need more readahead?
*/
blk_start_plug(&plug);
for (mip->ra_index = mip->ra_offset = i = 0;
mip->ra_want > mip->ra_current && i < mip->map_blocks;
i += mp->m_dirblkfsbs) {
@@ -1287,6 +1289,7 @@ xfs_dir2_leaf_readbuf(
}
}
}
blk_finish_plug(&plug);
out:
*bpp = bp;

View File

@@ -570,13 +570,13 @@ xfs_qm_dqtobp(
xfs_buf_t **O_bpp,
uint flags)
{
xfs_bmbt_irec_t map;
int nmaps = 1, error;
xfs_buf_t *bp;
xfs_inode_t *quotip = XFS_DQ_TO_QIP(dqp);
xfs_mount_t *mp = dqp->q_mount;
xfs_dqid_t id = be32_to_cpu(dqp->q_core.d_id);
xfs_trans_t *tp = (tpp ? *tpp : NULL);
struct xfs_bmbt_irec map;
int nmaps = 1, error;
struct xfs_buf *bp;
struct xfs_inode *quotip = xfs_dq_to_quota_inode(dqp);
struct xfs_mount *mp = dqp->q_mount;
xfs_dqid_t id = be32_to_cpu(dqp->q_core.d_id);
struct xfs_trans *tp = (tpp ? *tpp : NULL);
dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk;
@@ -804,7 +804,7 @@ xfs_qm_dqget(
xfs_dquot_t **O_dqpp) /* OUT : locked incore dquot */
{
struct xfs_quotainfo *qi = mp->m_quotainfo;
struct radix_tree_root *tree = XFS_DQUOT_TREE(qi, type);
struct radix_tree_root *tree = xfs_dquot_tree(qi, type);
struct xfs_dquot *dqp;
int error;

View File

@@ -143,10 +143,6 @@ static inline xfs_dquot_t *xfs_inode_dquot(struct xfs_inode *ip, int type)
#define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER)
#define XFS_QM_ISPDQ(dqp) ((dqp)->dq_flags & XFS_DQ_PROJ)
#define XFS_QM_ISGDQ(dqp) ((dqp)->dq_flags & XFS_DQ_GROUP)
#define XFS_DQ_TO_QINF(dqp) ((dqp)->q_mount->m_quotainfo)
#define XFS_DQ_TO_QIP(dqp) (XFS_QM_ISUDQ(dqp) ? \
XFS_DQ_TO_QINF(dqp)->qi_uquotaip : \
XFS_DQ_TO_QINF(dqp)->qi_gquotaip)
extern int xfs_qm_dqread(struct xfs_mount *, xfs_dqid_t, uint,
uint, struct xfs_dquot **);

View File

@@ -176,7 +176,7 @@ xfs_growfs_data_private(
if (!bp)
return EIO;
if (bp->b_error) {
int error = bp->b_error;
error = bp->b_error;
xfs_buf_relse(bp);
return error;
}

View File

@@ -38,6 +38,7 @@
#include "xfs_bmap.h"
#include "xfs_cksum.h"
#include "xfs_buf_item.h"
#include "xfs_icreate_item.h"
/*
@@ -150,12 +151,16 @@ xfs_check_agi_freecount(
#endif
/*
* Initialise a new set of inodes.
* Initialise a new set of inodes. When called without a transaction context
* (e.g. from recovery) we initiate a delayed write of the inode buffers rather
* than logging them (which in a transaction context puts them into the AIL
* for writeback rather than the xfsbufd queue).
*/
STATIC int
int
xfs_ialloc_inode_init(
struct xfs_mount *mp,
struct xfs_trans *tp,
struct list_head *buffer_list,
xfs_agnumber_t agno,
xfs_agblock_t agbno,
xfs_agblock_t length,
@@ -208,6 +213,18 @@ xfs_ialloc_inode_init(
version = 3;
ino = XFS_AGINO_TO_INO(mp, agno,
XFS_OFFBNO_TO_AGINO(mp, agbno, 0));
/*
* log the initialisation that is about to take place as an
* logical operation. This means the transaction does not
* need to log the physical changes to the inode buffers as log
* recovery will know what initialisation is actually needed.
* Hence we only need to log the buffers as "ordered" buffers so
* they track in the AIL as if they were physically logged.
*/
if (tp)
xfs_icreate_log(tp, agno, agbno, XFS_IALLOC_INODES(mp),
mp->m_sb.sb_inodesize, length, gen);
} else if (xfs_sb_version_hasnlink(&mp->m_sb))
version = 2;
else
@@ -223,13 +240,8 @@ xfs_ialloc_inode_init(
XBF_UNMAPPED);
if (!fbuf)
return ENOMEM;
/*
* Initialize all inodes in this buffer and then log them.
*
* XXX: It would be much better if we had just one transaction
* to log a whole cluster of inodes instead of all the
* individual transactions causing a lot of log traffic.
*/
/* Initialize the inode buffers and log them appropriately. */
fbuf->b_ops = &xfs_inode_buf_ops;
xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length));
for (i = 0; i < ninodes; i++) {
@@ -247,18 +259,39 @@ xfs_ialloc_inode_init(
ino++;
uuid_copy(&free->di_uuid, &mp->m_sb.sb_uuid);
xfs_dinode_calc_crc(mp, free);
} else {
} else if (tp) {
/* just log the inode core */
xfs_trans_log_buf(tp, fbuf, ioffset,
ioffset + isize - 1);
}
}
if (version == 3) {
/* need to log the entire buffer */
xfs_trans_log_buf(tp, fbuf, 0,
BBTOB(fbuf->b_length) - 1);
if (tp) {
/*
* Mark the buffer as an inode allocation buffer so it
* sticks in AIL at the point of this allocation
* transaction. This ensures the they are on disk before
* the tail of the log can be moved past this
* transaction (i.e. by preventing relogging from moving
* it forward in the log).
*/
xfs_trans_inode_alloc_buf(tp, fbuf);
if (version == 3) {
/*
* Mark the buffer as ordered so that they are
* not physically logged in the transaction but
* still tracked in the AIL as part of the
* transaction and pin the log appropriately.
*/
xfs_trans_ordered_buf(tp, fbuf);
xfs_trans_log_buf(tp, fbuf, 0,
BBTOB(fbuf->b_length) - 1);
}
} else {
fbuf->b_flags |= XBF_DONE;
xfs_buf_delwri_queue(fbuf, buffer_list);
xfs_buf_relse(fbuf);
}
xfs_trans_inode_alloc_buf(tp, fbuf);
}
return 0;
}
@@ -303,7 +336,7 @@ xfs_ialloc_ag_alloc(
* First try to allocate inodes contiguous with the last-allocated
* chunk of inodes. If the filesystem is striped, this will fill
* an entire stripe unit with inodes.
*/
*/
agi = XFS_BUF_TO_AGI(agbp);
newino = be32_to_cpu(agi->agi_newino);
agno = be32_to_cpu(agi->agi_seqno);
@@ -402,7 +435,7 @@ xfs_ialloc_ag_alloc(
* rather than a linear progression to prevent the next generation
* number from being easily guessable.
*/
error = xfs_ialloc_inode_init(args.mp, tp, agno, args.agbno,
error = xfs_ialloc_inode_init(args.mp, tp, NULL, agno, args.agbno,
args.len, prandom_u32());
if (error)
@@ -615,8 +648,7 @@ xfs_ialloc_get_rec(
struct xfs_btree_cur *cur,
xfs_agino_t agino,
xfs_inobt_rec_incore_t *rec,
int *done,
int left)
int *done)
{
int error;
int i;
@@ -724,12 +756,12 @@ xfs_dialloc_ag(
pag->pagl_leftrec != NULLAGINO &&
pag->pagl_rightrec != NULLAGINO) {
error = xfs_ialloc_get_rec(tcur, pag->pagl_leftrec,
&trec, &doneleft, 1);
&trec, &doneleft);
if (error)
goto error1;
error = xfs_ialloc_get_rec(cur, pag->pagl_rightrec,
&rec, &doneright, 0);
&rec, &doneright);
if (error)
goto error1;
} else {

View File

@@ -150,6 +150,14 @@ int xfs_inobt_lookup(struct xfs_btree_cur *cur, xfs_agino_t ino,
int xfs_inobt_get_rec(struct xfs_btree_cur *cur,
xfs_inobt_rec_incore_t *rec, int *stat);
/*
* Inode chunk initialisation routine
*/
int xfs_ialloc_inode_init(struct xfs_mount *mp, struct xfs_trans *tp,
struct list_head *buffer_list,
xfs_agnumber_t agno, xfs_agblock_t agbno,
xfs_agblock_t length, unsigned int gen);
extern const struct xfs_buf_ops xfs_agi_buf_ops;
#endif /* __XFS_IALLOC_H__ */

View File

@@ -335,7 +335,8 @@ xfs_iget_cache_miss(
iflags = XFS_INEW;
if (flags & XFS_IGET_DONTCACHE)
iflags |= XFS_IDONTCACHE;
ip->i_udquot = ip->i_gdquot = NULL;
ip->i_udquot = NULL;
ip->i_gdquot = NULL;
xfs_iflags_set(ip, iflags);
/* insert the new inode */

View File

@@ -40,7 +40,6 @@ void xfs_inode_clear_eofblocks_tag(struct xfs_inode *ip);
int xfs_icache_free_eofblocks(struct xfs_mount *, struct xfs_eofblocks *);
void xfs_eofblocks_worker(struct work_struct *);
int xfs_sync_inode_grab(struct xfs_inode *ip);
int xfs_inode_ag_iterator(struct xfs_mount *mp,
int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag,
int flags, void *args),

195
fs/xfs/xfs_icreate_item.c Normal file
View File

@@ -0,0 +1,195 @@
/*
* Copyright (c) 2008-2010, 2013 Dave Chinner
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_types.h"
#include "xfs_bit.h"
#include "xfs_log.h"
#include "xfs_inum.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_dir2.h"
#include "xfs_mount.h"
#include "xfs_trans_priv.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
#include "xfs_btree.h"
#include "xfs_ialloc.h"
#include "xfs_error.h"
#include "xfs_icreate_item.h"
kmem_zone_t *xfs_icreate_zone; /* inode create item zone */
static inline struct xfs_icreate_item *ICR_ITEM(struct xfs_log_item *lip)
{
return container_of(lip, struct xfs_icreate_item, ic_item);
}
/*
* This returns the number of iovecs needed to log the given inode item.
*
* We only need one iovec for the icreate log structure.
*/
STATIC uint
xfs_icreate_item_size(
struct xfs_log_item *lip)
{
return 1;
}
/*
* This is called to fill in the vector of log iovecs for the
* given inode create log item.
*/
STATIC void
xfs_icreate_item_format(
struct xfs_log_item *lip,
struct xfs_log_iovec *log_vector)
{
struct xfs_icreate_item *icp = ICR_ITEM(lip);
log_vector->i_addr = (xfs_caddr_t)&icp->ic_format;
log_vector->i_len = sizeof(struct xfs_icreate_log);
log_vector->i_type = XLOG_REG_TYPE_ICREATE;
}
/* Pinning has no meaning for the create item, so just return. */
STATIC void
xfs_icreate_item_pin(
struct xfs_log_item *lip)
{
}
/* pinning has no meaning for the create item, so just return. */
STATIC void
xfs_icreate_item_unpin(
struct xfs_log_item *lip,
int remove)
{
}
STATIC void
xfs_icreate_item_unlock(
struct xfs_log_item *lip)
{
struct xfs_icreate_item *icp = ICR_ITEM(lip);
if (icp->ic_item.li_flags & XFS_LI_ABORTED)
kmem_zone_free(xfs_icreate_zone, icp);
return;
}
/*
* Because we have ordered buffers being tracked in the AIL for the inode
* creation, we don't need the create item after this. Hence we can free
* the log item and return -1 to tell the caller we're done with the item.
*/
STATIC xfs_lsn_t
xfs_icreate_item_committed(
struct xfs_log_item *lip,
xfs_lsn_t lsn)
{
struct xfs_icreate_item *icp = ICR_ITEM(lip);
kmem_zone_free(xfs_icreate_zone, icp);
return (xfs_lsn_t)-1;
}
/* item can never get into the AIL */
STATIC uint
xfs_icreate_item_push(
struct xfs_log_item *lip,
struct list_head *buffer_list)
{
ASSERT(0);
return XFS_ITEM_SUCCESS;
}
/* Ordered buffers do the dependency tracking here, so this does nothing. */
STATIC void
xfs_icreate_item_committing(
struct xfs_log_item *lip,
xfs_lsn_t lsn)
{
}
/*
* This is the ops vector shared by all buf log items.
*/
static struct xfs_item_ops xfs_icreate_item_ops = {
.iop_size = xfs_icreate_item_size,
.iop_format = xfs_icreate_item_format,
.iop_pin = xfs_icreate_item_pin,
.iop_unpin = xfs_icreate_item_unpin,
.iop_push = xfs_icreate_item_push,
.iop_unlock = xfs_icreate_item_unlock,
.iop_committed = xfs_icreate_item_committed,
.iop_committing = xfs_icreate_item_committing,
};
/*
* Initialize the inode log item for a newly allocated (in-core) inode.
*
* Inode extents can only reside within an AG. Hence specify the starting
* block for the inode chunk by offset within an AG as well as the
* length of the allocated extent.
*
* This joins the item to the transaction and marks it dirty so
* that we don't need a separate call to do this, nor does the
* caller need to know anything about the icreate item.
*/
void
xfs_icreate_log(
struct xfs_trans *tp,
xfs_agnumber_t agno,
xfs_agblock_t agbno,
unsigned int count,
unsigned int inode_size,
xfs_agblock_t length,
unsigned int generation)
{
struct xfs_icreate_item *icp;
icp = kmem_zone_zalloc(xfs_icreate_zone, KM_SLEEP);
xfs_log_item_init(tp->t_mountp, &icp->ic_item, XFS_LI_ICREATE,
&xfs_icreate_item_ops);
icp->ic_format.icl_type = XFS_LI_ICREATE;
icp->ic_format.icl_size = 1; /* single vector */
icp->ic_format.icl_ag = cpu_to_be32(agno);
icp->ic_format.icl_agbno = cpu_to_be32(agbno);
icp->ic_format.icl_count = cpu_to_be32(count);
icp->ic_format.icl_isize = cpu_to_be32(inode_size);
icp->ic_format.icl_length = cpu_to_be32(length);
icp->ic_format.icl_gen = cpu_to_be32(generation);
xfs_trans_add_item(tp, &icp->ic_item);
tp->t_flags |= XFS_TRANS_DIRTY;
icp->ic_item.li_desc->lid_flags |= XFS_LID_DIRTY;
}

52
fs/xfs/xfs_icreate_item.h Normal file
View File

@@ -0,0 +1,52 @@
/*
* Copyright (c) 2008-2010, Dave Chinner
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef XFS_ICREATE_ITEM_H
#define XFS_ICREATE_ITEM_H 1
/*
* on disk log item structure
*
* Log recovery assumes the first two entries are the type and size and they fit
* in 32 bits. Also in host order (ugh) so they have to be 32 bit aligned so
* decoding can be done correctly.
*/
struct xfs_icreate_log {
__uint16_t icl_type; /* type of log format structure */
__uint16_t icl_size; /* size of log format structure */
__be32 icl_ag; /* ag being allocated in */
__be32 icl_agbno; /* start block of inode range */
__be32 icl_count; /* number of inodes to initialise */
__be32 icl_isize; /* size of inodes */
__be32 icl_length; /* length of extent to initialise */
__be32 icl_gen; /* inode generation number to use */
};
/* in memory log item structure */
struct xfs_icreate_item {
struct xfs_log_item ic_item;
struct xfs_icreate_log ic_format;
};
extern kmem_zone_t *xfs_icreate_zone; /* inode create item zone */
void xfs_icreate_log(struct xfs_trans *tp, xfs_agnumber_t agno,
xfs_agblock_t agbno, unsigned int count,
unsigned int inode_size, xfs_agblock_t length,
unsigned int generation);
#endif /* XFS_ICREATE_ITEM_H */

View File

@@ -1028,6 +1028,11 @@ xfs_dinode_calc_crc(
/*
* Read the disk inode attributes into the in-core inode structure.
*
* If we are initialising a new inode and we are not utilising the
* XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new inode core
* with a random generation number. If we are keeping inodes around, we need to
* read the inode cluster to get the existing generation number off disk.
*/
int
xfs_iread(
@@ -1047,6 +1052,22 @@ xfs_iread(
if (error)
return error;
/* shortcut IO on inode allocation if possible */
if ((iget_flags & XFS_IGET_CREATE) &&
!(mp->m_flags & XFS_MOUNT_IKEEP)) {
/* initialise the on-disk inode core */
memset(&ip->i_d, 0, sizeof(ip->i_d));
ip->i_d.di_magic = XFS_DINODE_MAGIC;
ip->i_d.di_gen = prandom_u32();
if (xfs_sb_version_hascrc(&mp->m_sb)) {
ip->i_d.di_version = 3;
ip->i_d.di_ino = ip->i_ino;
uuid_copy(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid);
} else
ip->i_d.di_version = 2;
return 0;
}
/*
* Get pointers to the on-disk inode and the buffer containing it.
*/
@@ -1133,17 +1154,16 @@ xfs_iread(
xfs_buf_set_ref(bp, XFS_INO_REF);
/*
* Use xfs_trans_brelse() to release the buffer containing the
* on-disk inode, because it was acquired with xfs_trans_read_buf()
* in xfs_imap_to_bp() above. If tp is NULL, this is just a normal
* Use xfs_trans_brelse() to release the buffer containing the on-disk
* inode, because it was acquired with xfs_trans_read_buf() in
* xfs_imap_to_bp() above. If tp is NULL, this is just a normal
* brelse(). If we're within a transaction, then xfs_trans_brelse()
* will only release the buffer if it is not dirty within the
* transaction. It will be OK to release the buffer in this case,
* because inodes on disk are never destroyed and we will be
* locking the new in-core inode before putting it in the hash
* table where other processes can find it. Thus we don't have
* to worry about the inode being changed just because we released
* the buffer.
* because inodes on disk are never destroyed and we will be locking the
* new in-core inode before putting it in the cache where other
* processes can find it. Thus we don't have to worry about the inode
* being changed just because we released the buffer.
*/
out_brelse:
xfs_trans_brelse(tp, bp);
@@ -2028,8 +2048,6 @@ xfs_ifree(
int error;
int delete;
xfs_ino_t first_ino;
xfs_dinode_t *dip;
xfs_buf_t *ibp;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
ASSERT(ip->i_d.di_nlink == 0);
@@ -2042,14 +2060,13 @@ xfs_ifree(
* Pull the on-disk inode from the AGI unlinked list.
*/
error = xfs_iunlink_remove(tp, ip);
if (error != 0) {
if (error)
return error;
}
error = xfs_difree(tp, ip->i_ino, flist, &delete, &first_ino);
if (error != 0) {
if (error)
return error;
}
ip->i_d.di_mode = 0; /* mark incore inode as free */
ip->i_d.di_flags = 0;
ip->i_d.di_dmevmask = 0;
@@ -2061,31 +2078,10 @@ xfs_ifree(
* by reincarnations of this inode.
*/
ip->i_d.di_gen++;
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
error = xfs_imap_to_bp(ip->i_mount, tp, &ip->i_imap, &dip, &ibp,
0, 0);
if (error)
return error;
/*
* Clear the on-disk di_mode. This is to prevent xfs_bulkstat
* from picking up this inode when it is reclaimed (its incore state
* initialzed but not flushed to disk yet). The in-core di_mode is
* already cleared and a corresponding transaction logged.
* The hack here just synchronizes the in-core to on-disk
* di_mode value in advance before the actual inode sync to disk.
* This is OK because the inode is already unlinked and would never
* change its di_mode again for this inode generation.
* This is a temporary hack that would require a proper fix
* in the future.
*/
dip->di_mode = 0;
if (delete) {
if (delete)
error = xfs_ifree_cluster(ip, tp, first_ino);
}
return error;
}

View File

@@ -283,6 +283,15 @@ xfs_iomap_eof_want_preallocate(
if (offset + count <= XFS_ISIZE(ip))
return 0;
/*
* If the file is smaller than the minimum prealloc and we are using
* dynamic preallocation, don't do any preallocation at all as it is
* likely this is the only write to the file that is going to be done.
*/
if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) &&
XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_writeio_blocks))
return 0;
/*
* If there are any real blocks past eof, then don't
* do any speculative allocation.
@@ -345,6 +354,10 @@ xfs_iomap_eof_prealloc_initial_size(
if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
return 0;
/* If the file is small, then use the minimum prealloc */
if (XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_dalign))
return 0;
/*
* As we write multiple pages, the offset will always align to the
* start of a page and hence point to a hole at EOF. i.e. if the size is

View File

@@ -987,7 +987,8 @@ xfs_fiemap_format(
if (bmv->bmv_oflags & BMV_OF_PREALLOC)
fiemap_flags |= FIEMAP_EXTENT_UNWRITTEN;
else if (bmv->bmv_oflags & BMV_OF_DELALLOC) {
fiemap_flags |= FIEMAP_EXTENT_DELALLOC;
fiemap_flags |= (FIEMAP_EXTENT_DELALLOC |
FIEMAP_EXTENT_UNKNOWN);
physical = 0; /* no block yet */
}
if (bmv->bmv_oflags & BMV_OF_LAST)

View File

@@ -43,7 +43,7 @@ xfs_internal_inum(
{
return (ino == mp->m_sb.sb_rbmino || ino == mp->m_sb.sb_rsumino ||
(xfs_sb_version_hasquota(&mp->m_sb) &&
(ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino)));
xfs_is_quota_inode(&mp->m_sb, ino)));
}
/*
@@ -383,11 +383,13 @@ xfs_bulkstat(
* Also start read-ahead now for this chunk.
*/
if (r.ir_freecount < XFS_INODES_PER_CHUNK) {
struct blk_plug plug;
/*
* Loop over all clusters in the next chunk.
* Do a readahead if there are any allocated
* inodes in that cluster.
*/
blk_start_plug(&plug);
agbno = XFS_AGINO_TO_AGBNO(mp, r.ir_startino);
for (chunkidx = 0;
chunkidx < XFS_INODES_PER_CHUNK;
@@ -399,6 +401,7 @@ xfs_bulkstat(
agbno, nbcluster,
&xfs_inode_buf_ops);
}
blk_finish_plug(&plug);
irbp->ir_startino = r.ir_startino;
irbp->ir_freecount = r.ir_freecount;
irbp->ir_free = r.ir_free;

Some files were not shown because too many files have changed in this diff Show More