diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 8a10d175567362de1e04ea162376bae126b53742..ebb3337e3393fa4d12e2f45e342684e93e535047 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -37,8 +37,8 @@ struct workqueue_struct *xfs_alloc_wq; #define XFSA_FIXUP_CNT_OK 2 STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *); -STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *); -STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *); +STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *, uint32_t); +STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *, uint32_t); /* * Size of the AGFL. For CRC-enabled filesystes we steal a couple of slots in @@ -426,8 +426,8 @@ xfs_alloc_fix_len( */ STATIC int /* error code */ xfs_alloc_fixup_trees( - xfs_btree_cur_t *cnt_cur, /* cursor for by-size btree */ - xfs_btree_cur_t *bno_cur, /* cursor for by-block btree */ + struct xfs_btree_cur *cnt_cur, /* cursor for by-size btree */ + struct xfs_btree_cur *bno_cur, /* cursor for by-block btree */ xfs_agblock_t fbno, /* starting block of free extent */ xfs_extlen_t flen, /* length of free extent */ xfs_agblock_t rbno, /* starting block of returned extent */ @@ -1141,7 +1141,8 @@ xfs_alloc_ag_vextent_small( */ STATIC int /* error */ xfs_alloc_ag_vextent( - xfs_alloc_arg_t *args) /* argument structure for allocation */ + xfs_alloc_arg_t *args, /* argument structure for allocation */ + uint32_t alloc_flags) { int error=0; @@ -1157,10 +1158,10 @@ xfs_alloc_ag_vextent( args->wasfromfl = 0; switch (args->type) { case XFS_ALLOCTYPE_THIS_AG: - error = xfs_alloc_ag_vextent_size(args); + error = xfs_alloc_ag_vextent_size(args, alloc_flags); break; case XFS_ALLOCTYPE_NEAR_BNO: - error = xfs_alloc_ag_vextent_near(args); + error = xfs_alloc_ag_vextent_near(args, alloc_flags); break; case XFS_ALLOCTYPE_THIS_BNO: error = xfs_alloc_ag_vextent_exact(args); @@ -1214,8 +1215,8 @@ xfs_alloc_ag_vextent_exact( xfs_alloc_arg_t *args) /* allocation argument structure */ { struct xfs_agf __maybe_unused *agf = args->agbp->b_addr; - xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */ - xfs_btree_cur_t *cnt_cur;/* by count btree cursor */ + struct xfs_btree_cur *bno_cur;/* by block-number btree cursor */ + struct xfs_btree_cur *cnt_cur;/* by count btree cursor */ int error; xfs_agblock_t fbno; /* start block of found extent */ xfs_extlen_t flen; /* length of found extent */ @@ -1568,7 +1569,8 @@ xfs_alloc_ag_vextent_lastblock( */ STATIC int xfs_alloc_ag_vextent_near( - struct xfs_alloc_arg *args) + struct xfs_alloc_arg *args, + uint32_t alloc_flags) { struct xfs_alloc_cur acur = {}; int error; /* error code */ @@ -1587,6 +1589,8 @@ xfs_alloc_ag_vextent_near( if (args->agbno > args->max_agbno) args->agbno = args->max_agbno; + /* Retry once quickly if we find busy extents before blocking. */ + alloc_flags |= XFS_ALLOC_FLAG_TRYFLUSH; restart: len = 0; @@ -1642,9 +1646,20 @@ xfs_alloc_ag_vextent_near( */ if (!acur.len) { if (acur.busy) { + /* + * Our only valid extents must have been busy. Flush and + * retry the allocation again. If we get an -EAGAIN + * error, we're being told that a deadlock was avoided + * and the current transaction needs committing before + * the allocation can be retried. + */ trace_xfs_alloc_near_busy(args); - xfs_extent_busy_flush(args->mp, args->pag, - acur.busy_gen); + error = xfs_extent_busy_flush(args->tp, args->pag, + acur.busy_gen, alloc_flags); + if (error) + goto out; + + alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH; goto restart; } trace_xfs_alloc_size_neither(args); @@ -1667,22 +1682,25 @@ xfs_alloc_ag_vextent_near( * and of the form k * prod + mod unless there's nothing that large. * Return the starting a.g. block, or NULLAGBLOCK if we can't do it. */ -STATIC int /* error */ +static int xfs_alloc_ag_vextent_size( - xfs_alloc_arg_t *args) /* allocation argument structure */ + struct xfs_alloc_arg *args, + uint32_t alloc_flags) { - struct xfs_agf *agf = args->agbp->b_addr; - xfs_btree_cur_t *bno_cur; /* cursor for bno btree */ - xfs_btree_cur_t *cnt_cur; /* cursor for cnt btree */ - int error; /* error result */ - xfs_agblock_t fbno; /* start of found freespace */ - xfs_extlen_t flen; /* length of found freespace */ - int i; /* temp status variable */ - xfs_agblock_t rbno; /* returned block number */ - xfs_extlen_t rlen; /* length of returned extent */ - bool busy; - unsigned busy_gen; + struct xfs_agf *agf = args->agbp->b_addr; + struct xfs_btree_cur *bno_cur; + struct xfs_btree_cur *cnt_cur; + xfs_agblock_t fbno; /* start of found freespace */ + xfs_extlen_t flen; /* length of found freespace */ + xfs_agblock_t rbno; /* returned block number */ + xfs_extlen_t rlen; /* length of returned extent */ + bool busy; + unsigned busy_gen; + int error; + int i; + /* Retry once quickly if we find busy extents before blocking. */ + alloc_flags |= XFS_ALLOC_FLAG_TRYFLUSH; restart: /* * Allocate and initialize a cursor for the by-size btree. @@ -1741,19 +1759,25 @@ xfs_alloc_ag_vextent_size( error = xfs_btree_increment(cnt_cur, 0, &i); if (error) goto error0; - if (i == 0) { - /* - * Our only valid extents must have been busy. - * Make it unbusy by forcing the log out and - * retrying. - */ - xfs_btree_del_cursor(cnt_cur, - XFS_BTREE_NOERROR); - trace_xfs_alloc_size_busy(args); - xfs_extent_busy_flush(args->mp, - args->pag, busy_gen); - goto restart; - } + if (i) + continue; + + /* + * Our only valid extents must have been busy. Flush and + * retry the allocation again. If we get an -EAGAIN + * error, we're being told that a deadlock was avoided + * and the current transaction needs committing before + * the allocation can be retried. + */ + trace_xfs_alloc_size_busy(args); + error = xfs_extent_busy_flush(args->tp, args->pag, + busy_gen, alloc_flags); + if (error) + goto error0; + + alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH; + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + goto restart; } } @@ -1833,9 +1857,21 @@ xfs_alloc_ag_vextent_size( args->len = rlen; if (rlen < args->minlen) { if (busy) { - xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + /* + * Our only valid extents must have been busy. Flush and + * retry the allocation again. If we get an -EAGAIN + * error, we're being told that a deadlock was avoided + * and the current transaction needs committing before + * the allocation can be retried. + */ trace_xfs_alloc_size_busy(args); - xfs_extent_busy_flush(args->mp, args->pag, busy_gen); + error = xfs_extent_busy_flush(args->tp, args->pag, + busy_gen, alloc_flags); + if (error) + goto error0; + + alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH; + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); goto restart; } goto out_nominleft; @@ -2559,7 +2595,7 @@ __xfs_free_extent_later( int /* error */ xfs_alloc_fix_freelist( struct xfs_alloc_arg *args, /* allocation argument structure */ - int flags) /* XFS_ALLOC_FLAG_... */ + uint32_t alloc_flags) { struct xfs_mount *mp = args->mp; struct xfs_perag *pag = args->pag; @@ -2575,7 +2611,7 @@ xfs_alloc_fix_freelist( ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); if (!pag->pagf_init) { - error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp); + error = xfs_alloc_read_agf(mp, tp, args->agno, alloc_flags, &agbp); if (error) { /* Couldn't lock the AGF so skip this AG. */ if (error == -EAGAIN) @@ -2590,13 +2626,13 @@ xfs_alloc_fix_freelist( * point */ if (pag->pagf_metadata && (args->datatype & XFS_ALLOC_USERDATA) && - (flags & XFS_ALLOC_FLAG_TRYLOCK)) { - ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); + (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK)) { + ASSERT(!(alloc_flags & XFS_ALLOC_FLAG_FREEING)); goto out_agbp_relse; } need = xfs_alloc_min_freelist(mp, pag); - if (!xfs_alloc_space_available(args, need, flags | + if (!xfs_alloc_space_available(args, need, alloc_flags | XFS_ALLOC_FLAG_CHECK)) goto out_agbp_relse; @@ -2605,7 +2641,7 @@ xfs_alloc_fix_freelist( * Can fail if we're not blocking on locks, and it's held. */ if (!agbp) { - error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp); + error = xfs_alloc_read_agf(mp, tp, args->agno, alloc_flags, &agbp); if (error) { /* Couldn't lock the AGF so skip this AG. */ if (error == -EAGAIN) @@ -2620,7 +2656,7 @@ xfs_alloc_fix_freelist( /* If there isn't enough total space or single-extent, reject it. */ need = xfs_alloc_min_freelist(mp, pag); - if (!xfs_alloc_space_available(args, need, flags)) + if (!xfs_alloc_space_available(args, need, alloc_flags)) goto out_agbp_relse; /* @@ -2649,11 +2685,12 @@ xfs_alloc_fix_freelist( */ memset(&targs, 0, sizeof(targs)); /* struct copy below */ - if (flags & XFS_ALLOC_FLAG_NORMAP) + if (alloc_flags & XFS_ALLOC_FLAG_NORMAP) targs.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE; else targs.oinfo = XFS_RMAP_OINFO_AG; - while (!(flags & XFS_ALLOC_FLAG_NOSHRINK) && pag->pagf_flcount > need) { + while (!(alloc_flags & XFS_ALLOC_FLAG_NOSHRINK) && + pag->pagf_flcount > need) { error = xfs_alloc_get_freelist(tp, agbp, &bno, 0); if (error) goto out_agbp_relse; @@ -2682,7 +2719,7 @@ xfs_alloc_fix_freelist( targs.resv = XFS_AG_RESV_AGFL; /* Allocate as many blocks as possible at once. */ - error = xfs_alloc_ag_vextent(&targs); + error = xfs_alloc_ag_vextent(&targs, alloc_flags); if (error) goto out_agflbp_relse; @@ -2692,7 +2729,7 @@ xfs_alloc_fix_freelist( * on a completely full ag. */ if (targs.agbno == NULLAGBLOCK) { - if (flags & XFS_ALLOC_FLAG_FREEING) + if (alloc_flags & XFS_ALLOC_FLAG_FREEING) break; goto out_agflbp_relse; } @@ -3149,7 +3186,7 @@ xfs_alloc_vextent( { xfs_agblock_t agsize; /* allocation group size */ int error; - int flags; /* XFS_ALLOC_FLAG_... locking flags */ + uint32_t alloc_flags; /* XFS_ALLOC_FLAG_... locking flags */ struct xfs_mount *mp; /* mount structure pointer */ xfs_agnumber_t sagno; /* starting allocation group number */ xfs_alloctype_t type; /* input allocation type */ @@ -3202,7 +3239,7 @@ xfs_alloc_vextent( break; } args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno); - if ((error = xfs_alloc_ag_vextent(args))) + if ((error = xfs_alloc_ag_vextent(args, 0))) goto error0; break; case XFS_ALLOCTYPE_START_BNO: @@ -3231,13 +3268,13 @@ xfs_alloc_vextent( args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno); args->type = XFS_ALLOCTYPE_THIS_AG; sagno = 0; - flags = 0; + alloc_flags = 0; } else { /* * Start with the given allocation group. */ args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno); - flags = XFS_ALLOC_FLAG_TRYLOCK; + alloc_flags = XFS_ALLOC_FLAG_TRYLOCK; } /* * Loop over allocation groups twice; first time with @@ -3245,7 +3282,7 @@ xfs_alloc_vextent( */ for (;;) { args->pag = xfs_perag_get(mp, args->agno); - error = xfs_alloc_fix_freelist(args, flags); + error = xfs_alloc_fix_freelist(args, alloc_flags); if (error) { trace_xfs_alloc_vextent_nofix(args); goto error0; @@ -3254,7 +3291,8 @@ xfs_alloc_vextent( * If we get a buffer back then the allocation will fly. */ if (args->agbp) { - if ((error = xfs_alloc_ag_vextent(args))) + if ((error = xfs_alloc_ag_vextent(args, + alloc_flags))) goto error0; break; } @@ -3285,13 +3323,13 @@ xfs_alloc_vextent( * or switch to non-trylock mode. */ if (args->agno == sagno) { - if (flags == 0) { + if (alloc_flags == 0) { args->agbno = NULLAGBLOCK; trace_xfs_alloc_vextent_allfailed(args); break; } - flags = 0; + alloc_flags = 0; if (type == XFS_ALLOCTYPE_START_BNO) { args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno); diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index 9ccec5d42c9794210d2d770011daaefa85399cc9..84a8b7b9db0dc3300a71448f71fc2316b54ea315 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -38,11 +38,12 @@ typedef unsigned int xfs_alloctype_t; /* * Flags for xfs_alloc_fix_freelist. */ -#define XFS_ALLOC_FLAG_TRYLOCK 0x00000001 /* use trylock for buffer locking */ -#define XFS_ALLOC_FLAG_FREEING 0x00000002 /* indicate caller is freeing extents*/ -#define XFS_ALLOC_FLAG_NORMAP 0x00000004 /* don't modify the rmapbt */ -#define XFS_ALLOC_FLAG_NOSHRINK 0x00000008 /* don't shrink the freelist */ -#define XFS_ALLOC_FLAG_CHECK 0x00000010 /* test only, don't modify args */ +#define XFS_ALLOC_FLAG_TRYLOCK (1U << 0) /* use trylock for buffer locking */ +#define XFS_ALLOC_FLAG_FREEING (1U << 1) /* indicate caller is freeing extents*/ +#define XFS_ALLOC_FLAG_NORMAP (1U << 2) /* don't modify the rmapbt */ +#define XFS_ALLOC_FLAG_NOSHRINK (1U << 3) /* don't shrink the freelist */ +#define XFS_ALLOC_FLAG_CHECK (1U << 4) /* test only, don't modify args */ +#define XFS_ALLOC_FLAG_TRYFLUSH (1U << 5) /* don't wait in busy extent flush */ /* * Argument structure for xfs_alloc routines. @@ -210,7 +211,7 @@ int xfs_alloc_read_agfl(struct xfs_mount *mp, struct xfs_trans *tp, xfs_agnumber_t agno, struct xfs_buf **bpp); int xfs_free_agfl_block(struct xfs_trans *, xfs_agnumber_t, xfs_agblock_t, struct xfs_buf *, struct xfs_owner_info *); -int xfs_alloc_fix_freelist(struct xfs_alloc_arg *args, int flags); +int xfs_alloc_fix_freelist(struct xfs_alloc_arg *args, uint32_t alloc_flags); int xfs_free_extent_fix_freelist(struct xfs_trans *tp, xfs_agnumber_t agno, struct xfs_buf **agbp); diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 7ace8755d3536cef38fd457e8ec9998ccac804b6..23f17d9059d541c24e04cab76e64645cdf1dd81e 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -313,7 +313,7 @@ xfs_check_block( */ STATIC void xfs_bmap_check_leaf_extents( - xfs_btree_cur_t *cur, /* btree cursor or null */ + struct xfs_btree_cur *cur, /* btree cursor or null */ xfs_inode_t *ip, /* incore inode pointer */ int whichfork) /* data or attr fork */ { @@ -881,13 +881,15 @@ xfs_bmap_add_attrfork_btree( xfs_inode_t *ip, /* incore inode pointer */ int *flags) /* inode logging flags */ { - xfs_btree_cur_t *cur; /* btree cursor */ + struct xfs_btree_block *block = ip->i_df.if_broot; + struct xfs_btree_cur *cur; /* btree cursor */ int error; /* error return value */ xfs_mount_t *mp; /* file system mount struct */ int stat; /* newroot status */ mp = ip->i_mount; - if (ip->i_df.if_broot_bytes <= XFS_IFORK_DSIZE(ip)) + + if (XFS_BMAP_BMDR_SPACE(block) <= XFS_IFORK_DSIZE(ip)) *flags |= XFS_ILOG_DBROOT; else { cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK); @@ -923,7 +925,7 @@ xfs_bmap_add_attrfork_extents( struct xfs_inode *ip, /* incore inode pointer */ int *flags) /* inode logging flags */ { - xfs_btree_cur_t *cur; /* bmap btree cursor */ + struct xfs_btree_cur *cur; /* bmap btree cursor */ int error; /* error return value */ if (ip->i_df.if_nextents * sizeof(struct xfs_bmbt_rec) <= @@ -1985,11 +1987,11 @@ xfs_bmap_add_extent_unwritten_real( xfs_inode_t *ip, /* incore inode pointer */ int whichfork, struct xfs_iext_cursor *icur, - xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ + struct xfs_btree_cur **curp, /* if *curp is null, not a btree */ xfs_bmbt_irec_t *new, /* new data to add to file extents */ int *logflagsp) /* inode logging flags */ { - xfs_btree_cur_t *cur; /* btree cursor */ + struct xfs_btree_cur *cur; /* btree cursor */ int error; /* error return value */ int i; /* temp state */ struct xfs_ifork *ifp; /* inode fork pointer */ @@ -5124,7 +5126,7 @@ xfs_bmap_del_extent_real( xfs_inode_t *ip, /* incore inode pointer */ xfs_trans_t *tp, /* current transaction pointer */ struct xfs_iext_cursor *icur, - xfs_btree_cur_t *cur, /* if null, not a btree */ + struct xfs_btree_cur *cur, /* if null, not a btree */ xfs_bmbt_irec_t *del, /* data to remove from extents */ int *logflagsp, /* inode logging flags */ int whichfork, /* data or attr fork */ diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index eb911b4e1620b3313a064ca11764dc9a407eca1a..b30001a857920cb78ba26fe561be13392ac0821a 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -391,14 +391,14 @@ xfs_btree_del_cursor( */ int /* error */ xfs_btree_dup_cursor( - xfs_btree_cur_t *cur, /* input cursor */ - xfs_btree_cur_t **ncur) /* output cursor */ + struct xfs_btree_cur *cur, /* input cursor */ + struct xfs_btree_cur **ncur) /* output cursor */ { xfs_buf_t *bp; /* btree block's buffer pointer */ int error; /* error return value */ int i; /* level number of btree block */ xfs_mount_t *mp; /* mount structure for filesystem */ - xfs_btree_cur_t *new; /* new cursor value */ + struct xfs_btree_cur *new; /* new cursor value */ xfs_trans_t *tp; /* transaction pointer, can be NULL */ tp = cur->bc_tp; @@ -694,7 +694,7 @@ xfs_btree_get_block( */ STATIC int /* success=1, failure=0 */ xfs_btree_firstrec( - xfs_btree_cur_t *cur, /* btree cursor */ + struct xfs_btree_cur *cur, /* btree cursor */ int level) /* level to change */ { struct xfs_btree_block *block; /* generic btree block pointer */ @@ -724,7 +724,7 @@ xfs_btree_firstrec( */ STATIC int /* success=1, failure=0 */ xfs_btree_lastrec( - xfs_btree_cur_t *cur, /* btree cursor */ + struct xfs_btree_cur *cur, /* btree cursor */ int level) /* level to change */ { struct xfs_btree_block *block; /* generic btree block pointer */ @@ -988,7 +988,7 @@ xfs_btree_readahead_ptr( */ STATIC void xfs_btree_setbuf( - xfs_btree_cur_t *cur, /* btree cursor */ + struct xfs_btree_cur *cur, /* btree cursor */ int lev, /* level in btree */ xfs_buf_t *bp) /* new buffer to set */ { diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index 8885b00020f6649863b6534c8f3bbbf737bccc2a..221b8a7c2f352e6519c60e5d6f6d0512fab5ba5b 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -215,7 +215,7 @@ struct xfs_btree_cur_ino { * Btree cursor structure. * This collects all information needed by the btree code in one place. */ -typedef struct xfs_btree_cur +struct xfs_btree_cur { struct xfs_trans *bc_tp; /* transaction we're in, if any */ struct xfs_mount *bc_mp; /* file system mount struct */ @@ -235,7 +235,7 @@ typedef struct xfs_btree_cur struct xfs_btree_cur_ag bc_ag; struct xfs_btree_cur_ino bc_ino; }; -} xfs_btree_cur_t; +}; /* cursor flags */ #define XFS_BTREE_LONG_PTRS (1<<0) /* pointers are 64bits long */ @@ -301,7 +301,7 @@ xfs_btree_check_sptr( */ void xfs_btree_del_cursor( - xfs_btree_cur_t *cur, /* btree cursor */ + struct xfs_btree_cur *cur, /* btree cursor */ int error); /* del because of error */ /* @@ -310,8 +310,8 @@ xfs_btree_del_cursor( */ int /* error */ xfs_btree_dup_cursor( - xfs_btree_cur_t *cur, /* input cursor */ - xfs_btree_cur_t **ncur);/* output cursor */ + struct xfs_btree_cur *cur, /* input cursor */ + struct xfs_btree_cur **ncur);/* output cursor */ /* * Compute first and last byte offsets for the fields given. @@ -516,7 +516,7 @@ struct xfs_ifork *xfs_btree_ifork_ptr(struct xfs_btree_cur *cur); /* Does this cursor point to the last block in the given level? */ static inline bool xfs_btree_islastblock( - xfs_btree_cur_t *cur, + struct xfs_btree_cur *cur, int level) { struct xfs_btree_block *block; diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c index a4075685d9ebaf25f407163313438b601e7b739b..b19bff8d89f0473f33362fb8b0a16ffffbc4ac38 100644 --- a/fs/xfs/xfs_extent_busy.c +++ b/fs/xfs/xfs_extent_busy.c @@ -577,20 +577,45 @@ xfs_extent_busy_clear( /* * Flush out all busy extents for this AG. + * + * If the current transaction is holding busy extents, the caller may not want + * to wait for committed busy extents to resolve. If we are being told just to + * try a flush or progress has been made since we last skipped a busy extent, + * return immediately to allow the caller to try again. + * + * If we are freeing extents, we might actually be holding the only free extents + * in the transaction busy list and the log force won't resolve that situation. + * In this case, we must return -EAGAIN to avoid a deadlock by informing the + * caller it needs to commit the busy extents it holds before retrying the + * extent free operation. */ -void +int xfs_extent_busy_flush( - struct xfs_mount *mp, + struct xfs_trans *tp, struct xfs_perag *pag, - unsigned busy_gen) + unsigned busy_gen, + uint32_t alloc_flags) { DEFINE_WAIT (wait); int error; - error = xfs_log_force(mp, XFS_LOG_SYNC); + error = xfs_log_force(tp->t_mountp, XFS_LOG_SYNC); if (error) - return; + return error; + + /* Avoid deadlocks on uncommitted busy extents. */ + if (!list_empty(&tp->t_busy)) { + if (alloc_flags & XFS_ALLOC_FLAG_TRYFLUSH) + return 0; + + if (busy_gen != READ_ONCE(pag->pagb_gen)) + return 0; + + if (alloc_flags & XFS_ALLOC_FLAG_FREEING) + return -EAGAIN; + } + /* Wait for committed busy extents to resolve. */ do { prepare_to_wait(&pag->pagb_wait, &wait, TASK_KILLABLE); if (busy_gen != READ_ONCE(pag->pagb_gen)) @@ -599,6 +624,7 @@ xfs_extent_busy_flush( } while (1); finish_wait(&pag->pagb_wait, &wait); + return 0; } void diff --git a/fs/xfs/xfs_extent_busy.h b/fs/xfs/xfs_extent_busy.h index 8aea07100092397f4e1f5754c9b806ce5138cb9c..f145d532051955a22c492c98d5fb2a3baf95c4fe 100644 --- a/fs/xfs/xfs_extent_busy.h +++ b/fs/xfs/xfs_extent_busy.h @@ -50,9 +50,9 @@ bool xfs_extent_busy_trim(struct xfs_alloc_arg *args, xfs_agblock_t *bno, xfs_extlen_t *len, unsigned *busy_gen); -void -xfs_extent_busy_flush(struct xfs_mount *mp, struct xfs_perag *pag, - unsigned busy_gen); +int +xfs_extent_busy_flush(struct xfs_trans *tp, struct xfs_perag *pag, + unsigned busy_gen, uint32_t alloc_flags); void xfs_extent_busy_wait_all(struct xfs_mount *mp); diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index c8e615f7ce099566deab24b3ef0982ab77677fb9..2d25aa62ab788059dc20ac9547e0eca3affb3043 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -347,6 +347,34 @@ xfs_trans_get_efd( return efdp; } +/* + * Fill the EFD with all extents from the EFI when we need to roll the + * transaction and continue with a new EFI. + * + * This simply copies all the extents in the EFI to the EFD rather than make + * assumptions about which extents in the EFI have already been processed. We + * currently keep the xefi list in the same order as the EFI extent list, but + * that may not always be the case. Copying everything avoids leaving a landmine + * were we fail to cancel all the extents in an EFI if the xefi list is + * processed in a different order to the extents in the EFI. + */ +static void +xfs_efd_from_efi( + struct xfs_efd_log_item *efdp) +{ + struct xfs_efi_log_item *efip = efdp->efd_efip; + uint i; + + ASSERT(efip->efi_format.efi_nextents > 0); + ASSERT(efdp->efd_next_extent < efip->efi_format.efi_nextents); + + for (i = 0; i < efip->efi_format.efi_nextents; i++) { + efdp->efd_format.efd_extents[i] = + efip->efi_format.efi_extents[i]; + } + efdp->efd_next_extent = efip->efi_format.efi_nextents; +} + /* * Free an extent and log it to the EFD. Note that the transaction is marked * dirty regardless of whether the extent free succeeds or fails to support the @@ -390,6 +418,17 @@ xfs_trans_free_extent( tp->t_flags |= XFS_TRANS_DIRTY; set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags); + /* + * If we need a new transaction to make progress, the caller will log a + * new EFI with the current contents. It will also log an EFD to cancel + * the existing EFI, and so we need to copy all the unprocessed extents + * in this EFI to the EFD so this works correctly. + */ + if (error == -EAGAIN) { + xfs_efd_from_efi(efdp); + return error; + } + next_extent = efdp->efd_next_extent; ASSERT(next_extent < efdp->efd_format.efd_nextents); extp = &(efdp->efd_format.efd_extents[next_extent]); @@ -487,6 +526,14 @@ xfs_extent_free_finish_item( xefi = container_of(item, struct xfs_extent_free_item, xefi_list); error = xfs_trans_free_extent(tp, EFD_ITEM(done), xefi); + + /* + * Don't free the XEFI if we need a new transaction to complete + * processing of it. + */ + if (error == -EAGAIN) + return error; + kmem_cache_free(xfs_extfree_item_cache, xefi); return error; } @@ -601,6 +648,7 @@ xfs_efi_item_recover( xfs_fsblock_t startblock_fsb; int i; int error = 0; + bool requeue_only = false; /* * First check the validity of the extents described by the @@ -634,7 +682,25 @@ xfs_efi_item_recover( fake.xefi_startblock = extp->ext_start; fake.xefi_blockcount = extp->ext_len; - error = xfs_trans_free_extent(tp, efdp, &fake); + if (!requeue_only) + error = xfs_trans_free_extent(tp, efdp, &fake); + + /* + * If we can't free the extent without potentially deadlocking, + * requeue the rest of the extents to a new so that they get + * run again later with a new transaction context. + */ + if (error == -EAGAIN || requeue_only) { + error = xfs_free_extent_later(tp, fake.xefi_startblock, + fake.xefi_blockcount, + &XFS_RMAP_OINFO_ANY_OWNER, + fake.xefi_agresv); + if (!error) { + requeue_only = true; + continue; + } + }; + if (error) goto abort_error; diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index dbb69b4bf3ed5fbe4bacb65a1b9ca78e79ae9ffa..e34458f69fb7589441ce55ea284e6e7b28b16fcf 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -806,7 +806,7 @@ xfs_trans_ail_update_bulk( trace_xfs_ail_insert(lip, 0, lsn); } lip->li_lsn = lsn; - list_add(&lip->li_ail, &tmp); + list_add_tail(&lip->li_ail, &tmp); } if (!list_empty(&tmp))